In [13]:
import sys
import random
import os, numpy as np
import torch
import torchvision.transforms as transforms
import torch.utils.data as data

from skimage.transform import resize
from scipy.sparse import csr_matrix
from PIL import Image
import xml.etree.ElementTree as ET

import cv2

import matplotlib.pyplot as plt

VOC_CLASSES = (
    "__background__",
    "aeroplane",
    "bicycle",
    "bird",
    "boat",
    "bottle",
    "bus",
    "car",
    "cat",
    "chair",
    "cow",
    "diningtable",
    "dog",
    "horse",
    "motorbike",
    "person",
    "pottedplant",
    "sheep",
    "sofa",
    "train",
    "tvmonitor",
)


class VocDataset(data.Dataset):
    def __init__(self, data_path, dataset_split, transform, random_crops=0):
        self.data_path = data_path
        self.transform = transform
        self.random_crops = random_crops
        self.dataset_split = dataset_split

        self.__init_classes()
        (
            self.names,
            self.labels,
            self.box_indices,
            self.label_order,
        ) = self.__dataset_info()

    def __getitem__(self, index):
        # CHANGED
        #         x = imread(self.data_path + '/JPEGImages/' + self.names[index] + '.jpg', mode='RGB')
        #         x = Image.fromarray(x)
        x = Image.open(self.data_path + "/JPEGImages/" + self.names[index] + ".jpg")

        scale = np.random.rand() * 2 + 0.25
        w = int(x.size[0] * scale)
        h = int(x.size[1] * scale)
        if min(w, h) < 227:
            scale = 227 / min(w, h)
            w = int(x.size[0] * scale)
            h = int(x.size[1] * scale)

        if self.random_crops == 0:
            x = self.transform(x)
        else:
            crops = []
            for i in range(self.random_crops):
                crops.append(self.transform(x))
            x = torch.stack(crops)

        y = self.labels[index]
        return x, y

    def __len__(self):
        return len(self.names)

    def __init_classes(self):
        self.classes = VOC_CLASSES
        self.num_classes = len(self.classes)
        self.class_to_ind = dict(zip(self.classes, range(self.num_classes)))

    def __dataset_info(self):
        with open(
            self.data_path + "/ImageSets/Main/" + self.dataset_split + ".txt"
        ) as f:
            annotations = f.readlines()

        annotations = [n[:-1] for n in annotations]
        box_indices = []
        names = []
        labels = []
        label_order = []
        for af in annotations:
            if len(af) != 6:
                continue
            filename = os.path.join(self.data_path, "Annotations", af)
            tree = ET.parse(filename + ".xml")
            objs = tree.findall("object")
            num_objs = len(objs)

            boxes = np.zeros((num_objs, 4), dtype=np.int32)
            boxes_cl = np.zeros((num_objs), dtype=np.int32)
            boxes_cla = []
            temp_label = []
            for ix, obj in enumerate(objs):
                bbox = obj.find("bndbox")
                # Make pixel indexes 0-based
                x1 = float(bbox.find("xmin").text) - 1
                y1 = float(bbox.find("ymin").text) - 1
                x2 = float(bbox.find("xmax").text) - 1
                y2 = float(bbox.find("ymax").text) - 1

                cls = self.class_to_ind[obj.find("name").text.lower().strip()]
                boxes[ix, :] = [x1, y1, x2, y2]
                boxes_cl[ix] = cls
                boxes_cla.append(boxes[ix, :])
                temp_label.append(cls)

            lbl = np.zeros(self.num_classes)
            lbl[boxes_cl] = 1
            labels.append(lbl)
            names.append(af)
            box_indices.append(boxes_cla)
            label_order.append(temp_label)

        return (
            np.array(names),
            np.array(labels).astype(np.float32),
            np.array(box_indices),
            label_order,
        )

In [14]:
import os
import csv
import numpy as np


def write_csv(file_path, y_list):
    solution_rows = [("id", "category")] + [(i, 1 - y) for (i, y) in enumerate(y_list)]
    with open(file_path, "w") as f:
        writer = csv.writer(f)
        writer.writerows(solution_rows)


def output_submission_csv(output_file_path, y_test):
    write_csv(output_file_path, y_test)

# Assignment 2 Part 2: Developing Your Own Classifier

In [15]:
import os
import numpy as np
import torch
import torch.nn as nn
import torchvision

from torchvision import transforms
from sklearn.metrics import average_precision_score
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
# from kaggle_submission import output_submission_csv
# from classifier import SimpleClassifier, Classifier#, AlexNet
# from voc_dataloader import VocDataset, VOC_CLASSES

%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Part 2: Design your own network

In this notebook, your task is to create and train your own model for multi-label classification on VOC Pascal.

## What to do
1. You will make change on network architecture in ```classifier.py```.
2. You may also want to change other hyperparameters to assist your training to get a better performances. Hints will be given in the below instructions.

## What to submit
Check the submission template for details what to submit. 

In [16]:
def train_classifier(train_loader, classifier, criterion, optimizer):
    classifier.train()
    loss_ = 0.0
    losses = []
    for i, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        logits = classifier(images)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
        losses.append(loss)
    return torch.stack(losses).mean().item()

In [17]:

def test_classifier(test_loader, classifier, criterion, print_ind_classes=True, print_total=True):
    classifier.eval()
    losses = []
    with torch.no_grad():
        y_true = np.zeros((0,21))
        y_score = np.zeros((0,21))
        for i, (images, labels) in enumerate(test_loader):
            images, labels = images.to(device), labels.to(device)
            logits = classifier(images)
            y_true = np.concatenate((y_true, labels.cpu().numpy()), axis=0)
            y_score = np.concatenate((y_score, logits.cpu().numpy()), axis=0)
            loss = criterion(logits, labels)
            losses.append(loss.item())
        aps = []
        # ignore first class which is background
        for i in range(1, y_true.shape[1]):
            ap = average_precision_score(y_true[:, i], y_score[:, i])
            if print_ind_classes:
                print('-------  Class: {:<12}     AP: {:>8.4f}  -------'.format(VOC_CLASSES[i], ap))
            aps.append(ap)
        
        mAP = np.mean(aps)
        test_loss = np.mean(losses)
        if print_total:
            print('mAP: {0:.4f}'.format(mAP))
            print('Avg loss: {}'.format(test_loss))
        
    return mAP, test_loss, aps

In [18]:

def plot_losses(train, val, test_frequency, num_epochs):
    plt.plot(train, label="train")
    indices = [i for i in range(num_epochs) if ((i+1)%test_frequency == 0 or i ==0)]
    plt.plot(indices, val, label="val")
    plt.title("Loss Plot")
    plt.ylabel("Loss")
    plt.xlabel("Epoch")
    plt.legend()
    plt.show()
    
def plot_mAP(train, val, test_frequency, num_epochs):
    indices = [i for i in range(num_epochs) if ((i+1)%test_frequency == 0 or i ==0)]
    plt.plot(indices, train, label="train")
    plt.plot(indices, val, label="val")
    plt.title("mAP Plot")
    plt.ylabel("mAP")
    plt.xlabel("Epoch")
    plt.legend()
    plt.show()
    

In [19]:

def train(classifier, num_epochs, train_loader, val_loader, criterion, optimizer, test_frequency=5):
    train_losses = []
    train_mAPs = []
    val_losses = []
    val_mAPs = []
    
    for epoch in range(1,num_epochs+1):
        print("Starting epoch number " + str(epoch))
        train_loss = train_classifier(train_loader, classifier, criterion, optimizer)

        torch.save(classifier.state_dict(), '/kaggle/working/cnn_weights.pth')
        
        train_losses.append(train_loss)
        print("Loss for Training on Epoch " +str(epoch) + " is "+ str(train_loss))
        if(epoch%test_frequency==0 or epoch==1):
            mAP_train, _, _ = test_classifier(train_loader, classifier, criterion, False, False)
            train_mAPs.append(mAP_train)
            mAP_val, val_loss, _ = test_classifier(val_loader, classifier, criterion)
            print('Evaluating classifier')
            print("Mean Precision Score for Testing on Epoch " +str(epoch) + " is "+ str(mAP_val))
            val_losses.append(val_loss)
            val_mAPs.append(mAP_val)
    
    return classifier, train_losses, val_losses, train_mAPs, val_mAPs

# Developing Your Own Model

### Goal
To meet the benchmark for this assignment you will need to improve the network. Note you should have noticed pretrained Alenxt performs really well, but training Alexnet from scratch performs much worse. We hope you can design a better architecture over both the simple classifier and AlexNet to train from scratch.

### How to start
You may take inspiration from other published architectures and architectures discussed in lecture. However, you are NOT allowed to use predefined models (e.g. models from torchvision) or use pretrained weights. Training must be done from scratch with your own custom model.

#### Some hints
There are a variety of different approaches you should try to improve performance from the simple classifier:

* Network architecture changes
    * Number of layers: try adding layers to make your network deeper
    * Batch normalization: adding batch norm between layers will likely give you a significant performance increase
    * Residual connections: as you increase the depth of your network, you will find that having residual connections like those in ResNet architectures will be helpful
* Optimizer: Instead of plain SGD, you may want to add a learning rate schedule, add momentum, or use one of the other optimizers you have learned about like Adam. Check the `torch.optim` package for other optimizers
* Data augmentation: You should use the `torchvision.transforms` module to try adding random resized crops and horizontal flips of the input data. Check `transforms.RandomResizedCrop` and `transforms.RandomHorizontalFlip` for this. Feel free to apply more [transforms](https://pytorch.org/docs/stable/torchvision/transforms.html) for data augmentation which can lead to better performance. 
* Epochs: Once you have found a generally good hyperparameter setting try training for more epochs
* Loss function: You might want to add weighting to the `MultiLabelSoftMarginLoss` for classes that are less well represented or experiment with a different loss function



#### Note
We will soon be providing some initial expectations of mAP values as a function of epoch so you can get an early idea whether your implementation works without waiting a long time for training to converge.

### What to submit 
Submit your best model to Kaggle and save all plots for the writeup.


In [20]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std= [0.229, 0.224, 0.225])

train_transform = transforms.Compose([
            transforms.Resize(227),
            transforms.CenterCrop(227),
            transforms.ToTensor(),
            normalize
        ])

test_transform = transforms.Compose([
            transforms.Resize(227),
            transforms.CenterCrop(227),
            transforms.ToTensor(),
            normalize,
        ])

ds_train = VocDataset('/kaggle/input/pascal2/VOCdevkit_2007/VOC2007','train',train_transform)
ds_val = VocDataset('/kaggle/input/pascal2/VOCdevkit_2007/VOC2007','val',test_transform)
ds_test = VocDataset('/kaggle/input/pascal2/VOCdevkit_2007/VOC2007test','test', test_transform)




In [21]:
import torch
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from random import *

# Define the transformations
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(randint(10, 350)),
    transforms.RandomPerspective(distortion_scale=0.3, p=1.0),
    transforms.RandomResizedCrop(227),
    transforms.RandomVerticalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load the original dataset
tempds_train = VocDataset('/kaggle/input/pascal2/VOCdevkit_2007/VOC2007','train',train_transform)

# Apply the transformations to the original dataset to create a new dataset
ds_train = torch.utils.data.ConcatDataset([
    tempds_train,
    VocDataset('/kaggle/input/pascal2/VOCdevkit_2007/VOC2007','train',train_transform),
    VocDataset('/kaggle/input/pascal2/VOCdevkit_2007/VOC2007','train',train_transform),
    VocDataset('/kaggle/input/pascal2/VOCdevkit_2007/VOC2007','train',train_transform),
    VocDataset('/kaggle/input/pascal2/VOCdevkit_2007/VOC2007','train',train_transform),
    VocDataset('/kaggle/input/pascal2/VOCdevkit_2007/VOC2007','train',train_transform),
    VocDataset('/kaggle/input/pascal2/VOCdevkit_2007/VOC2007','train',train_transform),
    
])





In [22]:
len(ds_train)

17507

In [23]:
num_epochs = 10
test_frequency = 5
batch_size = 64

train_loader = torch.utils.data.DataLoader(dataset=ds_train,
                                               batch_size=batch_size, 
                                               shuffle=True,
                                               num_workers=1)

val_loader = torch.utils.data.DataLoader(dataset=ds_val,
                                               batch_size=batch_size, 
                                               shuffle=True,
                                               num_workers=1)

test_loader = torch.utils.data.DataLoader(dataset=ds_test,
                                               batch_size=batch_size, 
                                               shuffle=False,
                                               num_workers=1)

In [26]:

NUM_CLASSES = 21
import torch.nn.init as init

import torch.nn as nn

class ResBlock(nn.Module):
    
    def __init__(self, input_filters, output_filters, stride=1):
        super(ResBlock, self).__init__()
    
        if stride != 1 or input_filters != output_filters:
            self.reslink = nn.Sequential(nn.Conv2d(input_filters, output_filters, 1, stride, bias=False),nn.BatchNorm2d(output_filters))
        else:
            self.reslink = nn.Sequential()

        self.conv1 = nn.Conv2d(input_filters, output_filters, 3, stride, 1, bias=False)
        self.bn1 = nn.BatchNorm2d(output_filters)

        self.conv2 = nn.Conv2d(output_filters, output_filters, 3, 1, 1, bias=False)
        self.bn2 = nn.BatchNorm2d(output_filters)

        self.relu = nn.ReLU()



    def forward(self, x):
        
        out = self.relu(self.bn1(self.conv1(x)))
        
        out = self.bn2(self.conv2(out))
        
        out += self.reslink(x)
        out = self.relu(out)
        
        return out



class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        
        self.conv1 = nn.Conv2d(3, 64, 3, 2, 1)

        self.bn1 = nn.BatchNorm2d(64)
        self.bn2 = nn.BatchNorm2d(128)
        self.maxpool = nn.MaxPool2d(2,2)

        self.avgpool2 = nn.AvgPool2d((2, 2))
        
        self.relu = nn.ReLU(inplace=True)
        self.resblock1 = ResBlock(64, 128)
        self.resblock2 = ResBlock(128, 256,2) 
        self.resblock3 = ResBlock(256, 512,2)
        self.resblock4 = ResBlock(512, 1024,2)
        
        self.fc1 = nn.Linear(16384, 8192)
        self.fc2 = nn.Linear(8192, 1000)
        self.fc3 = nn.Linear(1000,21)

        self.dropout = nn.Dropout(0.5)

        
    def forward(self, x):

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.maxpool(out)
        

        out = self.resblock1(out)
        out = self.resblock2(out)
        out = self.resblock3(out)
        out = self.resblock4(out) 
        
        out = self.avgpool2(out)

        out = out.view(out.size(0), -1)

        out = self.fc1(out)
        out = self.dropout(out)
        out = self.fc2(out) 
        out = self.dropout(out)
        out = self.fc3(out)    
        
        return out


In [27]:

num_epochs = 30
test_frequency = 5
batch_size = 64

classifier = Classifier().to(device)

# classifier.load_state_dict(torch.load('cnn_weights.pth'))

criterion = nn.MultiLabelSoftMarginLoss()
optimizer = torch.optim.Adam(classifier.parameters(), lr=1e-4)

classifier, train_losses, val_losses, train_mAPs, val_mAPs = train(classifier, num_epochs, train_loader, val_loader, criterion, optimizer, test_frequency)


Starting epoch number 1
Loss for Training on Epoch 1 is 0.24324510991573334
-------  Class: aeroplane        AP:   0.3811  -------
-------  Class: bicycle          AP:   0.1498  -------
-------  Class: bird             AP:   0.1353  -------
-------  Class: boat             AP:   0.1666  -------
-------  Class: bottle           AP:   0.0862  -------
-------  Class: bus              AP:   0.0696  -------
-------  Class: car              AP:   0.2741  -------
-------  Class: cat              AP:   0.2250  -------
-------  Class: chair            AP:   0.2393  -------
-------  Class: cow              AP:   0.0797  -------
-------  Class: diningtable      AP:   0.1743  -------
-------  Class: dog              AP:   0.1550  -------
-------  Class: horse            AP:   0.1552  -------
-------  Class: motorbike        AP:   0.1420  -------
-------  Class: person           AP:   0.4995  -------
-------  Class: pottedplant      AP:   0.1039  -------
-------  Class: sheep            AP:   0.088

KeyboardInterrupt: 

In [32]:
plot_losses(train_losses, val_losses, test_frequency, num_epochs)
plot_mAP(train_mAPs, val_mAPs, test_frequency, num_epochs)

NameError: name 'train_losses' is not defined

In [None]:

mAP_test, test_loss, test_aps = test_classifier(test_loader, classifier, criterion)
print(mAP_test)
                                       
output_submission_csv('my_solution.csv', test_aps)

In [None]:
torch.save(classifier.state_dict(), '/content/cnn_weights.pth')


classifier2 = Classifier().to(device)
classifier2.load_state_dict(torch.load('cnn_weights.pth'))
print(classifier2)