In [1]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

FOLDERNAME = 'COMP4211/pa2/'

import sys
sys.path.append('/content/drive/My Drive/{}'.format(FOLDERNAME))

%cd /content/drive/My\ Drive/$FOLDERNAME

Mounted at /content/drive
/content/drive/My Drive/COMP4211/pa2


# 4.1

In [2]:
import torch
import numpy as np
import pandas as pd
import torch
from PIL import Image
from torch.utils.data import Dataset
import os
import os.path as osp
import matplotlib.pyplot as plt
import torchvision.transforms as T

import torchvision.transforms.functional as F
from torchvision.transforms.functional import InterpolationMode, _interpolation_modes_from_int

from torch.utils.data import DataLoader

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [16]:
# [C1]

class RetrievalDataset(Dataset):
    def __init__(self, csv_dir, image_dir='pa2_data', transform=None):
        info_df = pd.read_csv(csv_dir)
        self.query_dir = {}
        self.gallery_dir = {}
        for index, rows in info_df.iterrows():
            self.query_dir[index] = rows['query']
            self.gallery_dir[index] = rows['gallery']
        self.image_dir = image_dir
        self.transform = transform

    def __getitem__(self, idx):
        query = self.query_dir[idx]
        img_name = os.path.join(self.image_dir, query)
        image = Image.open(img_name)
        label = self.gallery_dir[idx]
        
        if self.transform is not None:
            image = self.transform(image)
        
        return image, label

    def __len__(self):
        return len(self.query_dir)
    
    def get_gallery_imgs(self):
        gallery_imgs = []

        for gallery in self.gallery_dir.values():
            img_name = os.path.join(self.image_dir, gallery)
            image = Image.open(img_name)
            
            if self.transform is not None:
                image = self.transform(image)

            gallery_imgs.append(image)

        return gallery_imgs

# [C2]

class CustomPad(torch.nn.Module):
    
    def __init__(self, max_x, max_y, fill_v=0):
        super(CustomPad, self).__init__()
        self.max_x = max_x
        self.max_y = max_y
        self.fill_v = fill_v

    def forward(self, img):
        y = img.shape[1]
        x = img.shape[2]
        new_img = torch.ones((img.shape[0], self.max_y, self.max_x)) * self.fill_v

        if y != self.max_y:
            dy = (self.max_y - y) // 2
            new_img[:, dy:dy+y, :] = img[:, :, :]
        elif x != self.max_x:
            dx = (self.max_x - x) // 2
            new_img[:, :, dx:dx+x] = img[:, :, :]

        return new_img

class CustomResize(torch.nn.Module):

    def __init__(self, size, interpolation=InterpolationMode.BILINEAR, max_size=None, antialias=None):
        super().__init__()
        # if not isinstance(size, (int, Sequence)):
        #     raise TypeError("Size should be int or sequence. Got {}".format(type(size)))
        # if isinstance(size, Sequence) and len(size) not in (1, 2):
        #     raise ValueError("If size is a sequence, it should have 1 or 2 values")
        
        self.size = size
        self.max_size = max_size

        # Backward compatibility with integer value
        if isinstance(interpolation, int):
            warnings.warn(
                "Argument interpolation should be of type InterpolationMode instead of int. "
                "Please, use InterpolationMode enum."
            )
            interpolation = _interpolation_modes_from_int(interpolation)

        self.interpolation = interpolation
        self.antialias = antialias

    def forward(self, img):
        y = img.shape[1]
        x = img.shape[2]
        if x <= y:
            x_new = round(self.size * x / y)
            return F.resize(img, (self.size, x_new), self.interpolation, self.max_size, self.antialias)
        else:
            y_new = round(self.size * y / x)
            return F.resize(img, (y_new, self.size), self.interpolation, self.max_size, self.antialias)

    def __repr__(self):
        interpolate_str = self.interpolation.value
        return self.__class__.__name__ + '(size={0}, interpolation={1}, max_size={2}, antialias={3})'.format(
            self.size, interpolate_str, self.max_size, self.antialias)

transform = T.Compose([T.ToTensor(),
                       T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
                       CustomResize(128),
                       CustomPad(max_x=128, max_y=128)])

val_RetrievalDataset = RetrievalDataset('pa2_data/val/gt.csv', transform=transform)
test_RetrievalDataset = RetrievalDataset('pa2_data/test/pred.csv', transform=transform)

# [C3]

val_RetrievalDataset_loader = DataLoader(val_RetrievalDataset, shuffle=True)
test_RetrievalDataset_loader = DataLoader(test_RetrievalDataset, shuffle=False)

#4.2

##4.2.1

In [4]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [5]:
# [C4, C5]

class ResBlock(nn.Module):
 
    def __init__(self, in_channels, out_channels, stride=1):
        super(ResBlock, self).__init__()

        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        
        if in_channels != out_channels:
            self.downsample = nn.Sequential(nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, padding=0, bias=False),
                                            nn.BatchNorm2d(out_channels))
        elif in_channels == out_channels:
            self.identity3 = nn.Identity()

        self.in_channels = in_channels
        self.out_channels = out_channels
 
    def forward(self, x):
        f = self.conv1(x)
        f = self.bn1(f)
        f = self.relu(f)

        f = self.conv2(f)
        f = self.bn2(f)
        
        if self.in_channels != self.out_channels:
            g = self.downsample(x)
        else:
            g = self.identity3(x)
        
        out = self.relu(f+g)
 
        return out

class ResNet18(nn.Module):
 
    def __init__(self):
        super(ResNet18, self).__init__()

        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)

        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        
        self.layer1 = nn.Sequential(ResBlock(64, 64), ResBlock(64, 64))
        self.layer2 = nn.Sequential(ResBlock(64, 128, 2), ResBlock(128, 128))
        self.layer3 = nn.Sequential(ResBlock(128, 256, 2), ResBlock(256, 256))
        self.layer4 = nn.Sequential(ResBlock(256, 512), ResBlock(512, 512))

        self.avgpool = nn.AvgPool2d(8, stride=1, padding=0)
 
    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)

        x = self.maxpool(x)
        
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
 
        return x

from torchsummary import summary

model = ResNet18().to(device)
summary(model, input_size=(3, 128, 128))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 64, 64]           9,408
       BatchNorm2d-2           [-1, 64, 64, 64]             128
              ReLU-3           [-1, 64, 64, 64]               0
         MaxPool2d-4           [-1, 64, 32, 32]               0
            Conv2d-5           [-1, 64, 32, 32]          36,864
       BatchNorm2d-6           [-1, 64, 32, 32]             128
              ReLU-7           [-1, 64, 32, 32]               0
            Conv2d-8           [-1, 64, 32, 32]          36,864
       BatchNorm2d-9           [-1, 64, 32, 32]             128
         Identity-10           [-1, 64, 32, 32]               0
             ReLU-11           [-1, 64, 32, 32]               0
         ResBlock-12           [-1, 64, 32, 32]               0
           Conv2d-13           [-1, 64, 32, 32]          36,864
      BatchNorm2d-14           [-1, 64,

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


##4.2.2

In [6]:
# [C6]

import torch.utils.model_zoo as model_zoo
url = 'https://download.pytorch.org/models/resnet18-5c106cde.pth'
pretrain_dict = model_zoo.load_url(url)

Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /root/.cache/torch/hub/checkpoints/resnet18-5c106cde.pth


  0%|          | 0.00/44.7M [00:00<?, ?B/s]

In [7]:
model.load_state_dict(pretrain_dict)

RuntimeError: ignored

In [None]:
import re

model_state_dict = model.state_dict()
state_dict = {}

for k, v in pretrain_dict.items():
    if k in model_state_dict:
        if pretrain_dict[k].shape == model_state_dict[k].shape:
            state_dict[k] = v
        else:
            print("{} param shape is not matched with {} in our model".format(k, k))
    else:
        print("{} param cannot be found in our model".format(k))

model_state_dict.update(state_dict)
model.load_state_dict(model_state_dict) 

#4.3

In [8]:
# [C7]

class ResNet18(nn.Module):
 
    def __init__(self):
        super(ResNet18, self).__init__()

        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)

        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        
        self.layer1 = nn.Sequential(ResBlock(64, 64), 
                                    ResBlock(64, 64))
        self.layer2 = nn.Sequential(ResBlock(64, 128, 2), 
                                    ResBlock(128, 128))
        self.layer3 = nn.Sequential(ResBlock(128, 256, 2), 
                                    ResBlock(256, 256))
        self.layer4 = nn.Sequential(ResBlock(256, 512), 
                                    ResBlock(512, 512))

        self.avgpool = nn.AvgPool2d(8, stride=1, padding=0)

        self.fc_layer1 = nn.Sequential(nn.Linear(512, 512), 
                                       nn.BatchNorm1d(512),
                                       nn.ReLU(inplace=True),
                                       nn.Dropout(p=0.2, inplace=True))
        self.fc_layer2 = nn.Sequential(nn.Linear(512, 256), 
                                       nn.BatchNorm1d(256),
                                       nn.ReLU(inplace=True),
                                       nn.Dropout(p=0.2, inplace=True))
        
        self.fc_layer3_train = nn.Sequential(nn.Linear(256, 230))
        self.fc_layer3_val = nn.Sequential(nn.Linear(256, 49))
 
    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)

        x = self.maxpool(x)
        
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)

        x = x.view(x.size(0), -1)
        x = self.fc_layer1(x)
        x = self.fc_layer2(x)
        x = self.fc_layer3_train(x)
 
        return x

from torchsummary import summary

model = ResNet18().to(device)
summary(model, input_size=(3, 128, 128))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 64, 64]           9,408
       BatchNorm2d-2           [-1, 64, 64, 64]             128
              ReLU-3           [-1, 64, 64, 64]               0
         MaxPool2d-4           [-1, 64, 32, 32]               0
            Conv2d-5           [-1, 64, 32, 32]          36,864
       BatchNorm2d-6           [-1, 64, 32, 32]             128
              ReLU-7           [-1, 64, 32, 32]               0
            Conv2d-8           [-1, 64, 32, 32]          36,864
       BatchNorm2d-9           [-1, 64, 32, 32]             128
         Identity-10           [-1, 64, 32, 32]               0
             ReLU-11           [-1, 64, 32, 32]               0
         ResBlock-12           [-1, 64, 32, 32]               0
           Conv2d-13           [-1, 64, 32, 32]          36,864
      BatchNorm2d-14           [-1, 64,

In [17]:
# [C8]

class IDLossDataset(Dataset):
    def __init__(self, csv_dir, image_dir='pa2_data', transform=None):
        info_df = pd.read_csv(csv_dir)
        self.filepath = {}
        self.id = {}
        for index, rows in info_df.iterrows():
            self.filepath[index] = rows['filepath']
            self.id[index] = rows['id']
        self.image_dir = image_dir
        self.transform = transform

    def __getitem__(self, idx):
        dir = self.filepath[idx]
        img_name = os.path.join(self.image_dir, dir)
        image = Image.open(img_name)
        label = self.id[idx]
        
        if self.transform is not None:
            image = self.transform(image)
        
        return image, label

    def __len__(self):
        return len(self.filepath)

# [C9]

transform_train = T.Compose([T.ToTensor(),
                             T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
                             CustomResize(128),
                             T.RandomHorizontalFlip(p=0.5),
                             T.RandomErasing(p=0.5, scale=(0.05, 0.2), inplace=True),
                             CustomPad(max_x=128, max_y=128)])

train_IDLossDataset = IDLossDataset('pa2_data/train_idloss.csv', transform=transform_train)
val_IDLossDataset = IDLossDataset('pa2_data/val_idloss.csv', transform=transform)

train_IDLossDataset_loader = DataLoader(train_IDLossDataset, batch_size=128, shuffle=True)
val_IDLossDataset_loader = DataLoader(val_IDLossDataset, batch_size=1, shuffle=False)

In [None]:
# [C10]

from tqdm.notebook import tqdm

def save_checkpoint(save_path, model, optimizer, val_loss):
    if save_path==None:
        return
    save_path = save_path 
    state_dict = {'model_state_dict': model.state_dict(),
                  'optimizer_state_dict': optimizer.state_dict(),
                  'val_loss': val_loss}

    torch.save(state_dict, save_path)
    print('Model saved to {}'.format(save_path))


def load_checkpoint(save_path, model, optimizer):
    save_path = save_path #f'cifar_net.pt'
    state_dict = torch.load(save_path)
    model.load_state_dict(state_dict['model_state_dict'])
    optimizer.load_state_dict(state_dict['optimizer_state_dict'])
    val_loss = state_dict['val_loss']
    print(f'Model loaded from {save_path}, with val loss: {val_loss}')
    return val_loss


def train_model(net, train_loader, valid_loader,  num_epochs, criterion, optimizer, val_loss, device, save_name):
    
    if val_loss==None:
        best_val_loss = float("Inf")  
    else: 
        best_val_loss=val_loss
        print('Resume training')

    for epoch in range(num_epochs):  # loop over the dataset multiple times
        ## Train phase
        net.train()
        running_loss = 0.0
        running_corrects = 0
        for inputs, labels in tqdm(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = net(inputs) ## step 1.
            loss = criterion(outputs, labels) ## step 2.

            optimizer.zero_grad() ## clear the previous gradients 
            loss.backward() ## step 3. backpropagation - compute gradient
            optimizer.step() ## step 4. w = w - eta*w.grad
            running_loss += loss.item()

            _, preds = torch.max(outputs.data, 1) 
            running_corrects += torch.sum(preds == labels.data)  ## step 2 - measure accuracy
        ## train epoch loss and accuracy
        train_loss = running_loss / len(train_loader)
        train_acc = running_corrects / float(len(train_loader.dataset))
        
        ## Validation phase
        with torch.no_grad():
            net.eval()
            running_loss = 0.0
            running_corrects = 0
            for inputs, labels in tqdm(valid_loader):
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = net(inputs) ## step 1
                loss = criterion(outputs, labels) ## step 2 - loss
                running_loss += loss.item()

                _, preds = torch.max(outputs.data, 1)
                running_corrects += torch.sum(preds == labels.data) ## step 2 - measure accuracy
            ## validation epoch loss and accuracy
            valid_loss = running_loss / len(valid_loader)
            valid_acc = running_corrects / float(len(valid_loader.dataset))

        print('Epoch [{}/{}], Train Loss: {:.4f}, Train Acc: {:.4f}, Valid Loss: {:.4f},  Valid Acc: {:.4f}'
              .format(epoch+1, num_epochs, train_loss, train_acc, valid_loss, valid_acc))

        if valid_loss < best_val_loss: ## save checkpoint when the validation loss is reduced
            best_val_loss = valid_loss
            save_checkpoint(save_name, net, optimizer, best_val_loss)

    print('Finished Training')
    
num_epochs = 50
best_val_loss = None
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
save_path = f'cifar_net.pt'
model = model.to(device)

train_model(model, train_loader, test_loader, num_epochs, criterion, optimizer, best_val_loss, device, save_path)

#4.4