In [1]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

FOLDERNAME = 'COMP4211/pa2/'

import sys
sys.path.append('/content/drive/My Drive/{}'.format(FOLDERNAME))

%cd /content/drive/My\ Drive/$FOLDERNAME

Mounted at /content/drive
/content/drive/My Drive/COMP4211/pa2


# 4.1

In [2]:
import torch
import numpy as np
import pandas as pd
import torch
from PIL import Image
from torch.utils.data import Dataset
import os
import os.path as osp
import matplotlib.pyplot as plt
import torchvision.transforms as T

import torchvision.transforms.functional as F
from torchvision.transforms.functional import InterpolationMode, _interpolation_modes_from_int

from torch.utils.data import DataLoader

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
class RetrievalDataset(Dataset):
    def __init__(self, csv_dir, image_dir='pa2_data', transform=None):
        info_df = pd.read_csv(csv_dir)
        self.gallery_dir = {rows['query'] : rows['gallery'] for index, rows in info_df.iterrows()}
        self.image_dir = image_dir
        self.transform = transform

    def __getitem__(self, query):
        img_name = os.path.join(self.image_dir, query)
        image = Image.open(img_name)
        label = self.gallery_dir[query]
        
        if self.transform is not None:
            image = self.transform(image)
        
        return image, label

    def __len__(self):
        return len(self.gallery_dir)
    
    def get_gallery_imgs(self):
        gallery_imgs = []

        for gallery in self.gallery_dir.values():
            img_name = os.path.join(self.image_dir, gallery)
            image = Image.open(img_name)
            
            if self.transform is not None:
                image = self.transform(image)

            gallery_imgs.append(image)

        return gallery_imgs

class CustomPad(torch.nn.Module):
    
    def __init__(self, max_x, max_y, fill_v=0):
        super(CustomPad, self).__init__()
        self.max_x = max_x
        self.max_y = max_y
        self.fill_v = fill_v

    def forward(self, img):
        y = img.shape[1]
        x = img.shape[2]
        new_img = torch.ones((img.shape[0], self.max_y, self.max_x)) * self.fill_v

        if y != self.max_y:
            dy = (self.max_y - y) // 2
            new_img[:, dy:dy+y, :] = img[:, :, :]
        elif x != self.max_x:
            dx = (self.max_x - x) // 2
            new_img[:, :, dx:dx+x] = img[:, :, :]

        return new_img

class CustomResize(torch.nn.Module):

    def __init__(self, size, interpolation=InterpolationMode.BILINEAR, max_size=None, antialias=None):
        super().__init__()
        # if not isinstance(size, (int, Sequence)):
        #     raise TypeError("Size should be int or sequence. Got {}".format(type(size)))
        # if isinstance(size, Sequence) and len(size) not in (1, 2):
        #     raise ValueError("If size is a sequence, it should have 1 or 2 values")
        
        self.size = size
        self.max_size = max_size

        # Backward compatibility with integer value
        if isinstance(interpolation, int):
            warnings.warn(
                "Argument interpolation should be of type InterpolationMode instead of int. "
                "Please, use InterpolationMode enum."
            )
            interpolation = _interpolation_modes_from_int(interpolation)

        self.interpolation = interpolation
        self.antialias = antialias

    def forward(self, img):
        y = img.shape[1]
        x = img.shape[2]
        if x <= y:
            x_new = round(self.size * x / y)
            return F.resize(img, (self.size, x_new), self.interpolation, self.max_size, self.antialias)
        else:
            y_new = round(self.size * y / x)
            return F.resize(img, (y_new, self.size), self.interpolation, self.max_size, self.antialias)

    def __repr__(self):
        interpolate_str = self.interpolation.value
        return self.__class__.__name__ + '(size={0}, interpolation={1}, max_size={2}, antialias={3})'.format(
            self.size, interpolate_str, self.max_size, self.antialias)

transform = T.Compose([T.ToTensor(),
                       T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
                       CustomResize(128),
                       CustomPad(max_x=128, max_y=128)])

val_set = RetrievalDataset('pa2_data/val/gt.csv', transform=transform)
test_set = RetrievalDataset('pa2_data/test/pred.csv', transform=transform)

val_loader = DataLoader(val_set, shuffle=True)
test_loader = DataLoader(test_set, shuffle=False)


# fig = plt.figure()

# image, label = dataset['val/query/10.jpg']
# images = [image]
# images.append(T.ToTensor()(images[0]))
# images.append(T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))(images[1]))
# images.append(CustomResize(128)(images[2]))
# images.append(CustomPad(max_x=128, max_y=128)(images[3]))

# for i in range(len(images)):
#     ax = plt.subplot(1, len(images), i+1)
#     plt.tight_layout()
#     ax.axis('off')
#     if i > 0:
#         npimg = images[i].numpy()
#         print(npimg.shape)
#         plt.imshow(np.transpose(npimg, (1, 2, 0)))
#         plt.show()
#     else:
#         plt.imshow(images[i])

#4.2

##4.2.1

In [4]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [28]:
class ResBlock(nn.Module):
 
    def __init__(self, in_channels, out_channels, stride=1):
        super(ResBlock, self).__init__()

        self.conv_1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn_1 = nn.BatchNorm2d(out_channels)
        self.relu_1 = nn.ReLU(inplace=True)

        self.conv_2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn_2 = nn.BatchNorm2d(out_channels)
        
        if in_channels != out_channels:
            self.residual_conv_3 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, padding=0, bias=False)
            self.residual_bn_3 = nn.BatchNorm2d(out_channels)
        elif in_channels == out_channels:
            self.identity_3 = nn.Identity()
        
        self.relu_5 = nn.ReLU(inplace=True)

        self.in_channels = in_channels
        self.out_channels = out_channels
 
    def forward(self, x):
        f = self.conv_1(x)
        f = self.bn_1(f)
        f = self.relu_1(f)

        f = self.conv_2(f)
        f = self.bn_2(f)
        
        if self.in_channels != self.out_channels:
            g = self.residual_conv_3(x)
            g = self.residual_bn_3(g)
        else:
            g = self.identity_3(x)
        
        out = self.relu_5(f+g)
 
        return out

class ResNet18(nn.Module):
 
    def __init__(self):
        super(ResNet18, self).__init__()

        self.conv_1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn_1 = nn.BatchNorm2d(64)
        self.relu_1 = nn.ReLU(inplace=True)

        self.max_pooling_2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        
        self.reblock_3 = ResBlock(64, 64)
        self.reblock_4 = ResBlock(64, 64)
        self.reblock_d_5 = ResBlock(64, 128, 2)
        self.reblock_6 = ResBlock(128, 128)
        self.reblock_d_7 = ResBlock(128, 256, 2)
        self.reblock_8 = ResBlock(256, 256)
        self.reblock_9 = ResBlock(256, 512)
        self.reblock_10 = ResBlock(512, 512)

        self.avgpool_11 = nn.AvgPool2d(8, stride=1, padding=0)
 
    def forward(self, x):
        x = self.conv_1(x)
        x = self.bn_1(x)
        x = self.relu_1(x)

        x = self.max_pooling_2(x)
        
        x = self.reblock_3(x)
        x = self.reblock_4(x)
        x = self.reblock_d_5(x)
        x = self.reblock_6(x)
        x = self.reblock_d_7(x)
        x = self.reblock_8(x)
        x = self.reblock_9(x)
        x = self.reblock_10(x)

        x = self.avgpool_11(x)
 
        return x

from torchsummary import summary

model = ResNet18().to(device)
summary(model, input_size=(3, 128, 128))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 64, 64]           9,408
       BatchNorm2d-2           [-1, 64, 64, 64]             128
              ReLU-3           [-1, 64, 64, 64]               0
         MaxPool2d-4           [-1, 64, 32, 32]               0
            Conv2d-5           [-1, 64, 32, 32]          36,864
       BatchNorm2d-6           [-1, 64, 32, 32]             128
              ReLU-7           [-1, 64, 32, 32]               0
            Conv2d-8           [-1, 64, 32, 32]          36,864
       BatchNorm2d-9           [-1, 64, 32, 32]             128
         Identity-10           [-1, 64, 32, 32]               0
             ReLU-11           [-1, 64, 32, 32]               0
         ResBlock-12           [-1, 64, 32, 32]               0
           Conv2d-13           [-1, 64, 32, 32]          36,864
      BatchNorm2d-14           [-1, 64,

In [41]:
class ResBlock(nn.Module):
 
    def __init__(self, in_channels, out_channels, stride=1):
        super(ResBlock, self).__init__()

        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        
        if in_channels != out_channels:
            self.conv3 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, padding=0, bias=False)
            self.bn3 = nn.BatchNorm2d(out_channels)
        elif in_channels == out_channels:
            self.identity3 = nn.Identity()

        self.in_channels = in_channels
        self.out_channels = out_channels
 
    def forward(self, x):
        f = self.conv1(x)
        f = self.bn1(f)
        f = self.relu(f)

        f = self.conv2(f)
        f = self.bn2(f)
        
        if self.in_channels != self.out_channels:
            g = self.conv3(x)
            g = self.bn3(g)
        else:
            g = self.identity3(x)
        
        out = self.relu(f+g)
 
        return out

class ResNet18(nn.Module):
 
    def __init__(self):
        super(ResNet18, self).__init__()

        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)

        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        
        self.layer1 = nn.Sequential(ResBlock(64, 64), ResBlock(64, 64))
        self.layer2 = nn.Sequential(ResBlock(64, 128, 2), ResBlock(128, 128))
        self.layer3 = nn.Sequential(ResBlock(128, 256, 2), ResBlock(256, 256))
        self.layer4 = nn.Sequential(ResBlock(256, 512), ResBlock(512, 512))

        self.avgpool = nn.AvgPool2d(8, stride=1, padding=0)
 
    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)

        x = self.maxpool(x)
        
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
 
        return x

from torchsummary import summary

model = ResNet18().to(device)
summary(model, input_size=(3, 128, 128))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 64, 64]           9,408
       BatchNorm2d-2           [-1, 64, 64, 64]             128
              ReLU-3           [-1, 64, 64, 64]               0
         MaxPool2d-4           [-1, 64, 32, 32]               0
            Conv2d-5           [-1, 64, 32, 32]          36,864
       BatchNorm2d-6           [-1, 64, 32, 32]             128
              ReLU-7           [-1, 64, 32, 32]               0
            Conv2d-8           [-1, 64, 32, 32]          36,864
       BatchNorm2d-9           [-1, 64, 32, 32]             128
         Identity-10           [-1, 64, 32, 32]               0
             ReLU-11           [-1, 64, 32, 32]               0
         ResBlock-12           [-1, 64, 32, 32]               0
           Conv2d-13           [-1, 64, 32, 32]          36,864
      BatchNorm2d-14           [-1, 64,

##4.2.2

In [6]:
import torch.utils.model_zoo as model_zoo
url = 'https://download.pytorch.org/models/resnet18-5c106cde.pth'
pretrain_dict = model_zoo.load_url(url)

Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /root/.cache/torch/hub/checkpoints/resnet18-5c106cde.pth


  0%|          | 0.00/44.7M [00:00<?, ?B/s]

In [47]:
model.load_state_dict(pretrain_dict)

RuntimeError: ignored

In [48]:
import re

model_state_dict = model.state_dict()
state_dict = {}

for k, v in pretrain_dict.items():
    if k in model_state_dict:
        if pretrain_dict[k].shape == model_state_dict[k].shape:
            state_dict[k] = v
        else:
            print("{} param shape is not matched with {} in our model".format(k, k))
    else:
        print("{} param cannot be found in our model".format(k))

model_state_dict.update(state_dict)
model.load_state_dict(model_state_dict) 

layer2.0.downsample.0.weight param cannot be found in our model
layer2.0.downsample.1.running_mean param cannot be found in our model
layer2.0.downsample.1.running_var param cannot be found in our model
layer2.0.downsample.1.weight param cannot be found in our model
layer2.0.downsample.1.bias param cannot be found in our model
layer3.0.downsample.0.weight param cannot be found in our model
layer3.0.downsample.1.running_mean param cannot be found in our model
layer3.0.downsample.1.running_var param cannot be found in our model
layer3.0.downsample.1.weight param cannot be found in our model
layer3.0.downsample.1.bias param cannot be found in our model
layer4.0.downsample.0.weight param cannot be found in our model
layer4.0.downsample.1.running_mean param cannot be found in our model
layer4.0.downsample.1.running_var param cannot be found in our model
layer4.0.downsample.1.weight param cannot be found in our model
layer4.0.downsample.1.bias param cannot be found in our model
fc.weight par

<All keys matched successfully>

#4.3

#4.4