# Read me
1. To successfully run this code, one has to put all the folders downloaded from kaggle under the folder where this code is being run.
2. Create a folder called **path** where all the pth files will be stored.
3. Run cells one after another to train and test the model

## Model Description
* This is a resnet50 model with normalized data. The architecture of the model is the same as [torchvision.models.resnet50](https://pytorch.org/vision/stable/models.html#torchvision.models.resnet50). All the hyperparameters are tuned based on this website [ResNet 50 v1.5](https://ngc.nvidia.com/catalog/resources/nvidia:resnet_50_v1_5_for_pytorch) and the posts on piazza.

* Details about the model structure are written in the text cell above the corresponding code cell.

In [None]:
import numpy as np
import torch
from torch import nn, optim, unsqueeze
from tqdm import tqdm
import torch.nn.functional as F
from torchvision import transforms, datasets
import torchvision
from PIL import Image
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

# Use the GPU when PyTorch reports one is available, otherwise the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using {} device'.format(device))
# Batch size used by the training and validation loaders.
batchsize = 80

Using cuda device


The normalization idea came from this post [@1097](https://piazza.com/class/knsmz2b3z131mn?cid=1097),
and the other transformation ideas came from observing the training data.

In [None]:
#%%transform
# Per-channel mean and std handed to Normalize in both pipelines.
_CHANNEL_MEAN = (0.5, 0.5, 0.5)
_CHANNEL_STD = (0.5, 0.5, 0.5)

# Random augmentation applied to training images only.
_train_augmentation = [
    transforms.ColorJitter(brightness=.5, hue=.3),
    transforms.RandomRotation(degrees=45),
    transforms.RandomHorizontalFlip(),
]

# Training pipeline: augmentation, then tensor conversion and normalization.
transform_train = transforms.Compose(
    _train_augmentation
    + [transforms.ToTensor(), transforms.Normalize(_CHANNEL_MEAN, _CHANNEL_STD)]
)

# Evaluation pipeline: tensor conversion and normalization only.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(_CHANNEL_MEAN, _CHANNEL_STD)]
)

In [None]:
#%%Loading data
# Options shared by the training and validation loaders.
_loader_options = dict(batch_size=batchsize, num_workers=4, pin_memory=True)

# Training images get the augmenting pipeline, validation images the plain one.
train_dataset = ImageFolder(root='train_data/', transform=transform_train)
val_dataset = ImageFolder(root='val_data/', transform=transform)

# Only the training loader shuffles its samples.
train_dataloader = DataLoader(train_dataset, shuffle=True, **_loader_options)
val_dataloader = DataLoader(val_dataset, shuffle=False, **_loader_options)

* I built **my_resnet50** by printing out "torchvision.models.resnet50" to see 
how the different layers are constructed and their specifications.


* However, given that the size of the input image is only 64x64, I modified the kernel size and the stride in the first convolution to be 1 such that no downsampling happens here; furthermore I also got rid of the first maxpooling layer to prevent downsampling before entering the residual blocks.


* The embedding size is according to one of the comments posted under this [post](https://piazza.com/class/knsmz2b3z131mn?cid=831) @831_f36 : "In general, small embedding sizes (512 - 1024) are enough to get an A cutoff."

* Dropout is added because I faced severe overfitting early in training.

In [None]:
#%%building resnet50 from scratch
class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 conv, 3x3 conv, 1x1 conv, plus a shortcut.

    Args:
        in_channel: number of channels of the input tensor.
        exp: width exponent; the two inner convolutions use ``64 * 2**exp``
            channels and the block outputs ``256 * 2**exp`` channels.
        first_block: when True the shortcut is a 1x1 convolution followed by
            batch norm (using ``stride``); otherwise the shortcut is the
            identity, so the input must already have the output's shape.
        stride: stride of the 3x3 convolution and of the projection shortcut.
    """

    def __init__(self, in_channel, exp, first_block=False, stride = 1):
        super().__init__()
        width = 64 * (2 ** exp)
        expanded = 256 * (2 ** exp)

        # Main path; every convolution is followed by batch norm.
        self.conv1 = nn.Conv2d(in_channel, width, kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(width)
        self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(width)
        self.conv3 = nn.Conv2d(width, expanded, kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(expanded)
        self.relu = nn.ReLU(inplace=True)

        # Shortcut path.
        if not first_block:
            self.downsample = nn.Identity()
            return
        self.downsample = nn.Sequential(
            nn.Conv2d(in_channel, expanded, kernel_size=1, stride=stride, bias=False),
            nn.BatchNorm2d(expanded),
        )

    def forward(self, X):
        """Return ``relu(main_path(X) + shortcut(X))``."""
        out = self.relu(self.bn1(self.conv1(X)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        return self.relu(out + self.downsample(X))

In [None]:
#%%
class my_resnet50(nn.Module):
    """ResNet-50 style classifier built from ``Bottleneck`` blocks.

    The stem is a single 1x1, stride-1 convolution with no max-pooling, so the
    input resolution is kept until the strided stages. Four stages of 3, 4, 6
    and 3 bottleneck blocks follow, then global average pooling, dropout and
    two linear heads on the 2048-d pooled feature: ``fc`` (class scores) and
    ``embedding`` (512-d).

    Args:
        num_classes: size of the classification output (default 4000).

    NOTE(review): ``embedding`` is only evaluated when ``forward`` is called
    with ``return_embedding=True``; a loss computed on the default output
    alone never passes gradients to it.
    """

    def __init__(self, num_classes = 4000):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self._make_stage(in_channel=64, exp=0, n_blocks=3)
        self.layer2 = self._make_stage(in_channel=256, exp=1, n_blocks=4, stride=2)
        self.layer3 = self._make_stage(in_channel=512, exp=2, n_blocks=6, stride=2)
        self.layer4 = self._make_stage(in_channel=1024, exp=3, n_blocks=3, stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
        self.flatten = nn.Flatten()
        # The classifier width follows num_classes instead of a fixed 4000.
        self.fc = nn.Linear(in_features=2048, out_features=num_classes, bias=True)
        self.embedding = nn.Linear(in_features=2048, out_features=512, bias=True)  # for verification
        self.dropout = nn.Dropout()

    @staticmethod
    def _make_stage(in_channel, exp, n_blocks, stride=1):
        """Build one stage: a projecting first block, then identity-shortcut blocks."""
        stage_out = 256 * (2 ** exp)
        blocks = [Bottleneck(in_channel=in_channel, exp=exp, first_block=True, stride=stride)]
        blocks.extend(
            Bottleneck(in_channel=stage_out, exp=exp, first_block=False)
            for _ in range(n_blocks - 1)
        )
        return nn.Sequential(*blocks)

    def forward(self, X, return_embedding=False):
        """Run the network on a batch of images.

        Args:
            X: input batch with 3 channels.
            return_embedding: when True, also return the ReLU-activated output
                of the ``embedding`` head.

        Returns:
            The ``fc`` output, or ``(embedding_out, fc_output)`` when
            ``return_embedding`` is True.
        """
        out = self.relu(self.bn1(self.conv1(X)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.flatten(self.avgpool(out))
        out = self.dropout(out)
        if return_embedding:
            embedding_out = self.relu(self.embedding(out))
            output = self.fc(out)
            return embedding_out, output
        return self.fc(out)

In [None]:
# Hyperparameters of the first training run; lr, momentum and weight_decay
# are the values passed to the SGD optimizer.
config = dict(
    epoch=50,
    lr=0.128,
    momentum=0.875,
    weight_decay=5e-5,
)

In [None]:
#%% Set model parameters
#https://ngc.nvidia.com/catalog/resources/nvidia:resnet_50_v1_5_for_pytorch
# Fresh model, loss, optimizer and scheduler for training from scratch.
model = my_resnet50()
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=config['lr'], momentum=config['momentum'], weight_decay=config['weight_decay'])
# No optimizer state is restored here: no checkpoint has been loaded at this
# point, so reading `checkpoint` would raise a NameError.
# 'max' mode: the scheduler is stepped with validation accuracy (higher is better).
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=2)


In [None]:
#%%train
def training(dataloader, model, criterion, optimizer):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        dataloader: iterable of ``(X, y)`` batches.
        model: module to optimise; it is switched to training mode.
        criterion: loss called as ``criterion(prediction, y)``.
        optimizer: optimizer stepped once per batch.

    Returns:
        ``(mean_loss, accuracy)`` as Python floats: the loss averaged over
        batches and the fraction of samples whose arg-max prediction equals
        the label. Both are 0.0 when the dataloader yields nothing.
    """
    # Move batches to wherever the model's weights live; fall back to the
    # notebook-level ``device`` for a model without parameters.
    first_param = next(model.parameters(), None)
    target = first_param.device if first_param is not None else device

    sum_loss = 0.0
    total_correct = 0
    total = 0
    n_batches = 0
    model.train()
    for X, y in dataloader:
        X, y = X.float().to(target), y.to(target)

        # Forward
        optimizer.zero_grad()
        prediction = model(X)
        loss = criterion(prediction, y)

        # Backpropagation
        loss.backward()
        optimizer.step()

        # Accumulate plain Python numbers: summing the loss tensor itself
        # would keep autograd history alive across the whole epoch.
        sum_loss += loss.item()
        total_correct += (prediction.argmax(1) == y).sum().item()
        total += y.size(0)
        n_batches += 1

    mean_loss = sum_loss / n_batches if n_batches else 0.0
    accuracy = total_correct / total if total else 0.0
    return mean_loss, accuracy

In [None]:
#%%validating
def testing(dataloader, model):
    accuracy = 0.0
    total_correct = 0
    total = 0
    n_batches = len(dataloader) #number of batches
    model.eval() 
    with torch.no_grad():
        for (X, y) in dataloader:
            #sending data to device
            X, y = X.float().to(device), y.to(device)
            #Forward
            prediction = model(X)
            #calculating loss
            y_hat = prediction.argmax(1)            
            total_correct += torch.sum(y_hat == y)
            total += y.size(0)
    accuracy = total_correct.item()/ total
    return accuracy

In [None]:
#%%running model
import os

print('Running model')
# Checkpoints are written under 'path/'; create it so the first save cannot
# fail after several epochs of training.
os.makedirs('path', exist_ok=True)
for e in tqdm(range(config['epoch'])):
    # training
    train_loss, train_accuracy = training(train_dataloader, model, criterion, optimizer)
    val_accuracy = testing(val_dataloader, model)
    # The scheduler is stepped (after validation) and a checkpoint is saved
    # only once the first ten epochs (e = 0..9) are over.
    if e > 9:
        scheduler.step(val_accuracy)
        torch.save({
            'epoch': e,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'val_accuracy': val_accuracy}, 'path/'+str(e)+'.pth')
    print({"Epoch": str(e),
           "Train Loss": train_loss,
           "Train Accuracy": train_accuracy,
           "Validation Accuracy": val_accuracy})
print('Done.')


Load the model with the highest validation accuracy. In my case, it is the checkpoint from epoch 43, with validation accuracy = 0.8504.

In [None]:
# Second hyperparameter set: identical to `config` except for a much smaller
# learning rate (presumably for the run resumed from a checkpoint).
config2 = dict(
    epoch=50,
    lr=0.001,
    momentum=0.875,
    weight_decay=5e-5,
)

In [None]:
#%% Set model parameters
#https://ngc.nvidia.com/catalog/resources/nvidia:resnet_50_v1_5_for_pytorch
# T_max value should be num_epochs
# NOTE(review): T_max is 7 below, which does not match the remark above --
# confirm which one is intended.
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine.
checkpoint = torch.load('path/43.pth', map_location=device)
model = my_resnet50()
model.load_state_dict(checkpoint['model_state_dict'])
model.to(device)
criterion = nn.CrossEntropyLoss()
# The resumed run uses config2 (small learning rate), not the first run's config.
optimizer = optim.SGD(model.parameters(), lr=config2['lr'], momentum=config2['momentum'], weight_decay=config2['weight_decay'])
#scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=2)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=7)

In [None]:
#%%running model
import os

print('Running model')
# Checkpoints are written under 'path/'; make sure the folder exists.
os.makedirs('path', exist_ok=True)
# Epoch index stored in the loaded checkpoint.
epochs = checkpoint['epoch']
# Run config2['epoch'] further epochs, numbered right after the checkpoint's
# epoch (e.g. 44..93 when resuming from epoch 43 with 50 epochs).
for e in tqdm(range(epochs + 1, epochs + 1 + config2['epoch'])):
    # training
    train_loss, train_accuracy = training(train_dataloader, model, criterion, optimizer)
    val_accuracy = testing(val_dataloader, model)
    # The scheduler is stepped (after validation) and a checkpoint is saved
    # only after the first five resumed epochs.
    if e > epochs+5:
        scheduler.step()
        torch.save({
            'epoch': e,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'val_accuracy': val_accuracy}, 'path/'+str(e)+'.pth')
    print({"Epoch": str(e),
           "Train Loss": train_loss,
           "Train Accuracy": train_accuracy,
           "Validation Accuracy": val_accuracy})
print('Done.')

The final model is the one with the highest validation accuracy (at least 0.87 )

# Prediction
For prediction, please manually load the model with the highest validation accuracy.

In [None]:
# Load the model with the highest validation accuracy.
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine.
checkpoint = torch.load('path/93.pth', map_location=device)
model = my_resnet50()
model.load_state_dict(checkpoint['model_state_dict'])
model.to(device)

In [None]:
import glob

# Paths of every entry directly inside test_data/.
# NOTE(review): glob does not promise a particular order, and the submission
# cell assigns labels by position -- confirm this order matches the rows of
# the sample submission.
file_list = glob.glob("test_data/*")

In [None]:
class ImageDataset(torch.utils.data.Dataset):
    """Unlabelled image dataset backed by a list of file paths.

    Args:
        file_list: sequence of image paths; item ``k`` of the dataset is the
            transformed image stored at ``file_list[k]``.
    """

    def __init__(self, file_list):
        self.file_list = file_list

    def __len__(self):
        """Return the number of image paths."""
        return len(self.file_list)

    def __getitem__(self, index):
        """Load image ``index`` and return it after the evaluation ``transform``."""
        # The context manager closes the file once the pixels are decoded;
        # convert("RGB") guarantees the three channels the 3-channel Normalize
        # step expects, even for grayscale or RGBA files.
        with Image.open(self.file_list[index]) as img:
            img = img.convert("RGB")
        return transform(img)

In [None]:
# Test images are served one per batch, unshuffled, so predictions come out
# in file_list order.
test_dataset = ImageDataset(file_list)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
)

In [None]:
# Mapping kept by the training ImageFolder dataset; get_label() searches its
# values for a predicted index and returns the matching key.
class_to_idx = train_dataset.class_to_idx

In [None]:
#%%testing
# Predicted class index for every test image, in loader order.
prediction = []
model.eval()
with torch.no_grad():
    for X in tqdm(test_dataloader):
        X = X.float().to(device)
        # argmax(1) gives one class index per image; tolist() + extend works
        # for any batch size, not only batches of one.
        prediction.extend(model(X).argmax(1).tolist())

100%|██████████████████████████████████████| 8000/8000 [00:51<00:00, 155.16it/s]


In [None]:
def get_label(p, mapping=None):
    """Return the key of ``mapping`` whose value equals ``p``.

    Args:
        p: predicted class index to look up.
        mapping: optional ``{class_name: index}`` dict; defaults to the
            notebook-level ``class_to_idx``.

    Returns:
        The first key whose value equals ``p``, or ``None`` (after printing a
        notice) when no entry matches.
    """
    if mapping is None:
        mapping = class_to_idx
    for key, value in mapping.items():
        if value == p:
            return key
    # Report the index that failed to resolve, not a loop counter.
    print(p, "has no matched key")
    return None

In [None]:
# Translate every predicted index into its class name.
test_prediction = [get_label(p) for p in prediction]

In [None]:
#%%save prediction to csv
import pandas as pd
# The sample submission file is loaded as the frame that will hold the labels.
submit = pd.read_csv("classification_sample_submission.csv")

In [None]:
# Store the predicted class names in the "label" column. The list is assigned
# by position, so row k receives the k-th prediction.
# NOTE(review): confirm the row order of the sample submission matches the
# order in which the test files were read.
submit["label"] = test_prediction

In [None]:
# Write the classification submission without the DataFrame index column.
submit.to_csv('Hw2classification.csv', index= False)

# Face Verification
* For the verification task, please reuse the model that is used for prediction
* please put the file downloaded from kaggle under the same folder where the code is run

Run the following cell for face verification task

In [None]:
# Read the pairs file; every line becomes the list of its whitespace-separated fields.
with open('verification_pairs_test.txt') as f:
    lines = f.readlines()
val_pair = [line.split() for line in lines]

In [None]:
model.eval()
# Cosine similarity of each image pair, in val_pair order.
similarity = []
labels = []
# dim=0 because each compared output is a 1-D vector after squeeze(0).
compute_sim = nn.CosineSimilarity(dim=0)


def _verification_output(relative_path):
    """Return the model output for one image under verification_data/ as a 1-D tensor."""
    with Image.open('verification_data/' + relative_path) as img:
        img = img.convert('RGB')
    # Both images of a pair go through the same evaluation transform
    # (ToTensor + Normalize), the preprocessing used for validation data.
    batch = transform(img).unsqueeze(0).float().to(device)
    # NOTE(review): called with default arguments the model returns the `fc`
    # output, so similarity is computed on class scores, not on the
    # `embedding` head.
    return model(batch).squeeze(0)


with torch.no_grad():
    for i in tqdm(val_pair):
        eb1 = _verification_output(i[0])
        eb2 = _verification_output(i[1])
        similarity.append(compute_sim(eb1, eb2).item())

In [None]:
#%%save prediction to csv
import pandas as pd
# The sample verification file is loaded as the frame that will hold the scores.
submit_verification = pd.read_csv("verification_solution_sample.csv")

In [None]:
# Store the similarity scores in the 'Category' column. The list is assigned
# by position, so row k receives the score of the k-th pair read from the
# pairs file.
submit_verification['Category'] = similarity

In [None]:
# Write the verification scores to their own file; reusing
# 'Hw2classification.csv' would overwrite the classification submission.
submit_verification.to_csv('Hw2verification.csv', index= False)