In [1]:
import pandas as pd
import numpy as np
import torch, torchvision
import cv2, PIL
from torch import nn
import matplotlib.pyplot as plt
import albumentations
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import torch.optim as optim
from tqdm import tqdm

In [2]:
# Pick the compute device unconditionally so `device` exists on CPU-only
# machines too (it used to be assigned only inside the CUDA branch).
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# DataParallel wrapping is only requested when more than one GPU is visible.
multiple_gpus = torch.cuda.is_available() and torch.cuda.device_count() > 1

In [3]:
# Read the image index, then drop rows whose 'valid_images' flag is 0.0 or
# whose 'face_score' is -inf, using one combined boolean mask.
csv_file = pd.read_csv('./wiki.csv')
csv_file = csv_file[
    (csv_file['valid_images'] != 0.0)
    & (csv_file['face_score'] != float('-inf'))
]

In [4]:
# Preview the first rows that survived the filters above.
csv_file.head()

Unnamed: 0.1,Unnamed: 0,img_path,dob,photo_taken_year,gender,name,face_location,face_score,updated_paths,valid_images
0,0,17/10000217_1981-05-05_2009.jpg,1981-05-05,2009,M,Sami Jauhojärvi,"111.29109473290997, 111.29109473290997, 252.66...",4.300962,../../IMDB_WIKI/wiki/17/10000217_1981-05-05_20...,1.0
1,1,48/10000548_1925-04-04_1964.jpg,1925-04-04,1964,M,Dettmar Cramer,"252.48330229530742, 126.68165114765371, 354.53...",2.645639,../../IMDB_WIKI/wiki/48/10000548_1925-04-04_19...,1.0
2,2,12/100012_1948-07-03_2008.jpg,1948-07-03,2008,M,Marc Okrand,"113.52, 169.83999999999997, 366.08, 422.4",4.329329,../../IMDB_WIKI/wiki/12/100012_1948-07-03_2008...,1.0
4,4,16/10002116_1971-05-31_2012.jpg,1971-05-31,2012,F,Diana Damrau,"171.61031405173117, 75.57451239763239, 266.766...",3.408442,../../IMDB_WIKI/wiki/16/10002116_1971-05-31_20...,1.0
5,5,02/10002702_1960-11-09_2012.jpg,1960-11-09,2012,F,Krista Tippett,"274.76563240288175, 57.7700900839337, 376.8869...",4.748056,../../IMDB_WIKI/wiki/02/10002702_1960-11-09_20...,1.0


In [5]:
# Plain Python list of the image paths stored in 'updated_paths'.
img_paths = csv_file['updated_paths'].values.tolist()
# NOTE(review): despite its name, this list is read from the 'face_location'
# column, not 'face_score' — confirm which column was intended.
face_score = csv_file['face_location'].values.tolist()

In [6]:
def get_model(num_outputs=4):
    """Build a ResNet-50 whose final layer is replaced by a linear regressor.

    Args:
        num_outputs: Width of the replacement ``fc`` layer. Defaults to 4,
            matching the four box coordinates this notebook regresses.

    Returns:
        The torchvision ResNet-50 (constructed with torchvision's default
        arguments) with its ``fc`` attribute swapped for ``nn.Linear``.
    """
    res50 = torchvision.models.resnet50()
    # Take the input width from the existing head rather than hard-coding 2048,
    # so the replacement layer always matches the backbone's feature size.
    res50.fc = nn.Linear(res50.fc.in_features, num_outputs)
    return res50

In [7]:
# Instantiate the network returned by get_model().
model = get_model()

In [8]:
def split_datasets(csv_file, test_size, random_state=None):
    """Split ``csv_file`` into train, validation and test subsets.

    The test subset is carved out first; the validation subset is then taken
    from what remains, using the same ``test_size`` for both cuts.

    Args:
        csv_file: The table to split (passed straight to ``train_test_split``).
        test_size: Size argument forwarded to both ``train_test_split`` calls.
        random_state: Optional seed forwarded to both calls so the split can be
            reproduced. The default ``None`` keeps the previous unseeded
            behaviour.

    Returns:
        A ``(train, val, test)`` tuple.
    """
    train, test = train_test_split(
        csv_file, test_size=test_size, random_state=random_state
    )
    train, val = train_test_split(
        train, test_size=test_size, random_state=random_state
    )
    return train, val, test

In [9]:
# Cut off 1% of the rows for test, then 1% of the remainder for validation.
train, val, test = split_datasets(csv_file, 0.01)

In [10]:
# Report how many rows ended up in each split.
print(f'Train dataset size: {len(train)}')
print(f'Val dataset size: {len(val)}')
print(f'Test dataset size: {len(test)}')

Train dataset size: 43429
Val dataset size: 439
Test dataset size: 444


In [11]:
# Preview the first rows of the training split.
train.head()

Unnamed: 0.1,Unnamed: 0,img_path,dob,photo_taken_year,gender,name,face_location,face_score,updated_paths,valid_images
34151,34151,21/14575821_1949-08-04_2014.jpg,1949-08-04,2014,M,Rich Stubler,"231.2, 64.8, 345.6, 179.2",1.387475,../../IMDB_WIKI/wiki/21/14575821_1949-08-04_20...,1.0
6657,6657,88/40102988_1988-09-25_2014.jpg,1988-09-25,2014,F,Ekaterina Koneva,"206.244, 59.364000000000004, 293.76, 146.88",1.237428,../../IMDB_WIKI/wiki/88/40102988_1988-09-25_20...,1.0
46549,46549,20/537020_1958-11-19_2008.jpg,1958-11-19,2008,M,Charlie Kaufman,"174.73856254440588, 163.87740238538052, 271.52...",4.012372,../../IMDB_WIKI/wiki/20/537020_1958-11-19_2008...,1.0
41166,41166,29/35613229_1965-11-01_2012.jpg,1965-11-01,2012,M,Marko Kraljević,"165.58440384557463, 66.80976153822985, 312.786...",3.622035,../../IMDB_WIKI/wiki/29/35613229_1965-11-01_20...,1.0
35550,35550,75/26576875_1945-08-12_2009.jpg,1945-08-12,2009,M,Jean Nouvel,"158.09489842324112, 79.38044921162056, 275.500...",2.383003,../../IMDB_WIKI/wiki/75/26576875_1945-08-12_20...,1.0


In [12]:
class GetDataset(Dataset):
    """Dataset yielding ``(image, box)`` pairs resized to a fixed target size.

    Each row of ``csv_file`` supplies an image path (``updated_paths``) and a
    comma-separated box string (``face_location``) that is read as
    ``x1, y1, x2, y2``.
    """

    def __init__(self, csv_file, target_image_size):
        """Store the table and cache its path and box columns as lists.

        Args:
            csv_file: Table with 'updated_paths' and 'face_location' columns.
            target_image_size: ``(height, width)`` every image is resized to.
        """
        self.csv_file = csv_file
        self.target_image_size = target_image_size
        self.img_paths = self.csv_file['updated_paths'].values.tolist()
        self.boxes = self.csv_file['face_location'].values.tolist()

    def resize_box(self, old_image_size, new_image_size, old_box_values):
        """Rescale a box from the original image size to the resized image.

        Args:
            old_image_size: Shape of the source image, either
                ``(channels, height, width)`` or ``(height, width)``.
            new_image_size: ``(height, width)`` of the resized image, the same
                convention ``__getitem__`` uses when it calls ``resize``.
            old_box_values: ``[x1, y1, x2, y2]`` in source-image pixels.

        Returns:
            ``[x1, y1, x2, y2]`` in resized-image pixels.
        """
        if len(old_image_size) == 3:
            height, width = old_image_size[1], old_image_size[2]
        else:
            height, width = old_image_size[0], old_image_size[1]
        # x must scale with the new WIDTH and y with the new HEIGHT. The axes
        # used to be swapped, which only went unnoticed for square targets.
        new_height, new_width = new_image_size[0], new_image_size[1]
        x1, y1, x2, y2 = (
            old_box_values[0], old_box_values[1],
            old_box_values[2], old_box_values[3],
        )
        return [
            x1 / width * new_width,
            y1 / height * new_height,
            x2 / width * new_width,
            y2 / height * new_height,
        ]

    def __getitem__(self, index):
        """Load one image, resize it, and return it with its rescaled box.

        Returns:
            ``(image, box)``: the resized image converted with ``.float()``
            and a tensor holding the four rescaled box coordinates.
        """
        img = torchvision.io.read_image(self.img_paths[index], mode=torchvision.io.ImageReadMode.RGB)
        # Box coordinates are truncated to whole pixels before rescaling.
        box = [int(float(x)) for x in self.boxes[index].split(',')]
        # Rescale against the ORIGINAL shape, so this must precede the resize.
        updated_box = self.resize_box(img.shape, self.target_image_size, box)
        img = torchvision.transforms.functional.resize(img, (self.target_image_size[0], self.target_image_size[1]))
        updated_box = torch.Tensor(updated_box)
        return img.float(), updated_box

    def __len__(self):
        """Number of rows in the backing table."""
        return len(self.csv_file)

In [13]:
def box_loss(inputs, targets):
    """Sum, over the first four columns, of the batch-mean absolute error.

    Args:
        inputs: Predictions, indexed as ``[row, column]``.
        targets: Reference values with a matching layout.

    Returns:
        A scalar tensor: for each of columns 0-3 the absolute differences are
        summed and divided by ``inputs.shape[0]``, then the four results are
        added together.
    """
    batch_size = inputs.shape[0]
    deltas = (inputs - targets).abs()
    col_means = [deltas[:, col].sum() / batch_size for col in range(4)]
    # Accumulate in the order x1, x2, y1, y2 (columns 0, 2, 1, 3).
    total = col_means[0] + col_means[2]
    total = total + col_means[1]
    total = total + col_means[3]
    return total

In [14]:
# Training batches are reshuffled on every pass over the loader.
train_dataset = GetDataset(train, (512, 512))
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True, num_workers=1)
# Validation only measures the loss, so its batches are served in a fixed
# order instead of being shuffled.
val_dataset = GetDataset(val, (512, 512))
val_loader = DataLoader(val_dataset, batch_size=2, shuffle=False, num_workers=1)

In [17]:
def train_model(model, train_set, val_set, load_weights, epochs):
    """Train ``model`` with SGD, running a validation pass after every epoch.

    Relies on the notebook-level names ``device``, ``multiple_gpus`` and
    ``box_loss``.

    Args:
        model: Network to optimise. If its parameters are not already on CUDA
            it is moved to ``device`` (and wrapped in ``nn.DataParallel`` first
            when ``multiple_gpus`` is set).
        train_set: Iterable of ``(images, labels)`` batches used for training.
        val_set: Iterable of ``(images, labels)`` batches used for validation.
        load_weights: Path to a checkpoint holding a ``'model_state_dict'``
            entry, or ``None`` to start from the model's current weights.
        epochs: Number of train-then-validate passes to run.
    """
    data_pointers = {
        'train': train_set,
        'val': val_set,
    }
    if load_weights is not None:
        # map_location='cpu' lets a checkpoint saved on a GPU be opened on a
        # CPU-only machine; load_state_dict then copies the values into the
        # model's existing parameters.
        checkpoint = torch.load(load_weights, map_location='cpu')
        model.load_state_dict(checkpoint['model_state_dict'])
    optimizer = optim.SGD(model.parameters(), lr=0.001)
    if not next(model.parameters()).is_cuda:
        if multiple_gpus:
            model = nn.DataParallel(model)
        model = model.to(device)
    for epoch in range(epochs):
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()
            running_loss = 0.0
            with tqdm(data_pointers[phase], unit='batch') as tepoch:
                for imgs, labels in tepoch:
                    tepoch.set_description(f'Epoch: {epoch}')
                    imgs = imgs.to(device)
                    labels = labels.to(device)
                    optimizer.zero_grad()
                    # Gradients are tracked only while training.
                    with torch.set_grad_enabled(phase == 'train'):
                        outputs = model(imgs)
                        loss = box_loss(outputs, labels)
                        if phase == 'train':
                            loss.backward()
                            optimizer.step()
                    running_loss += loss.item()
                    tepoch.set_postfix(loss=loss.item())
            # Average the accumulated loss over the length of the loader that
            # was just consumed. The validation branch used to store its
            # result in the wrong variable and always printed 0.0.
            epoch_loss = running_loss / len(data_pointers[phase])
            print(f'{phase} Loss: {float(epoch_loss)}')

In [18]:
# Keep a handle on the network being trained: train_model returns nothing, so
# a model created inline in the call would be unreachable once training ends.
# Rebinding `model` here also makes re-running this cell start from a fresh
# network.
model = get_model()
train_model(model, train_loader, val_loader, None, 2)

Epoch: 0:   2%|▏         | 427/21715 [00:52<43:43,  8.12batch/s, loss=262] 


KeyboardInterrupt: 