In [1]:
import pandas as pd
import numpy as np
import torch, torchvision
import os
import cv2, PIL
import sys
sys.path.append('../scripts/')
from resize_boxes import resize_values
from tqdm import tqdm
import pickle
from torch.utils.data import Dataset, DataLoader
import multiprocessing as mp
from sklearn.model_selection import train_test_split
from torch import nn
from torch import optim

In [2]:
# Select the compute device unconditionally so that `device` is always defined.
# It used to be assigned only when CUDA was available, which left the name
# undefined (NameError on first use) on CPU-only machines.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# True only when more than one CUDA device is visible; train_fn reads this flag
# to decide whether to wrap the model in nn.DataParallel.
multiple_gpus = torch.cuda.is_available() and torch.cuda.device_count() > 1

In [3]:
# Read the wiki metadata table (image paths, face locations, ...) from the
# current working directory.
wiki_csv_path = './wiki.csv'
wiki_csv = pd.read_csv(wiki_csv_path)

In [4]:
# Preview the first five rows of the table.
wiki_csv.head(5)

Unnamed: 0.1,Unnamed: 0,img_path,dob,photo_taken_year,gender,name,face_location,face_score,updated_paths,valid_images
0,0,17/10000217_1981-05-05_2009.jpg,1981-05-05,2009,M,Sami Jauhojärvi,"111.29109473290997, 111.29109473290997, 252.66...",4.300962,../../IMDB_WIKI/wiki/17/10000217_1981-05-05_20...,1.0
1,1,48/10000548_1925-04-04_1964.jpg,1925-04-04,1964,M,Dettmar Cramer,"252.48330229530742, 126.68165114765371, 354.53...",2.645639,../../IMDB_WIKI/wiki/48/10000548_1925-04-04_19...,1.0
2,2,12/100012_1948-07-03_2008.jpg,1948-07-03,2008,M,Marc Okrand,"113.52, 169.83999999999997, 366.08, 422.4",4.329329,../../IMDB_WIKI/wiki/12/100012_1948-07-03_2008...,1.0
3,3,65/10001965_1930-05-23_1961.jpg,1930-05-23,1961,M,Aleksandar Matanović,"1, 1, 634, 440",-inf,../../IMDB_WIKI/wiki/65/10001965_1930-05-23_19...,1.0
4,4,16/10002116_1971-05-31_2012.jpg,1971-05-31,2012,F,Diana Damrau,"171.61031405173117, 75.57451239763239, 266.766...",3.408442,../../IMDB_WIKI/wiki/16/10002116_1971-05-31_20...,1.0


In [5]:
# Load the collection of image paths that are known to cause problems so they
# can be excluded from the table before it is used.
# NOTE: pickle.load can execute arbitrary code; only open files you trust.
with open('./issue_paths.pkl', 'rb') as issue_file:
    issues = pickle.load(issue_file)

In [6]:
# Keep only the rows whose image path is not among the known-problematic ones.
is_problematic = wiki_csv['updated_paths'].isin(issues)
wiki_csv = wiki_csv[~is_problematic]

In [7]:
def detect_single_channel(img_path):
    """Decode a JPEG file and report the shape of the decoded tensor.

    Despite its name, this function does not decide anything by itself: it
    returns the shape, and the caller inspects ``shape[0]`` to identify
    single-channel images.

    Args:
        img_path: Path of the JPEG file to read.

    Returns:
        A ``(img_path, shape)`` tuple, where ``shape`` is the shape of the
        tensor produced by ``torchvision.io.decode_jpeg``.
    """
    raw_bytes = torchvision.io.read_file(img_path)
    decoded = torchvision.io.decode_jpeg(raw_bytes)
    return img_path, decoded.shape
def drop_single_channel_imgs(csv_file, num_workers=6):
    """Return ``csv_file`` without the rows that point at single-channel images.

    Every path in the ``updated_paths`` column is decoded in a pool of worker
    processes, and rows whose decoded tensor has a leading dimension of 1 are
    dropped.

    Args:
        csv_file: DataFrame with an ``updated_paths`` column of JPEG paths.
        num_workers: Number of worker processes used for decoding. Defaults
            to 6, the value that used to be hard-coded.

    Returns:
        A filtered DataFrame; ``csv_file`` itself is not modified.
    """
    paths = csv_file['updated_paths'].values.tolist()
    with mp.Pool(num_workers) as pool:
        # pool.map already returns a list of (path, shape) tuples.
        detected = pool.map(detect_single_channel, paths)
    # Named distinctly so it does not shadow the notebook-level `issues`.
    single_channel_paths = [path for path, shape in detected if shape[0] == 1]
    return csv_file[~csv_file['updated_paths'].isin(single_channel_paths)]

In [8]:
# Decode every listed image once and discard the rows whose image has a
# single channel.
wiki_csv = drop_single_channel_imgs(csv_file=wiki_csv)

Corrupt JPEG data: 11 extraneous bytes before marker 0xd9
Corrupt JPEG data: 839 extraneous bytes before marker 0xd9
Corrupt JPEG data: 549 extraneous bytes before marker 0xd9


In [9]:
def fix_first_values(boxes):
    """Parse comma-separated coordinate strings into lists of floats.

    Args:
        boxes: Iterable of strings such as ``"1, 1, 634, 440"``.

    Returns:
        A list with one entry per input string; each entry is the list of
        floats obtained by splitting that string on commas.
    """
    return [[float(token) for token in raw_box.split(',')] for raw_box in boxes]

In [10]:
def split_datasets(csv_file, test_size=0.01):
    """Split a table into train, validation and test partitions.

    The test partition is carved off first; the validation partition is then
    taken from the remaining rows using the same ``test_size`` value. Both
    splits use ``random_state=42``.

    Args:
        csv_file: Table to split.
        test_size: Value forwarded as ``test_size`` to both
            ``train_test_split`` calls.

    Returns:
        A ``(train, val, test)`` tuple.
    """
    split_options = {'test_size': test_size, 'random_state': 42}
    remainder, test = train_test_split(csv_file, **split_options)
    train, val = train_test_split(remainder, **split_options)
    return train, val, test

In [11]:
class FaceDataset(Dataset):
    """Dataset that yields a resized image together with its box tensor.

    Args:
        img_paths: Sequence of JPEG file paths.
        boxes: Sequence of comma-separated coordinate strings; parsed into
            lists of floats with ``fix_first_values``.
        target_size: Sequence whose first two entries are used as the output
            size of the resize transform.
    """

    def __init__(self, img_paths, boxes, target_size):
        super().__init__()
        self.img_paths = img_paths
        self.target_size = target_size
        self.boxes = fix_first_values(boxes)

    def __len__(self):
        """Return the number of image paths held by the dataset."""
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, box)`` as float tensors."""
        path = self.img_paths[idx]
        decoded = torchvision.io.decode_jpeg(torchvision.io.read_file(path))
        # resize_values receives the shape of the image *before* it is resized.
        # NOTE(review): presumably it rescales the box to target_size -- confirm
        # against ../scripts/resize_boxes.
        scaled_box = resize_values(
            self.boxes[idx],
            decoded.shape,
            self.target_size,
            return_old_box=False,
            is_torch=True,
        )
        box = torch.Tensor(scaled_box).float()
        output_size = (self.target_size[0], self.target_size[1])
        resized = torchvision.transforms.functional.resize(decoded, output_size)
        return resized.float(), box

In [12]:
# dataset = FaceDataset(train['updated_paths'].values.tolist(), train['face_location'].values.tolist(), (512, 512))

In [13]:
# dataset.__getitem__(5)[1]

In [14]:
def create_and_get_model():
    """Build a ResNet-50 whose final layer produces four outputs.

    Returns:
        A ``torchvision.models.resnet50()`` instance whose ``fc`` layer has
        been replaced by ``nn.Linear(2048, 4)``.
    """
    backbone = torchvision.models.resnet50()
    backbone.fc = nn.Linear(in_features=2048, out_features=4)
    return backbone

In [23]:
def loss_fn(inputs, targets):
    """Sum over four columns of the batch-averaged absolute error.

    Args:
        inputs: Predictions, indexed as ``inputs[:, k]`` for ``k`` in 0..3.
        targets: Ground-truth values with the same layout.

    Returns:
        A scalar tensor. For each of the first four columns the absolute
        differences are divided by ``inputs.shape[0]`` and summed; the four
        column totals are then added together.
    """
    batch_size = inputs.shape[0]
    total = None
    for column in range(4):
        column_error = torch.sum(
            torch.abs(targets[:, column] - inputs[:, column]) / batch_size
        )
        total = column_error if total is None else total + column_error
    return total
    # Disabled alternative kept for reference: nn.L1Loss()(inputs, targets)

In [30]:
def train_fn(model, train_dataset, val_dataset, epochs):
    """Train ``model`` with SGD and evaluate it after every epoch.

    Args:
        model: Network to optimise. If its parameters are not on a CUDA
            device, it is wrapped in ``nn.DataParallel`` when the
            notebook-level ``multiple_gpus`` flag is set and then moved to the
            notebook-level ``device``.
        train_dataset: Iterable of ``(img, label)`` batches used for training.
        val_dataset: Iterable of ``(img, label)`` batches used for validation.
        epochs: Number of passes over both iterables.

    Returns:
        A dict ``{'train': [...], 'val': [...]}`` holding, for every epoch, the
        mean batch loss of each phase (``nan`` if a phase produced no
        batches). Previously the running loss was accumulated but discarded
        and the function returned nothing.
    """
    datasets = {
        'train': train_dataset,
        'val': val_dataset,
    }
    if not next(model.parameters()).is_cuda:
        if multiple_gpus:
            model = nn.DataParallel(model)
        model = model.to(device)
    # Build the optimizer once the model has been placed on its device.
    optimizer = optim.SGD(model.parameters(), lr=0.01)

    history = {'train': [], 'val': []}
    for epoch in range(epochs):
        for phase in ('train', 'val'):
            is_training = phase == 'train'
            # train(True) enables training mode; train(False) is eval mode.
            model.train(is_training)
            running_loss = 0.0
            num_batches = 0
            with tqdm(datasets[phase], unit='batch') as tepoch:
                # The description does not change within an epoch, so set it once.
                tepoch.set_description(f'Epoch: {epoch}')
                for img, label in tepoch:
                    img = img.to(device)
                    label = label.to(device)
                    optimizer.zero_grad()
                    # Gradients are tracked only while training.
                    with torch.set_grad_enabled(is_training):
                        outputs = model(img)
                        loss = loss_fn(outputs, label)
                        if is_training:
                            loss.backward()
                            optimizer.step()
                    batch_loss = loss.item()
                    running_loss += batch_loss
                    num_batches += 1
                    tepoch.set_postfix(loss=batch_loss)
            # Guard against an empty loader to avoid a ZeroDivisionError.
            mean_loss = running_loss / num_batches if num_batches else float('nan')
            history[phase].append(mean_loss)
    return history

In [31]:
# Settings shared by the training and validation pipelines.
TARGET_SIZE = (256, 256)
BATCH_SIZE = 16

train, val, test = split_datasets(wiki_csv, 0.01)
train_dataset = FaceDataset(train['updated_paths'].values.tolist(), train['face_location'].values.tolist(), TARGET_SIZE)
val_dataset = FaceDataset(val['updated_paths'].values.tolist(), val['face_location'].values.tolist(), TARGET_SIZE)
# prefetch_factor only applies when worker processes are used (num_workers > 0).
# Recent PyTorch releases raise a ValueError if it is passed together with the
# default num_workers=0, so it is omitted here.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
# Validation batches never drive a gradient update, so they are not shuffled.
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [32]:
# Bind the network to a name before training: train_fn updates the module's
# parameters in place, so `model` holds the trained weights afterwards instead
# of them being discarded with an anonymous argument.
model = create_and_get_model()
train_fn(model, train_loader, val_loader, 1)

Epoch: 0:   4%|▍         | 114/2846 [01:06<26:39,  1.71batch/s, loss=203]


KeyboardInterrupt: 