In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
############## sys imports #############
import os
import sys
import time
import copy
import argparse
import datetime
############## basic stats imports #############
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
############## pytorch imports #############
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torchvision import transforms, utils, models
from torch.utils.data import Dataset, DataLoader

In [3]:
############## custom imports #############
from dataloader import FaceScrubDataset, TripletFaceScrub, SiameseFaceScrub
from dataloader import FaceScrubBalancedBatchSampler

from networks import *
from losses import OnlineTripletLoss
from openface.loadOpenFace import prepareOpenFace
from utils import save_checkpoint, save_hyperparams, AverageMeter, HardestNegativeTripletSelector, RandomNegativeTripletSelector, SemihardNegativeTripletSelector

In [4]:
DATA_PATH = '/home/s1791387/facescrub-data/new_data_max/'
TRAIN_PATH = os.path.join(DATA_PATH, 'train_full_with_ids.txt')
VALID_PATH = os.path.join(DATA_PATH, 'val_full_with_ids.txt')
TEST_PATH = os.path.join(DATA_PATH, 'test_full_with_ids.txt')
WEIGHTS_PATH = '/home/s1791387/facescrub-data/new_data_max/openface_model_weigths/job_semi_std_cos3_Jul_25_1000hrs/weights_75.pth'

In [5]:
batch_size = 512
input_size = 96
output_dim = 128
learning_rate = 1e2
num_epochs = 10
start_epoch = 0

triplet_margin = 1.  # margin
triplet_p = 2  # norm degree for distance calculation

resume_training = True
workers = 8
use_cuda = True

In [6]:
cuda = False
pin_memory = False
if use_cuda and torch.cuda.is_available():
    device = torch.device("cuda")
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
    cuda = True
    cudnn.benchmark = True
    pin_memory = True
else:
    device = torch.device("cpu")

print('Device set: {}'.format(device))
print('Training set path: {}'.format(TRAIN_PATH))
print('Training set Path exists: {}'.format(os.path.isfile(TRAIN_PATH)))

Device set: cuda
Training set path: /home/s1791387/facescrub-data/new_data_max/train_full_with_ids.txt
Training set Path exists: True


In [7]:
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((input_size, input_size)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ]),
    'val': transforms.Compose([
        transforms.Resize((input_size, input_size)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
}


train_df = FaceScrubDataset(
    txt_file=TRAIN_PATH, root_dir=DATA_PATH, transform=data_transforms['val'])

val_df = FaceScrubDataset(
    txt_file=VALID_PATH, root_dir=DATA_PATH, transform=data_transforms['val'])

siamese_train_df = SiameseFaceScrub(train_df, train=True)
print('Train data converted to siamese form. Length: {}'.format(len(siamese_train_df)))

siamese_val_df=SiameseFaceScrub(val_df, train=False)
print('Validation data converted to siamese form. Length: {}'.format(
    len(siamese_val_df)))

train_loader=torch.utils.data.DataLoader(
        siamese_train_df, batch_size=batch_size, shuffle=True, pin_memory=pin_memory, num_workers=workers)
print('Train loader created. Length of train loader: {}'.format(
        len(train_loader)))
    
val_loader=torch.utils.data.DataLoader(
        siamese_val_df, batch_size=batch_size, shuffle=False, pin_memory=pin_memory, num_workers=workers)
print('Val loader created. Length of train loader: {}'.format(
        len(val_loader)))



Train data converted to siamese form. Length: 54981
Validation data converted to siamese form. Length: 5881
Train loader created. Length of train loader: 108
Val loader created. Length of train loader: 12


In [8]:
openface = prepareOpenFace(useCuda=cuda)
params = sum(p.numel() for p in openface.parameters() if p.requires_grad)
print('Number of params in network {}'.format(params))

en_optimizer=optim.Adam(openface.parameters(), lr=learning_rate)

T_max = num_epochs
eta_min = 0.01
en_scheduler = lr_scheduler.CosineAnnealingLR(en_optimizer, T_max=T_max, eta_min=eta_min)

Sent model to GPU
Number of params in network 3733968


In [9]:
classifier = ClassNet(input_size=output_dim, training=True)
cl_optimizer = optim.Adam(classifier.parameters(), lr=learning_rate)

T_max = num_epochs
eta_min = 0.01
cl_scheduler = lr_scheduler.CosineAnnealingLR(cl_optimizer, T_max=T_max, eta_min=eta_min)
cl_criterion = nn.BCEWithLogitsLoss()

In [10]:
if resume_training:
    resume_weights=WEIGHTS_PATH
    if cuda:
        checkpoint=torch.load(resume_weights)
    else:
        # Load GPU model on CPU
        checkpoint=torch.load(resume_weights,
                                map_location=lambda storage,
                                loc: storage)

    start_epoch=checkpoint['epoch']
    openface.load_state_dict(checkpoint['state_dict'])
    en_optimizer.load_state_dict(checkpoint['optimizer'])
    best_loss = checkpoint['best_loss']
    # scheduler.load_state_dict(checkpoint['scheduler'])
    print("=> loaded checkpoint '{}' (trained for {} epochs)".format(
        resume_weights, checkpoint['epoch']))
#     for epoch in range(0, start_epoch):
#         en_scheduler.step()

=> loaded checkpoint '/home/s1791387/facescrub-data/new_data_max/openface_model_weigths/job_semi_std_cos3_Jul_25_1000hrs/weights_75.pth' (trained for 75 epochs)


In [11]:
if cuda:
    openface.cuda()
    classifier.cuda()
    print('Sent model to gpu {}'.format(
        next(openface.parameters()).is_cuda))

Sent model to gpu True


In [12]:
def train(train_loader, classifier, encoder, criterion, en_optimizer, cl_optimizer, epoch, device):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    print_freq=1
    # switch to train mode
    classifier.train()
    encoder.train()

    end = time.time()
    for batch_idx, ([imgs1,imgs2], [labels1, labels2], target) in enumerate(train_loader):
        data_time.update(time.time() - end)

        imgs1 = imgs1.to(device)
        imgs2 = imgs2.to(device)
        target = target.to(device).float()
#         print(target.shape, target)
        embed1, _ = encoder(imgs1)
        embed2, _ = encoder(imgs2)
        pair_embed = torch.cat((embed1, embed2), dim=1)
#         print(pair_embed.shape)
        pred_target = classifier(pair_embed)
        pred_target.squeeze_()
#         print(pred_target.squeeze_())
#         print(pred_target.shape)
        loss = cl_criterion(pred_target, target)
#         print(loss)
        losses.update(loss.item(), imgs1[0].size(0))

        en_optimizer.zero_grad()
        cl_optimizer.zero_grad()

        loss.backward()
        en_optimizer.step()
        cl_optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if batch_idx % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                   epoch, batch_idx, len(train_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses))
    return losses.avg

def validate(val_loader, classifier, encoder, criterion, epoch, device):
    batch_time = AverageMeter()
    losses = AverageMeter()
    print_freq=100
    # switch to evaluate mode
    classifier.eval()
    encoder.eval()

    with torch.no_grad():
        end = time.time()
        for batch_idx, ([imgs1,imgs2], [labels1, labels2], target) in enumerate(val_loader):
            imgs1 = imgs1.to(device)
            imgs2 = imgs2.to(device)
            target = target.to(device).float()
    #         print(target.shape, target)
            embed1, _ = openface(imgs1)
            embed2, _ = openface(imgs2)
            pair_embed = torch.cat((embed1, embed2), dim=1)
    #         print(pair_embed.shape)
            pred_target = classifier(pair_embed)
            pred_target.squeeze_()
    #         print(pred_target.squeeze_())
    #         print(pred_target.shape)
            loss = cl_criterion(pred_target, target)
    #         print(loss)
            losses.update(loss.item(), imgs1[0].size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                       batch_idx, len(val_loader), batch_time=batch_time, loss=losses))

    return losses.avg

In [13]:
print('-'*10)
print('Beginning Training')
train_losses = []
val_losses = []
epoch_time = AverageMeter()
ep_end = time.time()
for epoch in range(start_epoch, start_epoch + num_epochs):

    en_scheduler.step()
    cl_scheduler.step()

    # train
    train_loss = train(train_loader, classifier, openface, cl_criterion, en_optimizer, cl_optimizer, epoch, device)
    train_losses.append(train_loss)
    # validate
    print('-'*10)
    val_loss = validate(val_loader, classifier, openface, cl_criterion, epoch, device)

    print('Avg validation loss: {}'.format(val_loss))
    val_losses.append(val_loss)

    state = {
        'epoch': epoch,
        'state_dict': openface.state_dict(),
        'optimizer': optimizer.state_dict(),
        'train_losses': train_losses,
        'val_losses': val_losses,
        'best_loss': best_loss
        # 'scheduler': scheduler.state_dict()
    }
    if best_loss > val_loss:
        best_loss = val_loss
        MODEL_NAME = os.path.join(
            WEIGHTS_PATH, 'weights_{}.pth'.format(epoch))
        save_checkpoint(state, True, WEIGHTS_PATH, MODEL_NAME)
    print('-' * 20)
    epoch_time.update(time.time() - ep_end)
    ep_end = time.time()
    print('Epoch {}/{}\t'
          'Time {epoch_time.val:.3f} sec ({epoch_time.avg:.3f} sec)'.format(epoch, start_epoch + num_epochs - 1, epoch_time=epoch_time))
    print('-'*20)

print('Finished training')

----------
Beginning Training
Epoch: [75][0/108]	Time 89.851 (89.851)	Data 65.511 (65.511)	Loss 0.6938 (0.6938)
Epoch: [75][1/108]	Time 0.695 (45.273)	Data 0.005 (32.758)	Loss 68295.7734 (34148.2336)
Epoch: [75][2/108]	Time 0.711 (30.419)	Data 0.006 (21.840)	Loss 567761.1250 (212019.1974)
Epoch: [75][3/108]	Time 0.694 (22.988)	Data 0.001 (16.381)	Loss 2755523.0000 (847895.1481)
Epoch: [75][4/108]	Time 22.062 (22.803)	Data 21.374 (17.379)	Loss 519568.5000 (782229.8184)
Epoch: [75][5/108]	Time 0.706 (19.120)	Data 0.001 (14.483)	Loss 2924359.7500 (1139251.4737)
Epoch: [75][6/108]	Time 0.695 (16.488)	Data 0.001 (12.414)	Loss 1538636.2500 (1196306.4417)
Epoch: [75][7/108]	Time 0.711 (14.516)	Data 0.001 (10.862)	Loss 729353.3125 (1137937.3006)
Epoch: [75][8/108]	Time 52.591 (18.746)	Data 51.804 (15.411)	Loss 135825.2188 (1026591.5137)
Epoch: [75][9/108]	Time 0.806 (16.952)	Data 0.001 (13.870)	Loss 139603.5938 (937892.7217)
Epoch: [75][10/108]	Time 0.792 (15.483)	Data 0.001 (12.610)	Loss 1124

Process Process-1:
Process Process-2:
Process Process-4:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/s1791387/miniconda3/envs/fairness/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/s1791387/miniconda3/envs/fairness/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Process Process-3:
  File "/home/s1791387/miniconda3/envs/fairness/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/s1791387/miniconda3/envs/fairness/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/s1791387/miniconda3/envs/fairness/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/s1791387/miniconda3/envs/fairness/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target

Traceback (most recent call last):
  File "/home/s1791387/miniconda3/envs/fairness/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-13-3197ebd222c4>", line 13, in <module>
    train_loss = train(train_loader, classifier, openface, cl_criterion, en_optimizer, cl_optimizer, epoch, device)
  File "<ipython-input-12-0cd535ed7af7>", line 11, in train
    for batch_idx, ([imgs1,imgs2], [labels1, labels2], target) in enumerate(train_loader):
  File "/home/s1791387/miniconda3/envs/fairness/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 330, in __next__
    idx, batch = self._get_batch()
  File "/home/s1791387/miniconda3/envs/fairness/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 309, in _get_batch
    return self.data_queue.get()
  File "/home/s1791387/miniconda3/envs/fairness/lib/python3.6/queue.py", line 164, in get
    self.not_empty.wait()

KeyboardInterrupt: 

In [None]:
print(torch.cuda.memory_allocated())

In [None]:
def tsne(embeddings):
    import sklearn.manifold
    return torch.from_numpy(sklearn.manifold.TSNE(n_iter = 250).fit_transform(embeddings.numpy()))

In [None]:
def svg(points, labels, thumbnails, legend_size = 1e-1, legend_font_size = 5e-2, circle_radius = 5e-3):
	points = (points - points.min(0)[0]) / (points.max(0)[0] - points.min(0)[0])
	class_index = sorted(set(labels))
	class_colors = [360.0 * i / len(class_index) for i in range(len(class_index))]
	colors = [class_colors[class_index.index(label)] for label in labels]
	thumbnails_base64 = [base64.b64encode(cv2.imencode('.jpg', img.mul(255).permute(1, 2, 0).numpy()[..., ::-1])[1]) for img in thumbnails]
	return '<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1 1">' + \
	   ''.join(map('''<circle cx="{}" cy="{}" title="{}" fill="hsl({}, 50%, 50%)" r="{}" desc="data:image/jpeg;base64,{}" onmouseover="evt.target.ownerDocument.getElementById('preview').setAttribute('href', evt.target.getAttribute('desc')); evt.target.ownerDocument.getElementById('label').textContent = evt.target.getAttribute('title');" />'''.format, points[:, 0], points[:, 1], labels, colors, [circle_radius] * len(points), thumbnails_base64)) + \
	   '''<image id="preview" x="0" y="{legend_size}" width="{legend_size}" height="{legend_size}" />
	   <text id="label" x="0" y="{legend_size}" font-size="{legend_font_size}" />
	   </svg>'''.format(legend_size = legend_size, legend_font_size = legend_font_size)

In [None]:
tsne_embeddings = tsne(train_embeddings)

In [None]:
import cv2
import base64

In [None]:
open('train_tsne.svg', 'w').write(svg(tsne_embeddings, person_id, thumbnails))

In [None]:
train_embeddings = train_embeddings.numpy()
np.correlate(train_embeddings, gender)

In [None]:
import sklearn

In [None]:
sklearn.metrics.mutual_info_score(train_embeddings, gender, contingency=None)