In [1]:
import sys
import torch
from torchvision import transforms
from torch.autograd import Variable
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F

import time
import os
import numpy as np
import json
import cv2
from PIL import Image, ImageOps
import random
from tqdm import tqdm
import operator
import itertools
from scipy.io import  loadmat
import logging
from scipy import signal
from utils import data_transforms
from tqdm import tqdm
from sklearn.metrics import roc_auc_score


from models.gazenet import GazeNet
from models.__init__ import save_checkpoint, resume_checkpoint
from dataloader.gazenet import GooDataset, GazeDataset

In [2]:
def boxes2centers(normalized_boxes):
    """Return the center point of every box.

    Args:
        normalized_boxes: 2-D array indexed as [box, coordinate]. Columns 0
            and 2 are averaged for x, columns 1 and 3 for y (i.e. an
            [x1, y1, x2, y2] layout).

    Returns:
        Array of shape (n_boxes, 2) holding [center_x, center_y] per box.
    """
    x_mid = (normalized_boxes[:, 0] + normalized_boxes[:, 2]) / 2
    y_mid = (normalized_boxes[:, 1] + normalized_boxes[:, 3]) / 2

    # Turn each 1-D coordinate vector into a column, then join side by side.
    return np.hstack((x_mid[:, None], y_mid[:, None]))

def select_nearest_bbox(gazepoint, gt_bboxes, gt_labels):
    '''
    Pick the box whose center lies closest (Euclidean) to a gaze point.

    In: gazepoint (2,) and gt_bboxes (n_boxes, 4), normalized from [0,1];
        gt_labels must be indexable by box index.
    Out: dict with keys 'box', 'label' and 'index' describing the nearest box.
    '''
    offsets = boxes2centers(gt_bboxes) - gazepoint
    distances = np.sqrt(offsets[:, 0] ** 2 + offsets[:, 1] ** 2)
    closest = distances.argmin()

    return {
        'box': gt_bboxes[closest],
        'label': gt_labels[closest],
        'index': closest,
    }

# Precalculated mean box dimensions (width, height) for GOO objects:
#   (0.044, 0.074) - Synth
#   (0.013, 0.045) - unlabeled in the original note; presumably Real (TODO confirm)
def point2box(point, mean_boxdims=(0.013, 0.045)):
    """Build a fixed-size box centered on a point.

    Args:
        point: Indexable (x, y) pair.
        mean_boxdims: (width, height) of the box to place around `point`.

    Returns:
        List [x1, y1, x2, y2]: upper-left corner followed by lower-right
        corner.
    """
    half_w = mean_boxdims[0] / 2
    half_h = mean_boxdims[1] / 2
    px, py = point[0], point[1]

    return [px - half_w, py - half_h, px + half_w, py + half_h]

def calculate_metrics(npzfile, dataset):
    """Compute mean L2 error and proximate accuracy for saved gaze predictions.

    Args:
        npzfile: Path to an .npz archive containing a 'gazepoints' array with
            one predicted (x, y) point per dataset sample, in dataset order
            (prediction i is compared against dataset[i]).
        dataset: Indexable dataset whose samples are dicts providing
            'gt_position' (an object with a .numpy() method), 'gaze_idx' and
            'gt_bboxes'.

    Returns:
        dict with
            'l2': mean Euclidean distance between predicted and ground-truth
                  gaze points,
            'pa': array with one entry per distance threshold giving the
                  fraction of samples whose mean corner distance between the
                  predicted box and the ground-truth box is below that
                  threshold.

    Raises:
        ValueError: if the number of predictions differs from len(dataset).
            'pa' is normalized by len(dataset), so a mismatch would silently
            produce wrong accuracies.
    """
    # Thresholds are built once instead of on every loop iteration.
    percent_dists = np.array([0.01, 0.03, 0.05, 0.10, 0.15, 0.20, 0.25, 0.3])
    PA_count = np.zeros(len(percent_dists))

    # np.load keeps the archive open until it is closed; the array itself is
    # read when indexed, so it stays usable after the with-block.
    with np.load(npzfile) as predictions:
        gazepoints = predictions['gazepoints']

    if len(gazepoints) != len(dataset):
        raise ValueError(
            'Number of predictions (%d) does not match dataset size (%d)'
            % (len(gazepoints), len(dataset)))

    error = []
    for idx, f_point in tqdm(enumerate(gazepoints), total=len(dataset)):

        data = dataset[idx]

        # L2 distance between predicted and ground-truth gaze point
        gt_point = data['gt_position'].numpy()
        f_error = f_point - gt_point
        f_dist = np.sqrt(f_error[0] ** 2 + f_error[1] ** 2)
        error.append(f_dist)

        # Proximate accuracy: compare the corners of a mean-sized box placed
        # on the prediction with the corners of the ground-truth gaze box.
        gt_idx = data['gaze_idx']
        gt_box = data['gt_bboxes'][gt_idx][:]
        pred_box = point2box(f_point)

        dist_ul = np.sqrt((pred_box[0] - gt_box[0]) ** 2 + (pred_box[1] - gt_box[1]) ** 2)
        dist_lr = np.sqrt((pred_box[2] - gt_box[2]) ** 2 + (pred_box[3] - gt_box[3]) ** 2)
        corner_dist = (dist_ul + dist_lr) / 2

        # Count the sample for every threshold it falls under.
        PA_count[percent_dists > corner_dist] += 1

    l2 = np.mean(np.array(error))
    pa = PA_count / len(dataset)
    metrics = {
        'l2' : l2,
        'pa' : pa,
    }

    return metrics

def calc_mean_boxdim(dataset):
    """Print the mean [width, height] of the gazed-at box over a dataset.

    For every sample, the box at index sample['gaze_idx'] in
    sample['gt_bboxes'] is unpacked as (x1, y1, x2, y2); its extent along x
    and y is collected and the per-axis mean over all samples is printed.
    Nothing is returned.
    """
    box_sizes = []

    for sample in tqdm(dataset, total=len(dataset)):
        target = sample['gaze_idx']
        left, top, right, bottom = sample['gt_bboxes'][target][:]
        box_sizes.append([right - left, bottom - top])

    # Rows are samples, columns are (x extent, y extent).
    print(np.mean(np.vstack(box_sizes), axis=0))
    

In [3]:
# REAL
# Points the notebook at the GOOReal image directory and test annotation
# pickle (absolute, machine-specific paths) and builds the dataset that the
# following cells read as `val_set`.

#Prepare dataloaders
# NOTE(review): no DataLoader is created in this cell and `batch_size` is not
# used here — only the dataset object is built.
test_images_dir = '/home/eee198/Documents/datasets/GOOReal/finalrealdatasetImgsV2/'
test_pickle_path = '/home/eee198/Documents/datasets/GOOReal/testrealhumans.pickle'
batch_size = 16

# Dataset
# use_bboxes=True presumably makes samples carry 'gt_bboxes' / 'gaze_idx',
# which the metric helpers index — TODO confirm against GooDataset.
val_set = GooDataset(test_images_dir, test_pickle_path, 'test', use_bboxes=True)

==> Number of Images: 2156




In [4]:
# Score the saved predictions against the currently loaded `val_set`.
predictions_path = 'predictions.npz'
metrics = calculate_metrics(predictions_path, val_set)

PA_count = metrics['pa']
print('L2 Distance: ', metrics['l2'])
print("Percentage Distances: ", [0.01, 0.03, 0.05, 0.10, 0.15, 0.20, 0.25, 0.30])
# One "%.3f" field per distance threshold (eight in total).
print("Proximate Accuracy: \t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t"%
            tuple(PA_count[i] for i in range(8)))

 10%|▉         | 206/2156 [00:08<01:19, 24.44it/s]


KeyboardInterrupt: 

In [None]:
# SYNTH
# Points the notebook at the synthetic GOO test images and annotation pickle
# (absolute, machine-specific paths) and rebinds `val_set` for later cells.

# Dataset locations and loader settings
test_images_dir = '/hdd/HENRI/goosynth/test/'
test_pickle_path = '/hdd/HENRI/goosynth/picklefiles/testpickle120.pickle'
batch_size = 16

#For GOO
# `val_set` is built with use_bboxes=True and is the one passed to the metric
# helpers; `val_set_load` omits that flag and backs the DataLoader.
val_set = GooDataset(test_images_dir, test_pickle_path, 'test', use_bboxes=True)
val_set_load = GooDataset(test_images_dir, test_pickle_path, 'test')
# Use the configured batch_size rather than repeating the literal 16.
test_data_loader = torch.utils.data.DataLoader(val_set_load, batch_size=batch_size, num_workers=8, shuffle=False)

In [None]:
# NOTE(review): `metrics` is not computed in this cell; it must already exist
# from an earlier calculate_metrics(...) call — confirm it was produced for
# the dataset being reported here.
PA_count = metrics['pa']
print('L2 Distance: ', metrics['l2'])
print("Percentage Distances: ", [0.01, 0.03, 0.05, 0.10, 0.15, 0.20, 0.25, 0.30])
# One "%.3f" field per distance threshold (eight in total).
print("Proximate Accuracy: \t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t"%
            tuple(PA_count[i] for i in range(8)))

In [None]:
def draw_result(image_path, eye, heatmap, gaze_point, gt_point, idx=0):
    """Draw predicted and ground-truth gaze on an image, add a heatmap view, save it.

    Args:
        image_path: Path of the image to annotate (read with cv2.imread).
        eye: (x, y) eye position as fractions of image width / height.
        heatmap: 2-D numpy array; min-max scaled to 0-255 for display.
        gaze_point: (x, y) gaze point, same normalization as `eye`.
        gt_point: (x, y) ground-truth point, same normalization as `eye`.
        idx: Used in the output filename 'out_<idx>.png'.

    Returns:
        The saved image: the annotated picture and the heatmap blend,
        concatenated horizontally.

    Raises:
        IOError: if the image at `image_path` cannot be read.
    """
    im = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising.
    if im is None:
        raise IOError('Could not read image: %s' % image_path)
    image_height, image_width = im.shape[:2]

    def _to_pixels(point):
        # Scale a normalized (x, y) pair to integer pixel coordinates.
        px, py = point
        return int(image_width * px), int(py * image_height)

    eye_px = _to_pixels(eye)
    gaze_px = _to_pixels(gaze_point)
    gt_px = _to_pixels(gt_point)

    for center in (eye_px, gaze_px, gt_px):
        cv2.circle(im, center, 5, [255, 255, 255], -1)
    cv2.line(im, eye_px, gaze_px, [255, 0, 0], 2)
    cv2.line(im, eye_px, gt_px, [0, 165, 255], 2)

    # heatmap visualization
    hm_min = heatmap.min()
    hm_range = heatmap.max() - hm_min
    if hm_range > 0:
        heatmap = ((heatmap - hm_min) / hm_range * 255).astype(np.uint8)
    else:
        # A constant heatmap has no range; avoid dividing by zero and show
        # it as an all-zero map.
        heatmap = np.zeros(heatmap.shape, dtype=np.uint8)
    heatmap = np.stack([heatmap, heatmap, heatmap], axis=2)
    heatmap = cv2.resize(heatmap, (image_width, image_height))

    # Blend: 80% heatmap, 20% annotated image.
    heatmap = (0.8 * heatmap.astype(np.float32) + 0.2 * im.astype(np.float32)).astype(np.uint8)
    img = np.concatenate((im, heatmap), axis=1)

    save_dir = './sample_out/'
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    filename = 'out_%s.png' % str(idx)
    save_path = os.path.join(save_dir, filename)
    print(save_path)
    cv2.imwrite(save_path, img)

    return img

In [None]:
# Print the mean gaze-target box dimensions for whichever dataset `val_set`
# is currently bound to.
calc_mean_boxdim(val_set)

In [8]:
def demo_random(npzfile, dataset):
    """Visualize prediction vs. ground truth for one randomly chosen sample.

    Draws onto the sample's image:
      * white dots at the eye, ground-truth and predicted gaze points,
      * a line eye -> ground truth in [255, 0, 0] and a line
        eye -> prediction in [0, 165, 255],
      * a (0, 165, 255) rectangle around the box nearest to the predicted
        gaze point,
      * a (255, 0, 0) rectangle around the box at data['gaze_idx'].
    The result is written to './temp/out_1.png', overwriting any previous
    output.

    Args:
        npzfile: Path to an .npz archive containing a 'gazepoints' array
            indexed like `dataset`.
        dataset: Indexable dataset whose samples are dicts providing
            'eye_position', 'gt_position', 'image_path', 'gt_bboxes',
            'gt_labels' and 'gaze_idx'. Points and boxes are treated as
            fractions of image width / height.

    Returns:
        None.

    Raises:
        IOError: if the sample's image cannot be read.
    """
    # Load predictions from saved npz file; the context manager closes the
    # archive once the array has been read.
    with np.load(npzfile) as predictions:
        gazepoints = predictions['gazepoints']

    # Get a random sample from dataset
    idx = np.random.randint(len(dataset))
    data = dataset[idx]

    # Only plain coordinates are needed for drawing, so nothing is moved to
    # the GPU or wrapped in the deprecated autograd Variable.
    eye_position = data['eye_position']
    gt_position = data['gt_position']
    image_path = data['image_path']
    gt_bboxes = data['gt_bboxes']
    gt_labels = data['gt_labels']

    im = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising.
    if im is None:
        raise IOError('Could not read image: %s' % image_path)
    image_height, image_width = im.shape[:2]
    box_scale = [image_width, image_height, image_width, image_height]

    def _to_pixels(point):
        # Scale a normalized (x, y) pair to integer pixel coordinates.
        px, py = point
        return int(image_width * px), int(py * image_height)

    def _draw_box(box, color):
        # Scale a normalized [x1, y1, x2, y2] box to pixels and outline it.
        x1, y1, x2, y2 = [int(v) for v in (box * box_scale).astype(int)]
        cv2.rectangle(im, (x1, y1), (x2, y2), color, 2)

    # Draw gazepoints and gt
    gazepoint = gazepoints[idx]
    eye_px = _to_pixels(eye_position)
    gt_px = _to_pixels(gt_position)
    pred_px = _to_pixels(gazepoint)
    for center in (eye_px, gt_px, pred_px):
        cv2.circle(im, center, 5, [255, 255, 255], -1)
    cv2.line(im, eye_px, gt_px, [255, 0, 0], 2)
    cv2.line(im, eye_px, pred_px, [0, 165, 255], 2)

    # Draw bbox of prediction: the box nearest to the predicted gaze point
    bbox_data = select_nearest_bbox(gazepoint, gt_bboxes, gt_labels)
    _draw_box(bbox_data['box'], (0, 165, 255))

    # Draw bbox of gt
    gaze_idx = data['gaze_idx']
    _draw_box(gt_bboxes[gaze_idx], (255, 0, 0))

    save_dir = './temp/'
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    filename = 'out_%s.png' % str(1)
    save_path = os.path.join(save_dir, filename)
    cv2.imwrite(save_path, im)

    return None

In [9]:
# Render one random sample from `val_set` using the saved predictions; the
# image is written to disk by demo_random, which returns None.
demo_random('predictions.npz', val_set)

  del sys.path[0]


In [5]:
# Shell out to inference.py with the synthetic GOO test directory, its
# annotation pickle and the epoch-25 checkpoint under ./saved_models/gazenet_goo/.
!python inference.py \
--test_dir='/hdd/HENRI/goosynth/test/'\
--test_annotation='/hdd/HENRI/goosynth/picklefiles/testpickle120.pickle'\
--resume_path='./saved_models/gazenet_goo/model_epoch25.pth.tar'

=> loading checkpoint './saved_models/gazenet_goo/model_epoch25.pth.tar'
=> Optimizer has different parameter groups. Usually this will occur for staged optimizers (GazeNet, GazeMask)
=> loaded checkpoint './saved_models/gazenet_goo/model_epoch25.pth.tar' (epoch 25)
==> Number of Images: 19200


In [6]:
# Shell out to evaluate.py with the same test set and checkpoint. The
# --predictions_npz option on the last line is commented out, so no npz file
# is passed to the script.
!python evaluate.py \
--test_dir='/hdd/HENRI/goosynth/test/'\
--test_annotation='/hdd/HENRI/goosynth/picklefiles/testpickle120.pickle'\
--resume_path='./saved_models/gazenet_goo/model_epoch25.pth.tar'\
#--predictions_npz='./predictions.npz'

=> loading checkpoint './saved_models/gazenet_goo/model_epoch25.pth.tar'
=> Optimizer has different parameter groups. Usually this will occur for staged optimizers (GazeNet, GazeMask)
=> loaded checkpoint './saved_models/gazenet_goo/model_epoch25.pth.tar' (epoch 25)
==> Number of Images: 19200
==> Number of Images: 19200
==> No npzfile provided. Inference will be done on the test dataset and will be saved to predictions.npz
100%|███████████████████████████████████████| 1200/1200 [08:55<00:00,  2.24it/s]
==> Calculating eval metrics...
100%|█████████████████████████████████████| 19200/19200 [07:45<00:00, 41.27it/s]
AUC: 0.949743923611111
L2 Distance:  0.10772794595155237
Angular Error: 19.720474374658128
Percentage Distances:  [0.01, 0.03, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3]
Proximate Accuracy: 	0.027	0.173	0.314	0.542	0.767	0.903	0.958	0.978	
Class Proximate Accuracy: 	0.024	0.147	0.242	0.318	0.332	0.334	0.335	0.336	
