In [1]:
import sys
import torch
from torchvision import transforms
from torch.autograd import Variable
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F

import time
import os
import numpy as np
import json
import cv2
from PIL import Image, ImageOps
import random
from tqdm import tqdm
import operator
import itertools
from scipy.io import  loadmat
import logging

from scipy import signal

from utils import data_transforms

from models.gazenet import GazeNet
from models.__init__ import save_checkpoint, resume_checkpoint
from dataloader.gazenet import GooDataset, GazeDataset

def generate_data_field(eye_point, height=224, width=224):
    """Build a field of unit vectors pointing from the eye to every pixel.

    Args:
        eye_point: (x, y) eye location, normalised to the range [0, 1].
        height: number of rows of the generated field (default 224).
        width: number of columns of the generated field (default 224).

    Returns:
        A float32 ``np.ndarray`` of shape ``(2, height, width)``. Channel 0
        holds the x component and channel 1 the y component of the
        direction from the eye to each pixel. The pixel offset is divided by
        ``max(distance, 0.1)``, so vectors have unit length everywhere except
        within 0.1 px of the eye, where they shrink towards zero.
    """
    # Pixel coordinate grids: x_grid[r, c] == c and y_grid[r, c] == r.
    x_grid, y_grid = np.meshgrid(
        np.arange(width, dtype=np.float32),
        np.arange(height, dtype=np.float32),
    )
    grid = np.stack((x_grid, y_grid))

    # Convert the normalised eye position to pixel units and subtract it so
    # that every cell holds the offset from the eye.
    eye_x, eye_y = eye_point
    eye_px = np.array([eye_x * width, eye_y * height]).astype(np.float32)
    grid -= eye_px.reshape([2, 1, 1])

    norm = np.sqrt(np.sum(grid ** 2, axis=0, keepdims=True))
    # avoid zero norm (the pixel that coincides with the eye)
    norm = np.maximum(norm, 0.1)
    grid /= norm
    return grid

def preprocess_image(image_path, eye):
    """Load an image from disk and build the input sample for the network.

    Args:
        image_path: path of the image, read with ``cv2.imread``.
        eye: (x, y) eye position, normalised to the range [0, 1].

    Returns:
        dict with the keys
        ``'image'``        - the full image after ``data_transforms['test']``,
        ``'face_image'``   - a crop of +/-0.15 (in normalised units) around
                             the eye, clamped to the image, after the same
                             transform,
        ``'eye_position'`` - ``torch.FloatTensor`` built from ``eye``,
        ``'gaze_field'``   - tensor from ``generate_data_field(eye)``.

    Raises:
        IOError: if the image cannot be read.
        ValueError: if the crop around ``eye`` contains no pixels (eye point
            too far outside the image).
    """
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError('Could not read image: %s' % image_path)

    # crop face: a 0.3 x 0.3 window centred on the eye, clamped to [0, 1]
    x_c, y_c = eye
    x_0 = max(x_c - 0.15, 0)
    y_0 = max(y_c - 0.15, 0)
    x_1 = min(x_c + 0.15, 1)
    y_1 = min(y_c + 0.15, 1)

    h, w = image.shape[:2]
    face_image = image[int(y_0 * h):int(y_1 * h), int(x_0 * w):int(x_1 * w), :]
    if face_image.size == 0:
        raise ValueError('Empty face crop for eye position %r' % (tuple(eye),))

    # process face_image for face net (OpenCV loads BGR, PIL expects RGB)
    face_image = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
    face_image = Image.fromarray(face_image)
    face_image = data_transforms['test'](face_image)
    # process image for saliency net
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = Image.fromarray(image)
    image = data_transforms['test'](image)

    # generate gaze field
    gaze_field = generate_data_field(eye_point=eye)
    sample = {'image' : image,
              'face_image': face_image,
              'eye_position': torch.FloatTensor(eye),
              'gaze_field': torch.from_numpy(gaze_field)}

    return sample


def test(net, test_image_path, eye):
    """Run the network on one image and return the predicted gaze point.

    Args:
        net: the gaze model; called with the list
            ``[image, face_image, gaze_field, eye_position]`` and expected to
            return a pair whose second element is the heatmap.
        test_image_path: path of the image to evaluate.
        eye: (x, y) eye position, normalised to the range [0, 1].

    Returns:
        ``(heatmap, x, y)`` where ``heatmap`` is a 56x56 numpy array and
        ``x``/``y`` are the coordinates of its maximum divided by 56.
    """
    net.eval()

    data = preprocess_image(test_image_path, eye)

    # Put the inputs on the same device as the model; fall back to CUDA
    # (the original behaviour) when the model exposes no parameters.
    first_param = next(net.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cuda')

    # unsqueeze(0) adds the batch dimension the network expects.
    inputs = [data[key].unsqueeze(0).to(device)
              for key in ('image', 'face_image', 'gaze_field', 'eye_position')]

    # `Variable(..., volatile=True)` is a no-op since PyTorch 0.4;
    # torch.no_grad() is what actually disables graph construction.
    with torch.no_grad():
        _, predict_heatmap = net(inputs)

    final_output = predict_heatmap.detach().cpu().numpy()

    heatmap = final_output.reshape([224 // 4, 224 // 4])

    # argmax is (row, col) == (y, x); normalise by the heatmap side length.
    h_index, w_index = np.unravel_index(heatmap.argmax(), heatmap.shape)
    f_point = np.array([w_index / 56., h_index / 56.])

    return heatmap, f_point[0], f_point[1]

def test_on_single(net, dataloader):
    net.eval()
    heatmaps = []
    
    idx = 10
    data = next(iter(dataloader))
    image_path = data['image_path'][idx]
    
    image, face_image, gaze_field, eye_position = data['image'], data['face_image'], data['gaze_field'], data['eye_position']
    image, face_image, gaze_field, eye_position = map(lambda x: Variable(x.cuda(), volatile=True), [image, face_image, gaze_field, eye_position])
    
    x = eye_position[idx][0].item()
    y = eye_position[idx][1].item()
    eyes = (x, y)
    #print(image.shape)
    #print(face_image.shape)
    #print(eye_position.shape)
    direction, predict_heatmap = net([image, face_image, gaze_field, eye_position])

    final_output = predict_heatmap.cpu().data.numpy()

    heatmap = final_output[idx].reshape([224 // 4, 224 // 4])

    h_index, w_index = np.unravel_index(heatmap.argmax(), heatmap.shape)
    f_point = np.array([w_index / 56., h_index / 56.])


    return heatmap, eyes, f_point, image_path

def test_on_batch(net, dataloader):
    """Run the network on the first batch and draw every prediction.

    For each sample of the first batch produced by ``dataloader`` the
    predicted gaze point (argmax of the 56x56 heatmap, divided by 56) is
    passed to ``draw_result`` together with the eye position, the heatmap,
    the ground-truth position and the sample index within the batch.

    Args:
        net: the gaze model; called with the list
            ``[image, face_image, gaze_field, eye_position]`` and expected to
            return a pair whose second element is the batch of heatmaps.
        dataloader: iterable of batches; each batch is a dict with the keys
            ``'image'``, ``'face_image'``, ``'gaze_field'``,
            ``'eye_position'``, ``'gt_position'`` and ``'image_path'``.

    Returns:
        None. The visible effect is whatever ``draw_result`` does per sample.
    """
    net.eval()

    data = next(iter(dataloader))

    # Put the inputs on the same device as the model; fall back to CUDA
    # (the original behaviour) when the model exposes no parameters.
    first_param = next(net.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cuda')

    inputs = [data[key].to(device)
              for key in ('image', 'face_image', 'gaze_field', 'eye_position')]

    # Keep host-side copies for drawing so that no per-sample device
    # synchronisation is needed when the coordinates are converted to ints.
    eye_position = data['eye_position']
    gt_position = data['gt_position']
    image_paths = data['image_path']
    N = data['image'].shape[0]

    # `Variable(..., volatile=True)` is a no-op since PyTorch 0.4;
    # torch.no_grad() is what actually disables graph construction.
    with torch.no_grad():
        _, predict_heatmap = net(inputs)
    final_output = predict_heatmap.detach().cpu().numpy()

    for idx in range(N):

        heatmap = final_output[idx].reshape([224 // 4, 224 // 4])

        # argmax is (row, col) == (y, x); normalise by the heatmap side length.
        h_index, w_index = np.unravel_index(heatmap.argmax(), heatmap.shape)
        f_point = np.array([w_index / 56., h_index / 56.])

        draw_result(image_paths[idx], eye_position[idx], heatmap, f_point, gt_position[idx], idx)

def draw_result(image_path, eye, heatmap, gaze_point, gt_point, idx=0, save_dir='./sample_out/'):
    """Draw prediction and ground truth on an image and save the result.

    The output is the annotated image and a heatmap overlay placed side by
    side. It is written to ``<save_dir>/out_<idx>.png`` and also returned.

    Args:
        image_path: path of the image, read with ``cv2.imread``.
        eye: (x, y) eye position, normalised to [0, 1].
        heatmap: numpy array of heatmap values; expected to be 2-D, since it
            is stacked into three channels before resizing.
        gaze_point: (x, y) predicted gaze point, normalised to [0, 1].
        gt_point: (x, y) ground-truth gaze point, normalised to [0, 1].
        idx: number used in the output file name.
        save_dir: directory the image is written to; created if missing.

    Returns:
        The concatenated image (annotated original on the left, heatmap
        overlay on the right) as a uint8 array.

    Raises:
        IOError: if the input image cannot be read or the output image
            cannot be written.
    """
    im = cv2.imread(image_path)
    if im is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError('Could not read image: %s' % image_path)
    image_height, image_width = im.shape[:2]

    x1, y1 = eye
    x2, y2 = gaze_point
    x3, y3 = gt_point
    # Normalised coordinates -> integer pixel coordinates.
    x1, y1 = image_width * x1, y1 * image_height
    x2, y2 = image_width * x2, y2 * image_height
    x3, y3 = image_width * x3, y3 * image_height
    x1, y1, x2, y2, x3, y3 = map(int, [x1, y1, x2, y2, x3, y3])

    # White dots mark the three points. Colours are BGR (OpenCV order): the
    # eye->prediction line is blue, the eye->ground-truth line is orange.
    cv2.circle(im, (x1, y1), 5, [255, 255, 255], -1)
    cv2.circle(im, (x2, y2), 5, [255, 255, 255], -1)
    cv2.circle(im, (x3, y3), 5, [255, 255, 255], -1)
    cv2.line(im, (x1, y1), (x2, y2), [255, 0, 0], 2)
    cv2.line(im, (x1, y1), (x3, y3), [0, 165, 255], 2)

    # heatmap visualization: min-max scale to 0..255. A constant heatmap has
    # zero range; render it as all zeros instead of dividing by zero.
    low = heatmap.min()
    value_range = heatmap.max() - low
    if value_range > 0:
        heatmap = ((heatmap - low) / value_range * 255).astype(np.uint8)
    else:
        heatmap = np.zeros_like(heatmap, dtype=np.uint8)
    heatmap = np.stack([heatmap, heatmap, heatmap], axis=2)
    heatmap = cv2.resize(heatmap, (image_width, image_height))

    # Blend 80% heatmap with 20% of the annotated image.
    heatmap = (0.8 * heatmap.astype(np.float32) + 0.2 * im.astype(np.float32)).astype(np.uint8)
    img = np.concatenate((im, heatmap), axis=1)

    os.makedirs(save_dir, exist_ok=True)

    filename = 'out_%s.png' % str(idx)
    save_path = os.path.join(save_dir, filename)
    print(save_path)
    # cv2.imwrite reports failure through its return value only.
    if not cv2.imwrite(save_path, img):
        raise IOError('Could not write image: %s' % save_path)

    return img


In [12]:
# Load model: build the network, move it to the GPU and restore its weights.
net = GazeNet()
net.cuda()

# Checkpoint to restore. None is passed in the optimizer slot because only
# inference is run in this cell.
resume_path = './saved_models/gazenet_goo/model_epoch25.pth.tar'
net, optimizer, start_epoch = resume_checkpoint(net, None, resume_path)

# Prepare dataloaders.
# NOTE(review): these are absolute, machine-specific paths; they must be
# adapted before the cell can run anywhere else.
test_images_dir = '/hdd/HENRI/goosynth/test/'
test_pickle_path = '/hdd/HENRI/goosynth/picklefiles/testpickle120.pickle'
batch_size = 16

# For GOO.
# include_path=True presumably makes every sample carry its 'image_path'
# (test_on_batch reads data['image_path']) - confirm in dataloader.gazenet.
val_set = GooDataset(test_images_dir, test_pickle_path, 'test', include_path=True)
# shuffle=False keeps the first batch, the only one test_on_batch uses,
# the same on every run.
test_data_loader = torch.utils.data.DataLoader(val_set, batch_size=batch_size, num_workers=8, shuffle=False)

# Draws the predictions for the first batch; draw_result writes one image
# per sample to ./sample_out/out_<idx>.png.
test_on_batch(net, test_data_loader)

=> loading checkpoint './saved_models/gazenet_goo/model_epoch25.pth.tar'
=> Optimizer has different parameter groups. Usually this will occur for staged optimizers (GazeNet, GazeMask)
=> loaded checkpoint './saved_models/gazenet_goo/model_epoch25.pth.tar' (epoch 25)
19200




tensor([0.6062, 0.4292])
./sample_out/out_0.png
./sample_out/out_1.png
./sample_out/out_2.png
./sample_out/out_3.png
./sample_out/out_4.png
./sample_out/out_5.png
./sample_out/out_6.png
./sample_out/out_7.png
./sample_out/out_8.png
./sample_out/out_9.png
./sample_out/out_10.png
./sample_out/out_11.png
./sample_out/out_12.png
./sample_out/out_13.png
./sample_out/out_14.png
./sample_out/out_15.png
