In [9]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torch.autograd import Variable
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import DataParallel
from models.gazenet import GazeNet

import time
import os
import numpy as np
import json
import cv2
from PIL import Image, ImageOps
import random
from tqdm import tqdm
import operator
import itertools
from scipy.io import  loadmat
import logging

from scipy import signal

from utils import data_transforms
from utils import get_paste_kernel, kernel_map
import pickle

class GazeDataset(Dataset):
    """Gaze-following samples described by a MATLAB annotation file.

    The ``.mat`` file is expected to contain ``<split>_bbox``, ``<split>_gaze``,
    ``<split>_path``, ``<split>_eyes`` and ``<split>_meta`` entries, where
    ``<split>`` is the value of ``training``. Paths are indexed as
    ``paths[idx][0][0]``; the other annotations as ``array[0, idx]``.

    Args:
        root_dir: directory that the annotated image paths are relative to.
        mat_file: path of the annotation file read with ``scipy.io.loadmat``.
        training: ``'train'`` or ``'test'``; selects the annotation keys, the
            entry of ``data_transforms`` that is applied, and whether random
            horizontal flipping is enabled (train only).
        include_path: when true every sample also carries ``'image_path'``.
    """

    def __init__(self, root_dir, mat_file, training='train', include_path=False):
        assert (training in set(['train', 'test']))
        self.root_dir = root_dir
        self.mat_file = mat_file
        self.training = training
        self.include_path = include_path

        anns = loadmat(self.mat_file)
        # Bounding boxes are kept for inspection only: __getitem__ crops the
        # face from a fixed window around the eye position instead.
        self.bboxes = anns[self.training + '_bbox']
        self.gazes = anns[self.training + '_gaze']
        self.paths = anns[self.training + '_path']
        self.eyes = anns[self.training + '_eyes']
        self.meta = anns[self.training + '_meta']
        self.image_num = self.paths.shape[0]

        # Sanity check: show the first bbox / gaze / eye annotation.
        print(self.bboxes[0,0][0])
        print(self.gazes[0,0][0])
        print(self.eyes[0,0][0])

        logging.info('%s contains %d images' % (self.mat_file, self.image_num))

    def generate_data_field(self, eye_point):
        """Build the field of directions pointing from the eye to every pixel.

        Args:
            eye_point: ``(x, y)`` eye position, each between 0 and 1.

        Returns:
            ``float32`` array of shape ``(2, 224, 224)``. Channel 0 holds the x
            component and channel 1 the y component of the offset from the eye
            to each pixel, divided by the offset length.
        """
        height, width = 224, 224
        # x_grid[i, j] == j and y_grid[i, j] == i, both of shape (height, width).
        x_grid, y_grid = np.meshgrid(np.arange(width), np.arange(height))
        grid = np.stack((x_grid, y_grid)).astype(np.float32)

        x, y = eye_point
        x, y = x * width, y * height

        grid -= np.array([x, y]).reshape([2, 1, 1]).astype(np.float32)
        norm = np.sqrt(np.sum(grid ** 2, axis=0)).reshape([1, height, width])
        # avoid zero norm: lengths below 0.1 px are clamped, so the vector at
        # the eye pixel itself stays (close to) zero instead of becoming NaN.
        norm = np.maximum(norm, 0.1)
        grid /= norm
        return grid

    def __len__(self):
        """Return the number of annotated images."""
        return self.image_num

    @staticmethod
    def _crop_face(image, eye):
        """Cut the window of +/-0.15 (relative units) around ``eye``, clipped to the image."""
        x_c, y_c = eye
        x_0 = max(x_c - 0.15, 0)
        y_0 = max(y_c - 0.15, 0)
        x_1 = min(x_c + 0.15, 1)
        y_1 = min(y_c + 0.15, 1)
        h, w = image.shape[:2]
        return image[int(y_0 * h):int(y_1 * h), int(x_0 * w):int(x_1 * w), :]

    def _to_network_input(self, bgr_image):
        """Convert a BGR array to RGB, wrap it as a PIL image and apply the split's transform."""
        rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
        return data_transforms[self.training](Image.fromarray(rgb_image))

    def __getitem__(self, idx):
        """Load and preprocess the sample at position ``idx``.

        Returns:
            dict with ``'image'``, ``'face_image'``, ``'eye_position'``,
            ``'gaze_field'``, ``'gt_position'``, ``'gt_heatmap'`` and, when
            ``include_path`` is set, ``'image_path'``.

        Raises:
            OSError: if the image file is missing or cannot be decoded.
        """
        image_path = self.paths[idx][0][0]
        image_path = os.path.join(self.root_dir, image_path)

        eye = self.eyes[0, idx][0]
        # todo: process gaze differently for training or testing
        gaze = self.gazes[0, idx].mean(axis=0)

        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the path rather than later with an opaque error.
            raise OSError('could not read image (missing or unreadable): %s' % image_path)

        # Random horizontal flip (training only): mirror the x coordinates too.
        if random.random() > 0.5 and self.training == 'train':
            eye = [1.0 - eye[0], eye[1]]
            gaze = [1.0 - gaze[0], gaze[1]]
            image = cv2.flip(image, 1)

        # The face crop is transformed before the full image, as before, so any
        # randomness inside the transform is consumed in the same order.
        face_image = self._to_network_input(self._crop_face(image, eye))
        image = self._to_network_input(image)

        # generate gaze field
        gaze_field = self.generate_data_field(eye_point=eye)
        # generate heatmap
        heatmap = get_paste_kernel((224 // 4, 224 // 4), gaze, kernel_map, (224 // 4, 224 // 4))

        sample = {'image': image,
                  'face_image': face_image,
                  'eye_position': torch.FloatTensor(eye),
                  'gaze_field': torch.from_numpy(gaze_field),
                  'gt_position': torch.FloatTensor(gaze),
                  'gt_heatmap': torch.FloatTensor(heatmap).unsqueeze(0)}
        if self.include_path:
            sample['image_path'] = image_path

        return sample



In [6]:
class GooDataset(Dataset):
    """Gaze-following samples described by a pickled list of annotation dicts.

    Each record is read through the keys ``'filename'``, ``'hx'``, ``'hy'``
    (head/eye point) and ``'gaze_cx'``, ``'gaze_cy'`` (gaze target).

    Args:
        root_dir: directory that each record's ``'filename'`` is relative to.
        mat_file: path of the pickle file holding the records (the parameter
            keeps its name for parity with ``GazeDataset``).
        training: ``'train'`` or ``'test'``; selects the entry of
            ``data_transforms`` that is applied and whether random horizontal
            flipping is enabled (train only).
        include_path: when true every sample also carries ``'image_path'``.
    """

    # (width, height) that the pixel annotations are divided by to obtain
    # relative coordinates. NOTE(review): presumably the native image
    # resolution of the dataset - confirm before using other image sizes.
    ANNOTATION_SIZE = (640, 480)

    def __init__(self, root_dir, mat_file, training='train',include_path=False):
        assert (training in set(['train', 'test']))
        self.root_dir = root_dir
        self.mat_file = mat_file
        self.training = training
        self.include_path = include_path

        # SECURITY: pickle.load can execute arbitrary code; only point this at
        # annotation files from a trusted source.
        with open(mat_file, 'rb') as f:
            self.data = pickle.load(f)
            self.image_num = len(self.data)

        print(self.image_num)
        logging.info('%s contains %d images' % (self.mat_file, self.image_num))

    def generate_data_field(self, eye_point):
        """Build the field of directions pointing from the eye to every pixel.

        Args:
            eye_point: ``(x, y)`` eye position, each between 0 and 1.

        Returns:
            ``float32`` array of shape ``(2, 224, 224)``. Channel 0 holds the x
            component and channel 1 the y component of the offset from the eye
            to each pixel, divided by the offset length.
        """
        height, width = 224, 224
        # x_grid[i, j] == j and y_grid[i, j] == i, both of shape (height, width).
        x_grid, y_grid = np.meshgrid(np.arange(width), np.arange(height))
        grid = np.stack((x_grid, y_grid)).astype(np.float32)

        x, y = eye_point
        x, y = x * width, y * height

        grid -= np.array([x, y]).reshape([2, 1, 1]).astype(np.float32)
        norm = np.sqrt(np.sum(grid ** 2, axis=0)).reshape([1, height, width])
        # avoid zero norm: lengths below 0.1 px are clamped, so the vector at
        # the eye pixel itself stays (close to) zero instead of becoming NaN.
        norm = np.maximum(norm, 0.1)
        grid /= norm
        return grid

    def __len__(self):
        """Return the number of annotation records."""
        return self.image_num

    @staticmethod
    def _crop_face(image, eye):
        """Cut the window of +/-0.15 (relative units) around ``eye``, clipped to the image."""
        x_c, y_c = eye
        x_0 = max(x_c - 0.15, 0)
        y_0 = max(y_c - 0.15, 0)
        x_1 = min(x_c + 0.15, 1)
        y_1 = min(y_c + 0.15, 1)
        h, w = image.shape[:2]
        return image[int(y_0 * h):int(y_1 * h), int(x_0 * w):int(x_1 * w), :]

    def _to_network_input(self, bgr_image):
        """Convert a BGR array to RGB, wrap it as a PIL image and apply the split's transform."""
        rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
        return data_transforms[self.training](Image.fromarray(rgb_image))

    def __getitem__(self, idx):
        """Load and preprocess the sample at position ``idx``.

        Returns:
            dict with ``'image'``, ``'face_image'``, ``'eye_position'``,
            ``'gaze_field'``, ``'gt_position'``, ``'gt_heatmap'`` and, when
            ``include_path`` is set, ``'image_path'``.

        Raises:
            OSError: if the image file is missing or cannot be decoded.
        """
        data = self.data[idx]
        image_path = data['filename']
        image_path = os.path.join(self.root_dir, image_path)

        # Pixel annotations -> relative coordinates.
        ann_w, ann_h = self.ANNOTATION_SIZE
        eye = [float(data['hx'])/ann_w, float(data['hy'])/ann_h]
        gaze = [float(data['gaze_cx'])/ann_w, float(data['gaze_cy'])/ann_h]

        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the path rather than later with an opaque error.
            raise OSError('could not read image (missing or unreadable): %s' % image_path)

        # Random horizontal flip (training only): mirror the x coordinates too.
        if random.random() > 0.5 and self.training == 'train':
            eye = [1.0 - eye[0], eye[1]]
            gaze = [1.0 - gaze[0], gaze[1]]
            image = cv2.flip(image, 1)

        # The face crop is transformed before the full image, as before, so any
        # randomness inside the transform is consumed in the same order.
        face_image = self._to_network_input(self._crop_face(image, eye))
        image = self._to_network_input(image)

        # generate gaze field
        gaze_field = self.generate_data_field(eye_point=eye)
        # generate heatmap
        heatmap = get_paste_kernel((224 // 4, 224 // 4), gaze, kernel_map, (224 // 4, 224 // 4))

        sample = {'image': image,
                  'face_image': face_image,
                  'eye_position': torch.FloatTensor(eye),
                  'gaze_field': torch.from_numpy(gaze_field),
                  'gt_position': torch.FloatTensor(gaze),
                  'gt_heatmap': torch.FloatTensor(heatmap).unsqueeze(0)}
        if self.include_path:
            sample['image_path'] = image_path

        return sample

In [7]:
# GazeFollow: build the training and evaluation datasets and their loaders.

batch_size = 32

train_set = GazeDataset(
    root_dir='/home/eee198/Documents/datasets/GazeFollowData/',
    mat_file='/home/eee198/Documents/datasets/GazeFollowData/train_annotations.mat',
    training='train',
)
# Training batches are drawn in shuffled order.
train_data_loader = DataLoader(
    train_set,
    batch_size=batch_size,
    shuffle=True,
    num_workers=16,
)

# include_path=True makes every test sample carry its 'image_path' as well.
test_set = GazeDataset(
    root_dir='/home/eee198/Documents/datasets/GazeFollowData/',
    mat_file='/home/eee198/Documents/datasets/GazeFollowData/test_annotations.mat',
    training='test',
    include_path=True,
)
# Evaluation keeps the dataset order and uses half the training batch size.
test_data_loader = DataLoader(
    test_set,
    batch_size=batch_size // 2,
    shuffle=False,
    num_workers=8,
)

[0.29 0.11 0.68 0.89]
[0.51090909 0.31132813]
[0.41272727 0.21429687]
[0.09 0.52 0.27 0.48]
[0.284 0.86 ]
[0.258      0.60666667]


In [10]:
# GOO: build the evaluation dataset and loader.
# Note: this rebinds `test_set` / `test_data_loader` from the GazeFollow cell.

batch_size = 32

# include_path=True makes every test sample carry its 'image_path' as well.
test_set = GooDataset(
    root_dir='/hdd/HENRI/goosynth/test/',
    mat_file='/hdd/HENRI/goosynth/picklefiles/testpickle120.pickle',
    training='test',
    include_path=True,
)
# Evaluation keeps the dataset order and uses half of `batch_size`.
test_data_loader = DataLoader(
    test_set,
    batch_size=batch_size // 2,
    shuffle=False,
    num_workers=8,
)

19200


In [11]:
# Pull the first batch from whichever `test_data_loader` was defined last and
# list the fields it contains.
sample = next(iter(test_data_loader))
print(sample.keys())

# Image check: 'image_path' is only present when the dataset was created with
# include_path=True. Show how many paths the batch has, the first path, and
# the eye positions of the batch.
print(len(sample['image_path']))
print(sample['image_path'][0])
print(sample['eye_position'])

eye coords: eye coords: eye coords: eye coords:     [0.3484375, 0.5020833333333333][0.1921875, 0.4][0.5390625, 0.5479166666666667][0.3765625, 0.29791666666666666]eye coords: 
eye coords:  
eye coords: 

[0.7640625, 0.38333333333333336]gaze coords: gaze coords: gaze coords: 