In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from glob import glob
import matplotlib.cm as cm
from scipy.misc import imresize
import cv2
import pandas as pd
import scipy.io as io

  from ._conv import register_converters as _register_converters


In [2]:
def data_extraction(path):
    """Load one normalized .mat recording and return its per-eye arrays.

    The file is read with ``scipy.io.loadmat``; the ``'data'`` struct is
    expected to contain ``'right'`` and ``'left'`` sub-structs, each with
    ``'image'``, ``'gaze'`` and ``'pose'`` fields.

    Args:
        path: location of the .mat file.

    Returns:
        A list of six entries, interleaved right/left per field:
        ``[right image, left image, right gaze, left gaze,
        right pose, left pose]``.
    """
    fields = ('image', 'gaze', 'pose')
    eyes = io.loadmat(path)['data']

    # loadmat wraps every struct in a 1x1 array, hence the [0, 0] / [0][0] unwrapping.
    right_eye = eyes['right'][0, 0]
    left_eye = eyes['left'][0, 0]

    right_parts = [right_eye[field][0][0] for field in fields]
    left_parts = [left_eye[field][0][0] for field in fields]

    # Interleave so that even indices are right-eye data and odd indices left-eye data.
    data_all = []
    for right_item, left_item in zip(right_parts, left_parts):
        data_all.extend((right_item, left_item))
    return data_all

In [3]:
def label_extraction(path, screen_path):
    """Build screen-normalized labels from an annotation file.

    Columns 24 and 25 (0-based) of the space-separated, header-less
    annotation file are divided by the screen width and height read from the
    screen-size .mat file.
    NOTE(review): presumably these columns are the on-screen target position
    in pixels - confirm against the dataset documentation.

    Args:
        path: location of the annotation text file.
        screen_path: location of the .mat file holding ``'width_pixel'`` and
            ``'height_pixel'``.

    Returns:
        A DataFrame with the columns ``'Screen_x'`` and ``'Screen_y'``.
    """
    annotations = pd.read_csv(path, sep=' ', header=None)
    screen = io.loadmat(screen_path)

    # loadmat stores scalars as 1x1 arrays, hence the [0][0].
    screen_x = annotations[24] / screen['width_pixel'][0][0]
    screen_y = annotations[25] / screen['height_pixel'][0][0]

    return pd.DataFrame({'Screen_x': screen_x, 'Screen_y': screen_y})

In [6]:
# Location of the MPIIGaze "Data" directory and the recording to load.
# Edit these three values to point at another install, subject or day; the
# defaults reproduce the paths that were previously hard-coded inline.
DATA_ROOT = 'C:/MPIIGaze/MPIIGaze/Data'
SUBJECT_ID = 'p00'
DAY_ID = 'day01'

# Normalized eye images, gaze and pose for the selected subject/day.
data_test = data_extraction(
    '{0}/Normalized/{1}/{2}.mat'.format(DATA_ROOT, SUBJECT_ID, DAY_ID))

# Labels for the same subject/day, scaled by that subject's screen size.
label_text = label_extraction(
    '{0}/Original/{1}/{2}/annotation.txt'.format(DATA_ROOT, SUBJECT_ID, DAY_ID),
    '{0}/Original/{1}/Calibration/screenSize.mat'.format(DATA_ROOT, SUBJECT_ID))

In [16]:
# Shape of the first left-eye image: index 1 of the list returned by
# data_extraction holds the left-eye images.
data_test[1][0].shape

(36, 60)

In [5]:
def make_dataset_img_label(image_shape, label_shape, data_list, label_list):
    """Flatten the eye images into 2-D arrays and collect the matching labels.

    Args:
        image_shape: number of values per flattened image.
        label_shape: number of label values per sample; each row is filled
            with a (Screen_x, Screen_y) pair.
        data_list: sequence whose entry 0 holds the right-eye images and
            entry 1 the left-eye images; the sample count is taken from
            entry 0.
        label_list: mapping with ``'Screen_x'`` and ``'Screen_y'`` entries
            that can be indexed by sample number.

    Returns:
        ``(right_img, left_img, label)`` - float arrays with one row per sample.
    """
    n_samples = len(data_list[0])

    right_img = np.zeros((n_samples, image_shape))
    left_img = np.zeros((n_samples, image_shape))
    label = np.zeros((n_samples, label_shape))

    for row in range(n_samples):
        left_img[row] = data_list[1][row].reshape(image_shape)
        right_img[row] = data_list[0][row].reshape(image_shape)
        label[row] = (label_list['Screen_x'][row], label_list['Screen_y'][row])

    return right_img, left_img, label

def make_dataset_gaze_pose(data_shape, data_list):
    """Copy the per-sample gaze and pose vectors into dense 2-D arrays.

    Args:
        data_shape: number of values per gaze/pose vector.
        data_list: sequence laid out as ``[right image, left image,
            right gaze, left gaze, right pose, left pose]``; the sample count
            is taken from entry 0.

    Returns:
        ``(left_gaze, right_gaze, left_pose, right_pose)`` - note that left
        comes before right. Each is a float array with one row per sample.
    """
    n_samples = len(data_list[0])

    # One zero-initialised buffer per output, in return order.
    buffers = [np.zeros((n_samples, data_shape)) for _ in range(4)]
    left_gaze, right_gaze, left_pose, right_pose = buffers

    # Index into data_list that feeds each buffer, in the same order.
    sources = (3, 2, 5, 4)

    for row in range(n_samples):
        for source, buffer in zip(sources, buffers):
            buffer[row, :] = data_list[source][row]

    return left_gaze, right_gaze, left_pose, right_pose

In [6]:
# 36 * 60 is the flattened size of one eye image; 2 is the width of a
# (Screen_x, Screen_y) label row. The helper returns right before left.
RE_img_data, LE_img_data, dataset_label = make_dataset_img_label(36 * 60, 2, data_test, label_text)
# 3 is the per-sample width used for the gaze and pose vectors. Unlike the
# image helper, this one returns left before right.
LE_gaze_data, RE_gaze_data, LE_pose_data, RE_pose_data = make_dataset_gaze_pose(3, data_test)

# Make graph for eye images

In [7]:
def model_for_image(X_img, keep_prob, name):
    """Build the convolutional branch for one eye image.

    Two strided convolutions and a max-pool are followed by three dense
    layers (400 -> 256 -> 128 -> 64). Intermediate tensors and shapes are
    printed while the graph is being built.

    Args:
        X_img: image batch tensor; this notebook passes a [-1, 36, 60, 1]
            reshape of the flat pixel placeholder.
        keep_prob: keep probability given to both dropout layers.
        name: suffix for the ``tf.get_variable`` weight names ("W3_", "W4_",
            "W5_"), so every call creates its own dense weights.

    Returns:
        The un-activated output of the last dense layer, 64 values per sample.
    """
    # --- conv block 1: 4x4 kernels, 1 -> 5 channels, stride 2 ---
    conv1_kernel = tf.Variable(tf.random_normal([4, 4, 1, 5], stddev=0.01))
    conv1_out = tf.nn.conv2d(X_img, conv1_kernel,
                             strides=[1, 2, 2, 1], padding='SAME')
    conv1_act = tf.nn.relu(conv1_out)

    print(conv1_act)

    # --- conv block 2: 2x2 kernels, 5 -> 10 channels, stride 2, then 2x2 max-pool ---
    conv2_kernel = tf.Variable(tf.random_normal([2, 2, 5, 10], stddev=0.01))
    conv2_out = tf.nn.conv2d(conv1_act, conv2_kernel,
                             strides=[1, 2, 2, 1], padding='SAME')
    conv2_act = tf.nn.relu(conv2_out)
    pooled = tf.nn.max_pool(conv2_act, ksize=[1, 2, 2, 1],
                            strides=[1, 2, 2, 1], padding='SAME')
    # The flatten size is hard-coded: 5 * 8 * 10 matches the pooled shape
    # produced by 36x60 inputs.
    flat = tf.reshape(pooled, [-1, 5 * 8 * 10])

    print("'L2's shape", pooled.shape)
    print(flat)

    # --- dense 400 -> 256, ReLU, dropout ---
    fc1_weights = tf.get_variable(
        "W3_" + name, shape=[400, 256],
        initializer=tf.contrib.layers.xavier_initializer())
    fc1_bias = tf.Variable(tf.random_normal([256]))
    fc1_act = tf.nn.relu(tf.matmul(flat, fc1_weights) + fc1_bias)
    fc1_drop = tf.nn.dropout(fc1_act, keep_prob=keep_prob)

    print(fc1_drop.shape)

    # --- dense 256 -> 128, ReLU, dropout ---
    fc2_weights = tf.get_variable(
        "W4_" + name, shape=[256, 128],
        initializer=tf.contrib.layers.xavier_initializer())
    fc2_bias = tf.Variable(tf.random_normal([128]))
    fc2_act = tf.nn.relu(tf.matmul(fc1_drop, fc2_weights) + fc2_bias)
    fc2_drop = tf.nn.dropout(fc2_act, keep_prob=keep_prob)

    # --- dense 128 -> 64, linear output ---
    out_weights = tf.get_variable(
        "W5_" + name, shape=[128, 64],
        initializer=tf.contrib.layers.xavier_initializer())
    out_bias = tf.Variable(tf.random_normal([64]))
    return tf.matmul(fc2_drop, out_weights) + out_bias

# Make graph for the other data (gaze and pose)

In [8]:
def model_for_others(X, name):
    """Build a two-layer dense branch for a 3-value input (gaze or pose).

    Both layers are 3 -> 3; the first is followed by a ReLU, the second is
    linear. The shape of the hidden activation is printed while the graph is
    being built.

    Args:
        X: input tensor with 3 values per sample.
        name: suffix for the ``tf.get_variable`` weight names ("W1_", "W2_"),
            so every call creates its own weights.

    Returns:
        The un-activated output of the second layer, 3 values per sample.
    """
    # Hidden layer.
    hidden_weights = tf.get_variable(
        "W1_" + name, shape=[3, 3],
        initializer=tf.contrib.layers.xavier_initializer())
    hidden_bias = tf.Variable(tf.random_normal([3]))
    hidden = tf.nn.relu(tf.matmul(X, hidden_weights) + hidden_bias)

    print(hidden.shape)

    # Linear output layer.
    out_weights = tf.get_variable(
        "W2_" + name, shape=[3, 3],
        initializer=tf.contrib.layers.xavier_initializer())
    out_bias = tf.Variable(tf.random_normal([3]))
    return tf.matmul(hidden, out_weights) + out_bias

# Define the variables and build the graphs using the functions above

In [9]:
# Step size for the Adam optimizer that is created further down.
learning_rate = 0.001

# Regression target (two values per sample) and the dropout keep probability.
Y = tf.placeholder(tf.float32, [None, 2])
keep_prob = tf.placeholder(tf.float32)

# Flattened eye images (2160 = 36 * 60), reshaped to single-channel 36x60
# images for the convolutional branch.
X_left = tf.placeholder(tf.float32, [None, 2160])
X_right = tf.placeholder(tf.float32, [None, 2160])
X_img_left = tf.reshape(X_left, [-1, 36, 60, 1])
X_img_right = tf.reshape(X_right, [-1, 36, 60, 1])

# Per-eye gaze and pose inputs, three values each.
X_gaze_r = tf.placeholder(tf.float32, [None, 3])
X_gaze_l = tf.placeholder(tf.float32, [None, 3])
X_pose_r = tf.placeholder(tf.float32, [None, 3])
X_pose_l = tf.placeholder(tf.float32, [None, 3])

# One independent branch per input; the name argument keeps the
# tf.get_variable names of the branches distinct.
# NOTE(review): those names are fixed, so re-running this cell in the same
# kernel without resetting the default graph will presumably fail with a
# "variable already exists" error - confirm.
LE_img = model_for_image(X_img_left, keep_prob, 'left_eye')  # left-eye image
RE_img = model_for_image(X_img_right, keep_prob, 'right_eye')  # right-eye image
LE_pose = model_for_others(X_pose_l, 'left_pose')
LE_gaze = model_for_others(X_gaze_l, 'left_gaze')
RE_pose = model_for_others(X_pose_r, 'right_pose')
RE_gaze = model_for_others(X_gaze_r, 'right_gaze')

Instructions for updating:
Colocations handled automatically by placer.
Tensor("Relu:0", shape=(?, 18, 30, 5), dtype=float32)
'L2's shape (?, 5, 8, 10)
Tensor("Reshape_2:0", shape=(?, 400), dtype=float32)

For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
(?, 256)
Tensor("Relu_4:0", shape=(?, 18, 30, 5), dtype=float32)
'L2's shape (?, 5, 8, 10)
Tensor("Reshape_3:0", shape=(?, 400), dtype=float32)
(?, 256)
(?, 3)
(?, 3)
(?, 3)
(?, 3)


# Integrate the gaze graph

In [10]:
# 3x3 mixing weights for the left-eye gaze branch output.
LE_gaze_W = tf.get_variable("LE_gaze_W" ,shape= [3, 3],
                         initializer= tf.contrib.layers.xavier_initializer())

# 3x3 mixing weights for the right-eye gaze branch output.
RE_gaze_W = tf.get_variable("RE_gaze_W" ,shape= [3, 3],
                         initializer= tf.contrib.layers.xavier_initializer())

# Fuse both eyes: sum of the two linear projections (no bias term), then ReLU.
layer_gaze = tf.nn.relu(tf.matmul(LE_gaze, LE_gaze_W) + tf.matmul(RE_gaze, RE_gaze_W))

# Integrate the pose layer

In [11]:
# 3x3 mixing weights for the left-eye pose branch output.
LE_pose_W = tf.get_variable("LE_pose_W" ,shape= [3, 3],
                         initializer= tf.contrib.layers.xavier_initializer())

# 3x3 mixing weights for the right-eye pose branch output.
RE_pose_W = tf.get_variable("RE_pose_W" ,shape= [3, 3],
                         initializer= tf.contrib.layers.xavier_initializer())

# Fuse both eyes: sum of the two linear projections (no bias term), then ReLU.
layer_pose = tf.nn.relu(tf.matmul(LE_pose, LE_pose_W) + tf.matmul(RE_pose, RE_pose_W))

# Integrate the Image layer

In [12]:
# Fuse the two 64-wide image branches: each eye gets its own 64 -> 32
# projection, the projections are summed (no bias term) and passed through ReLU.
LE_W_img = tf.get_variable("LE_W_img" ,shape= [64, 32],
                         initializer= tf.contrib.layers.xavier_initializer())
RE_W_img = tf.get_variable("RE_W_img" ,shape= [64, 32],
                         initializer= tf.contrib.layers.xavier_initializer())
layer_img = tf.nn.relu(tf.matmul(LE_img, LE_W_img) + tf.matmul(RE_img, RE_W_img))

# Dense 32 -> 16 with bias and ReLU.
img_W = tf.get_variable("img_W" ,shape= [32, 16],
                         initializer= tf.contrib.layers.xavier_initializer())
img_b = tf.Variable(tf.random_normal([16]))
layer2_img = tf.nn.relu(tf.matmul(layer_img, img_W) + img_b)

# Dense 16 -> 8 with bias and ReLU.
img_W2 = tf.get_variable("img_W2" ,shape= [16, 8],
                         initializer= tf.contrib.layers.xavier_initializer())
img_b2 = tf.Variable(tf.random_normal([8]))
layer3_img= tf.nn.relu(tf.matmul(layer2_img, img_W2) + img_b2)

# Dense 8 -> 3 with bias and ReLU, so the image features end up with the same
# width (3) as the fused gaze and pose layers.
Final_W_img = tf.get_variable("Final_W_img" ,shape= [8, 3],
                         initializer= tf.contrib.layers.xavier_initializer())
Final_b_img = tf.Variable(tf.random_normal([3]))
Final_layer_img= tf.nn.relu(tf.matmul(layer3_img, Final_W_img) + Final_b_img)

In [13]:
# Output weight for the image features (3 -> 2); no bias is defined.
All_W_img = tf.get_variable("All_W_img" ,shape= [3, 2],
                         initializer= tf.contrib.layers.xavier_initializer())
# Output weight for the fused pose layer (3 -> 2); no bias is defined.
All_W_pose = tf.get_variable("All_W_pose" ,shape= [3, 2],
                         initializer= tf.contrib.layers.xavier_initializer())
# Output weight for the fused gaze layer (3 -> 2); no bias is defined.
All_W_gaze = tf.get_variable("All_W_gaze" ,shape= [3, 2],
                         initializer= tf.contrib.layers.xavier_initializer())


# Final prediction: sum of the three linear projections, two values per sample.
logits = (tf.matmul(Final_layer_img, All_W_img) 
          + tf.matmul(layer_pose, All_W_pose)
          + tf.matmul(layer_gaze, All_W_gaze))

# Mean squared error between the prediction and the target Y.
cost = tf.reduce_mean(tf.square(logits-Y))

# Training op: one Adam update of every trainable variable per run.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

In [14]:
# Start a session on the default graph and initialise every variable.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Every step trains on the complete data set, so the feed dict never changes;
# it is built once here instead of once per iteration.
feed_dict = {X_right: RE_img_data, X_left: LE_img_data,
             X_gaze_r: RE_gaze_data, X_gaze_l: LE_gaze_data,
             X_pose_r: RE_pose_data, X_pose_l: LE_pose_data,
             Y: dataset_label, keep_prob: 0.8}

# Number of full-batch optimisation steps. It equals the number of samples,
# which is the step count the loop has always used.
n_steps = len(RE_img_data)

print('Learning started. It takes sometime.')
for i in range(n_steps):
    c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
    # Report the cost every 100 steps.
    if i % 100 == 0:
        print('step: {0}, cost: {1}'.format(i, c))

print('Learning Finished!')

Learning started. It takes sometime.
step: 0, cost: 11.38553524017334
step: 100, cost: 0.6470407247543335
step: 200, cost: 0.21433332562446594
step: 300, cost: 0.18179911375045776
step: 400, cost: 0.14216668903827667
step: 500, cost: 0.09956863522529602
step: 600, cost: 0.08182694762945175
step: 700, cost: 0.07897178083658218
step: 800, cost: 0.07836045324802399
step: 900, cost: 0.07815288752317429
Learning Finished!
