In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import math
import os
import time
from os.path import isfile, join

import numpy as np
import tensorflow as tf
from scipy.ndimage import gaussian_filter, zoom
from scipy.special import logsumexp

import cv2
import utils.equirec_2_perspec as E2P
from utils.read_write_h5 import store_many_hdf5

# equ = E2P.Equirectangular('src/image.jpg')    # Load equirectangular image

# FOV unit is degree
# theta is z-axis angle(right direction is positive, left direction is negative)
# phi is y-axis angle(up direction positive, down direction negative)
# height and width is output image dimension

DIR_PATH = os.getcwd()

In [3]:
def equirect_to_spherical(x_eq, y_eq, width_px, height_px):
    """
    Convert equirectangular coordinates to spherical angles in degree for use in
    equirec_2_perspec
    """
    theta_rad = (x_eq * 2 * math.pi) / width_px
    phi_rad = (-y_eq * math.pi) / height_px

    theta_deg = theta_rad * (180 / math.pi) - 180
    phi_deg = phi_rad * (180 / math.pi) + 90
    return theta_deg, phi_deg

# Validity check
equirect_to_spherical(np.array([1920, 1920]), np.array([960, 960]), 3840, 1920)

(array([-2.84217094e-14, -2.84217094e-14]),
 array([1.42108547e-14, 1.42108547e-14]))

In [4]:
tf.reset_default_graph()

check_point = os.path.join('deep_gaze', 'DeepGazeII.ckpt')  # DeepGaze II
#check_point = 'ICF.ckpt'  # ICF
new_saver = tf.train.import_meta_graph('{}.meta'.format(check_point))

input_tensor = tf.get_collection('input_tensor')[0]
centerbias_tensor = tf.get_collection('centerbias_tensor')[0]
log_density = tf.get_collection('log_density')[0]
log_density_wo_centerbias = tf.get_collection('log_density_wo_centerbias')[0]

'model_variables' collection should be of type 'byte_list', but instead is of type 'node_list'.


In [5]:
TRAIN_DIR = join(DIR_PATH, "preprocessed_train")
TEST_DIR = join(DIR_PATH, "preprocessed_test")
FILES = sorted([f for f in os.listdir(TEST_DIR) if isfile(join(TEST_DIR, f))])
for i, file in enumerate(FILES):
    print(i, file)

#FILES = ['002_08_madagascar_cuts.npy']

0 042_01_art_gallery_static.npy
1 042_02_theatre_scene_static.npy
2 042_03_lions_static_cuts.npy
3 042_04_copenhagen_harbour_static_cuts.npy
4 042_05_florida_yacht_motion.npy
5 042_07_factory_robots_static.npy
6 042_08_madagascar_cuts.npy
7 042_11_lohdi_garden_india_cuts.npy
8 042_12_gym_workout_static.npy
9 042_13_times_square_static.npy
10 042_16_interview_static.npy
11 042_17_weather_forecast_static.npy
12 042_18_bomb_trapped_static.npy
13 042_20_car_fix_static.npy
14 042_21_operation_room_static.npy
15 042_23_dog_food_factory_motion.npy
16 043_01_art_gallery_static.npy
17 043_02_theatre_scene_static.npy
18 043_03_lions_static_cuts.npy
19 043_04_copenhagen_harbour_static_cuts.npy
20 043_07_factory_robots_static.npy
21 043_08_madagascar_cuts.npy
22 043_09_elephants_static.npy
23 043_11_lohdi_garden_india_cuts.npy
24 043_13_times_square_static.npy
25 043_16_interview_static.npy
26 043_17_weather_forecast_static.npy
27 043_18_bomb_trapped_static.npy
28 043_20_car_fix_static.npy
29 043_

In [6]:
FOVE_FOV = 100 # 100 degrees
FOVE_WIDTH_PX = 512 # 2560/5
FOVE_HEIGHT_PX = 288 # 1440/5

FINAL_WIDTH_PX = 128 # 2560/20
FINAL_HEIGHT_PX = 72 # 1440/20

# load precomputed log density over a 1024x1024 image
centerbias_template = np.load(os.path.join('deep_gaze', 'centerbias.npy'))
# rescale to match image size
centerbias = zoom(centerbias_template, (FOVE_HEIGHT_PX/1024, FOVE_WIDTH_PX/1024),
                  order=0, mode='nearest')
# renormalize log density
centerbias -= logsumexp(centerbias)

centerbias_data = centerbias[np.newaxis, :, :, np.newaxis]  # BHWC, 1 channel (log density)

In [7]:
def resize_images(images, height_output, width_output):
    """
    Returns two arrays:
        resized_images is an array of resized images (N, height_output, width_output, CHANNELS)
    """

    resized_images = None

    for full_size_image in images:
        # Read and resize image
        if resized_images is None:
            resized_images = np.expand_dims(cv2.resize(full_size_image,
                                                       (width_output, height_output),
                                                       interpolation=cv2.INTER_CUBIC), axis=0)
        else:
            resized_images = np.vstack((
                resized_images,
                np.expand_dims(cv2.resize(full_size_image, (width_output, height_output),
                                          interpolation=cv2.INTER_CUBIC), axis=0)
            ))

    # if gray image
    if resized_images.ndim == 3:
        resized_images = np.expand_dims(resized_images, axis=3)
    return resized_images

In [8]:
#COLUMNS = ['x', 'y', 'x_head', 'y_head', 'angle_deg_head', 'quaternion_hp_w',
#        'quaternion_hp_x', 'quaternion_hp_y', 'quaternion_hp_z', 'frameId']

#session_conf = tf.ConfigProto()
#session_conf.gpu_options.allow_growth = True
with tf.Session() as sess:
    new_saver.restore(sess, check_point)
    start_index = 0
    VIDEO_WIDTH = 3840
    dir_path_fov = join(DIR_PATH, "fov_images_test_h5_new", "original")
    dir_path_fov_saliency = join(DIR_PATH, "fov_images_test_h5_new", "saliency")
    for file in FILES[start_index:]:
        start_time_video = time.time()
        video_data = np.load(join(DIR_PATH, "preprocessed_test", file))
        video_name_user = file[:-4]
        video_name = file[4:-4]
        video_path = join(DIR_PATH, 'datasetTUM', 'videos', video_name + ".mp4")
        print(video_name_user)
        video_height = video_data[0, 1] * 2

        original_images = None
        sal_images = None

        vidcap = cv2.VideoCapture(video_path)

        success, image_frame = vidcap.read()
        frame_id = 0

        start_time_video = time.time()
        for i, row in enumerate(video_data):
            frame_id_row = int(row[9])
            if frame_id_row > frame_id:
                # Compute mean of frame data
                frame_data = np.mean(video_data[video_data[:, 9].astype(int) == frame_id], axis=0)
                #print(frame_data.shape)
                theta, phi = equirect_to_spherical(frame_data[2], frame_data[3],
                                                        VIDEO_WIDTH, video_height)
                success, image_frame = vidcap.read()
                #print(success, int(frame_id_row), frame_id)
                if success:
                    # Create FoV image
                    equ = E2P.Equirectangular(image_frame)
                    #print(row[0], row[1], x_head_eq, y_head_eq, theta, phi)
                    img_fov = equ.get_perspective(FOVE_FOV, theta, phi,
                                                 FOVE_HEIGHT_PX, FOVE_WIDTH_PX)
                    img_fov = np.expand_dims(img_fov, axis=0)
                    #print("img_fov", img_fov.shape)
                    if original_images is not None:
                        original_images = np.vstack((original_images, img_fov))
                    else:
                        original_images = img_fov
                    #print(frame_id)
                    frame_id += 1
                else:
                    print("No more frames")
                    break


        #original_images = original_images / np.amax(original_images)
        print("original_images", original_images.shape)
        tiny_images = resize_images(original_images, FINAL_HEIGHT_PX, FINAL_WIDTH_PX)
        print("tiny_images", tiny_images.shape)
        store_many_hdf5(tiny_images, dir_path_fov, video_name_user)
        # Apply DeepGaze II to FoV images
        batch_size = 2**5
        print("Starting generation of saliency maps")
        for index in range(0, len(original_images), batch_size):
            batch_original_images = original_images[index:min(index+batch_size,
                                                              len(original_images))]
            batch_log_density_prediction = sess.run(log_density, {
                input_tensor: batch_original_images,
                centerbias_tensor: centerbias_data,
            })
            batch_saliency_images = np.exp(batch_log_density_prediction)
            for i, sal_image in enumerate(batch_saliency_images):
                sal_image = gaussian_filter(sal_image, sigma=5)
                sal_image = sal_image / np.amax(sal_image) * 255
                batch_saliency_images[i] = sal_image

            if sal_images is not None:
                sal_images = np.vstack((sal_images, batch_saliency_images))
            else:
                sal_images = batch_saliency_images

        print("sal_images", sal_images.shape)
        tiny_sal_images = resize_images(sal_images, FINAL_HEIGHT_PX, FINAL_WIDTH_PX)
        print("tiny_sal_images", tiny_sal_images.shape)
        store_many_hdf5(tiny_sal_images, dir_path_fov_saliency, video_name_user)

        print("Time per video: " + str(time.time() - start_time_video) + " seconds,",
              str((time.time() - start_time_video)/60) + " minutes.")

INFO:tensorflow:Restoring parameters from deep_gaze/DeepGazeII.ckpt
042_01_art_gallery_static
original_images (1836, 288, 512, 3)
tiny_images (1836, 72, 128, 3)
Starting generation of saliency maps
sal_images (1836, 288, 512, 1)
tiny_sal_images (1836, 72, 128, 1)
Time per video: 462.74562644958496 seconds, 7.712427194913229 minutes.
042_02_theatre_scene_static
original_images (1852, 288, 512, 3)
tiny_images (1852, 72, 128, 3)
Starting generation of saliency maps
sal_images (1852, 288, 512, 1)
tiny_sal_images (1852, 72, 128, 1)
Time per video: 452.5006220340729 seconds, 7.5416771133740745 minutes.
042_03_lions_static_cuts
No more frames
original_images (1860, 288, 512, 3)
tiny_images (1860, 72, 128, 3)
Starting generation of saliency maps
sal_images (1860, 288, 512, 1)
tiny_sal_images (1860, 72, 128, 1)
Time per video: 457.5896077156067 seconds, 7.626493545373281 minutes.
042_04_copenhagen_harbour_static_cuts
No more frames
original_images (1860, 288, 512, 3)
tiny_images (1860, 72, 128,

  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  idx1 = xyz[:, 0] > 0
  idx2 = xyz[:, 1] > 0


original_images (1829, 288, 512, 3)
tiny_images (1829, 72, 128, 3)
Starting generation of saliency maps
sal_images (1829, 288, 512, 1)
tiny_sal_images (1829, 72, 128, 1)
Time per video: 444.7702827453613 seconds, 7.412838125228882 minutes.
042_17_weather_forecast_static
original_images (1851, 288, 512, 3)
tiny_images (1851, 72, 128, 3)
Starting generation of saliency maps
sal_images (1851, 288, 512, 1)
tiny_sal_images (1851, 72, 128, 1)
Time per video: 457.07122683525085 seconds, 7.617853856086731 minutes.
042_18_bomb_trapped_static
original_images (1801, 288, 512, 3)
tiny_images (1801, 72, 128, 3)
Starting generation of saliency maps
sal_images (1801, 288, 512, 1)
tiny_sal_images (1801, 72, 128, 1)
Time per video: 434.8360290527344 seconds, 7.247267230351766 minutes.
042_20_car_fix_static
original_images (1918, 288, 512, 3)
tiny_images (1918, 72, 128, 3)
Starting generation of saliency maps
sal_images (1918, 288, 512, 1)
tiny_sal_images (1918, 72, 128, 1)
Time per video: 481.690147399

tiny_sal_images (1849, 72, 128, 1)
Time per video: 452.77804613113403 seconds, 7.546300848325093 minutes.
044_13_times_square_static
No more frames
original_images (1860, 288, 512, 3)
tiny_images (1860, 72, 128, 3)
Starting generation of saliency maps
sal_images (1860, 288, 512, 1)
tiny_sal_images (1860, 72, 128, 1)
Time per video: 461.08411288261414 seconds, 7.6847352941830955 minutes.
044_14_new_orleans_drive_motion
original_images (1796, 288, 512, 3)
tiny_images (1796, 72, 128, 3)
Starting generation of saliency maps
sal_images (1796, 288, 512, 1)
tiny_sal_images (1796, 72, 128, 1)
Time per video: 431.5882031917572 seconds, 7.19313679933548 minutes.
044_16_interview_static
original_images (1850, 288, 512, 3)
tiny_images (1850, 72, 128, 3)
Starting generation of saliency maps
sal_images (1850, 288, 512, 1)
tiny_sal_images (1850, 72, 128, 1)
Time per video: 457.3641984462738 seconds, 7.622736978530884 minutes.
044_17_weather_forecast_static
No more frames
original_images (1860, 288, 5

original_images (1346, 288, 512, 3)
tiny_images (1346, 72, 128, 3)
Starting generation of saliency maps
sal_images (1346, 288, 512, 1)
tiny_sal_images (1346, 72, 128, 1)
Time per video: 268.5711874961853 seconds, 4.476186541716258 minutes.
046_07_factory_robots_static
original_images (1797, 288, 512, 3)
tiny_images (1797, 72, 128, 3)
Starting generation of saliency maps
sal_images (1797, 288, 512, 1)
tiny_sal_images (1797, 72, 128, 1)
Time per video: 430.9171950817108 seconds, 7.181953330834706 minutes.
046_09_elephants_static
No more frames
original_images (1860, 288, 512, 3)
tiny_images (1860, 72, 128, 3)
Starting generation of saliency maps
sal_images (1860, 288, 512, 1)
tiny_sal_images (1860, 72, 128, 1)
Time per video: 458.4887354373932 seconds, 7.641479003429413 minutes.
046_11_lohdi_garden_india_cuts
original_images (1857, 288, 512, 3)
tiny_images (1857, 72, 128, 3)
Starting generation of saliency maps
sal_images (1857, 288, 512, 1)
tiny_sal_images (1857, 72, 128, 1)
Time per vi

sal_images (1801, 288, 512, 1)
tiny_sal_images (1801, 72, 128, 1)
Time per video: 436.80099654197693 seconds, 7.280016688505809 minutes.
048_01_art_gallery_static
original_images (1855, 288, 512, 3)
tiny_images (1855, 72, 128, 3)
Starting generation of saliency maps
sal_images (1855, 288, 512, 1)
tiny_sal_images (1855, 72, 128, 1)
Time per video: 460.7083704471588 seconds, 7.67847292025884 minutes.
048_02_theatre_scene_static
original_images (1855, 288, 512, 3)
tiny_images (1855, 72, 128, 3)
Starting generation of saliency maps
sal_images (1855, 288, 512, 1)
tiny_sal_images (1855, 72, 128, 1)
Time per video: 455.3181366920471 seconds, 7.58863567908605 minutes.
048_03_lions_static_cuts
original_images (1842, 288, 512, 3)
tiny_images (1842, 72, 128, 3)
Starting generation of saliency maps
sal_images (1842, 288, 512, 1)
tiny_sal_images (1842, 72, 128, 1)
Time per video: 449.16934037208557 seconds, 7.48615574836731 minutes.
048_04_copenhagen_harbour_static_cuts
original_images (1852, 288, 

original_images (1824, 288, 512, 3)
tiny_images (1824, 72, 128, 3)
Starting generation of saliency maps
sal_images (1824, 288, 512, 1)
tiny_sal_images (1824, 72, 128, 1)
Time per video: 439.91705894470215 seconds, 7.331951065858205 minutes.
049_18_bomb_trapped_static
original_images (1777, 288, 512, 3)
tiny_images (1777, 72, 128, 3)
Starting generation of saliency maps
sal_images (1777, 288, 512, 1)
tiny_sal_images (1777, 72, 128, 1)
Time per video: 427.2200677394867 seconds, 7.120334545771281 minutes.
049_20_car_fix_static
original_images (1889, 288, 512, 3)
tiny_images (1889, 72, 128, 3)
Starting generation of saliency maps
sal_images (1889, 288, 512, 1)
tiny_sal_images (1889, 72, 128, 1)
Time per video: 467.9219398498535 seconds, 7.798699084917704 minutes.
049_21_operation_room_static
original_images (1874, 288, 512, 3)
tiny_images (1874, 72, 128, 3)
Starting generation of saliency maps
sal_images (1874, 288, 512, 1)
tiny_sal_images (1874, 72, 128, 1)
Time per video: 463.28740715980