In [1]:
# import Section
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
from os import listdir
from os.path import isfile, join, isdir
import re
from matplotlib import pyplot as plt
import seaborn as sns
import glob
import re
from scipy.interpolate import interp1d
from tqdm import tqdm, tqdm_notebook

In [2]:
# Function Section
def calculate_pad(brightness, saturation):
    """Return the [p, a, d] values derived from brightness and saturation.

    Each component is a fixed linear combination of the two inputs
    (presumably the pleasure/arousal/dominance model -- confirm against
    the source of the coefficients).

    Returns a three-element list ordered as [p, a, d].
    """
    # One (brightness weight, saturation weight) pair per output component,
    # listed in p, a, d order.
    weights = (
        (0.69, 0.22),
        (-0.31, 0.6),
        (0.76, 0.32),
    )
    return [w_bright * brightness + w_sat * saturation for w_bright, w_sat in weights]

def calculate_pad_scene(scene):
    """Average the per-image p, a and d values over all images in a scene.

    For every image the mean brightness and mean saturation are computed and
    fed to calculate_pad; the three components are then averaged separately.

    Returns a tuple (mean p, mean a, mean d).
    """
    p_values, a_values, d_values = [], [], []
    for frame in scene:
        p, a, d = calculate_pad(mean_brightness(frame), mean_saturation(frame))
        p_values.append(p)
        a_values.append(a)
        d_values.append(d)
    return np.mean(p_values), np.mean(a_values), np.mean(d_values)

def calculate_blur(img):
    """Return the variance of the Laplacian of img.

    The Laplacian is computed with a 64-bit float output depth (cv2.CV_64F)
    and the variance is taken over the whole resulting array.
    """
    laplacian = cv2.Laplacian(img, cv2.CV_64F)
    return laplacian.var()

def calculate_blur_scene(scene):
    """Return the mean of calculate_blur over every image in scene."""
    per_frame = [calculate_blur(frame) for frame in scene]
    return np.mean(per_frame)

def mean_brightness(img):
    """Return the mean of HSV channel 2 (value) of an image read as BGR."""
    hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    value_plane = hsv_img[..., 2]
    return np.mean(value_plane)

def mean_saturation(img):
    """Return the mean of HSV channel 1 (saturation) of an image read as BGR."""
    hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    saturation_plane = hsv_img[..., 1]
    return np.mean(saturation_plane)

def calculate_opticalFlow(img1, img2):
    """Return normalized Farneback optical-flow magnitude between two frames.

    Both frames are converted from BGR to grayscale, dense optical flow is
    computed from img1 to img2, and the per-pixel flow magnitude is min-max
    normalized to the range 0..255.

    Returns row 0 of the normalized magnitude array (see the review note at
    the return statement).
    """
    prev = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
    forward = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
    # The original also allocated a `mask` array here that was written to but
    # never read; it has been removed as dead code.
    flow = cv2.calcOpticalFlowFarneback(
        prev,
        forward,
        flow=None,
        pyr_scale=0.5,
        levels=1,
        winsize=3,
        iterations=15,
        poly_n=3,
        poly_sigma=5,
        flags=cv2.OPTFLOW_FARNEBACK_GAUSSIAN,
    )
    # Only the magnitude of the (dx, dy) flow field is used; the angle is discarded.
    magnitude, _ = cv2.cartToPolar(flow[..., 0], flow[..., 1])
    normalized = cv2.normalize(magnitude, None, 0, 255, cv2.NORM_MINMAX)
    # NOTE(review): indexing with [0] keeps only the first row of the
    # normalized magnitude, so callers average a single image row rather than
    # the whole frame. This looks unintentional, but it is preserved here
    # because changing it would alter the returned shape and every value
    # derived from it -- confirm and fix deliberately.
    return normalized[0]

def calculate_opticalFlow_scene(scene):
    """Return the mean optical-flow value over the first two frame pairs.

    Flow is computed for frames 0 -> 1 and 1 -> 2, so scene must hold at least
    three frames; any frames after the third are ignored.
    """
    pair_flows = [calculate_opticalFlow(scene[k], scene[k + 1]) for k in (0, 1)]
    return np.mean(pair_flows)
    
def isjpg(filepath):
    """Return a match object if filepath ends with ".jpg", otherwise None.

    The result is meant to be used for its truthiness. The match is
    case-sensitive, so ".JPG" is not accepted.
    """
    # The dot is escaped so that only a literal ".jpg" suffix matches; the
    # previous pattern ".jpg$" accepted any character before "jpg"
    # (for example "photo_jpg").
    return re.search(r"\.jpg$", filepath)

In [8]:
# constant
# Build the root with join() so the separator matches the host OS
# (on Windows this still evaluates to 'data\\scenes').
base = join('data', 'scenes')
# Sub-directories of `base` that are not movies to be analysed.
excluded_dirs = ('incredibles_example', 'faces')
# os.listdir returns entries in arbitrary order; sort so the movie order is
# reproducible across runs and platforms.
movies = sorted(x for x in listdir(base) if isdir(join(base, x)) and x not in excluded_dirs)
movies_paths = [join(base, x) for x in movies]
display(movies)
display(movies_paths)
# Maps movie name -> list of .jpg paths inside that movie's directory.
img_paths = {}
for movie, movie_path in zip(movies, movies_paths):
    # Sorted because the preprocessing step groups consecutive list entries
    # into scenes, which only works if the file order is deterministic.
    files = sorted(join(movie_path, f) for f in listdir(movie_path) if isjpg(join(movie_path, f)))
    img_paths[movie] = files
display([len(x) for x in img_paths.values()])

['big_hero_6', 'cars_3', 'incredible_2', 'toy_story_4', 'up', 'wall_e_']

['data\\scenes\\big_hero_6',
 'data\\scenes\\cars_3',
 'data\\scenes\\incredible_2',
 'data\\scenes\\toy_story_4',
 'data\\scenes\\up',
 'data\\scenes\\wall_e_']

[90, 90, 90, 90, 90, 90]

In [None]:
# data preprocessing
# Each run of FRAMES_PER_SCENE consecutive paths in a movie's list is treated
# as one scene.
FRAMES_PER_SCENE = 3
# dsize handed to cv2.resize for every frame.
# NOTE(review): cv2.resize interprets dsize as (width, height), so frames
# become 320 wide by 768 tall -- confirm this is not a swapped (height, width).
FRAME_SIZE = (320, 768)

# Parallel lists: index k in every list describes the same scene.
scene_names = []
scene_avg_ps = []
scene_avg_as = []
scene_avg_ds = []
scene_avg_blurs = []
scene_avg_optical_flows = []
scene_movies = []
scene_paths = []
for movie in tqdm_notebook(img_paths.keys()):
    display('preprocessing scenes in {m}'.format(m = movie))
    lst = img_paths[movie]
    # Derive the scene start indices from the real number of files instead of
    # assuming exactly 90 images per movie; a trailing incomplete group is
    # skipped rather than raising IndexError.
    scene_starts = range(0, len(lst) - FRAMES_PER_SCENE + 1, FRAMES_PER_SCENE)
    for i in tqdm_notebook(scene_starts):
        # Scene number is sliced out of the file name by fixed position
        # (presumably names end in "<scene>-<frame>.jpg" with a one-digit
        # frame index -- verify against the data set).
        scene_num = lst[i][-8:-6].replace('-', '')
        scene_names.append(movie + scene_num)
        temp_imgs = []
        flag = False  # becomes True when any frame of the scene fails to load
        paths = []
        for j in range(FRAMES_PER_SCENE):
            img = cv2.imread(lst[i+j])
            paths.append(lst[i+j])
            # cv2.imread signals an unreadable file by returning None.
            if img is not None:
                img = cv2.resize(img, FRAME_SIZE)
                temp_imgs.append(img)
            else:
                flag = True
        # PAD and blur are averaged over whichever frames did load.
        temp_pad = calculate_pad_scene(temp_imgs)
        scene_avg_ps.append(temp_pad[0])
        scene_avg_as.append(temp_pad[1])
        scene_avg_ds.append(temp_pad[2])
        scene_avg_blurs.append(calculate_blur_scene(temp_imgs))
        scene_movies.append(movie)
        scene_paths.append(paths)
        # Optical flow needs all frames of the scene; record NaN otherwise.
        if not flag:
            scene_avg_optical_flows.append(calculate_opticalFlow_scene(temp_imgs))
        else:
            scene_avg_optical_flows.append(np.nan)

HBox(children=(IntProgress(value=0, max=6), HTML(value='')))

'preprocessing scenes in big_hero_6'

HBox(children=(IntProgress(value=0, max=30), HTML(value='')))

'preprocessing scenes in cars_3'

HBox(children=(IntProgress(value=0, max=30), HTML(value='')))

'preprocessing scenes in incredible_2'

HBox(children=(IntProgress(value=0, max=30), HTML(value='')))

'preprocessing scenes in toy_story_4'

HBox(children=(IntProgress(value=0, max=30), HTML(value='')))

'preprocessing scenes in up'

HBox(children=(IntProgress(value=0, max=30), HTML(value='')))

'preprocessing scenes in wall_e_'

HBox(children=(IntProgress(value=0, max=30), HTML(value='')))

In [None]:
# Assemble the per-scene feature lists into one table; the dict's insertion
# order determines the column order.
clean = pd.DataFrame({
    'scene_name': scene_names,
    'scene_avg_p': scene_avg_ps,
    'scene_avg_a': scene_avg_as,
    'scene_avg_d': scene_avg_ds,
    'scene_avg_blur': scene_avg_blurs,
    'scene_avg_optical_flow': scene_avg_optical_flows,
    'scene_movie': scene_movies,
    'paths': scene_paths,
})
display(clean.shape)
display(clean.head())
# Persist the table without the row index.
clean.to_csv('clean_df.csv', index=False)