In [12]:
import pandas as pd
import numpy as np

import io
import os
import time

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from skimage import color
from skimage.transform import rescale, resize

import moviepy.editor as mp

import cv2

yt_url_prefix = 'https://www.youtube.com/watch?v='
dataset_folder = "../../data"

In [13]:
cinematic_folder = "../../data"
cinematic = [
    os.path.join(cinematic_folder, f)
    for f 
    in os.listdir(cinematic_folder) 
    if f[-4:] == ".mp4"
]
len(cinematic), cinematic[0]

(1855, '../../data/xV5udqNpP94.mp4')

In [14]:
casual_folder = "../../data/casual"
casual = [
    os.path.join(casual_folder, f)
    for f 
    in os.listdir(casual_folder) 
    if f[-4:] == ".mp4"
]
len(casual), casual[0]

(20, '../../data/casual/sCFi2O1vyWQ.mp4')

In [15]:
def rgb_to_chroma_hist(rgb, bin_res):
    # RGB to Chroma Hist
    rgb = rgb / 255
    rgb_rsz_resh = np.reshape(rgb, (-1, 3))
    rgb_uv = rgb_rsz_resh[:,:2]
    rgb_uv[:,0] = np.log(rgb_rsz_resh[:,1]/(rgb_rsz_resh[:,0]+1e-6)+1e-6)
    rgb_uv[:,1] = np.log(rgb_rsz_resh[:,1]/(rgb_rsz_resh[:,2]+1e-6)+1e-6)
    #print(rgb_rsz_resh)
    hist, edges = np.histogramdd(rgb_uv, bins=bin_res, range=[[-1,1],[-1,1]], density=False)
    hist /= hist.max()
    hist = np.sqrt(hist)
    return hist, edges

# Old version adapted directly from Cecilia
"""def rgb_to_chroma_hist(rgb, bin_res):
    # RGB to Chroma Hist
    rgb_rsz = resize(rgb, (rgb.shape[0], rgb.shape[1]), anti_aliasing=True)
    rgb_rsz_resh = np.reshape(rgb_rsz, (-1, 3))
    rgb_uv = rgb_rsz_resh[:,:2]
    rgb_uv[:,0] = np.log(rgb_rsz_resh[:,1]/(rgb_rsz_resh[:,0]+1e-6)+1e-6)
    rgb_uv[:,1] = np.log(rgb_rsz_resh[:,1]/(rgb_rsz_resh[:,2]+1e-6)+1e-6)
    #print(rgb_rsz_resh)
    hist, edges = np.histogramdd(rgb_uv, bins=bin_res, range=[[-1,1],[-1,1]], density=False)
    hist /= hist.max()
    hist = np.sqrt(hist)
    return hist, edges"""

def chroma_hist_to_rgb_vis(hist, edges, bin_res):
    # Hist to Image
    hist_vis = np.zeros((hist.shape[0], hist.shape[1], 3))
    # green
    hist_vis[:, :, 1] = hist
    # red
    hist_vis[:, :, 0] = hist / np.exp(edges[0][:-1])[:, None]
    # blue
    hist_vis[:, :, 2] = hist / np.exp(edges[1][:-1])[None, :]

    hist_vis = np.clip(hist_vis,0,1)
    hist_vis *= 255
    hist_vis = np.uint8(hist_vis)
    return hist_vis


# Old version adapted directly from Cecilia
"""def chroma_hist_to_rgb_vis(hist, edges, bin_res):
    # Hist to Image
    hist_vis = np.zeros((hist.shape[0], hist.shape[1], 3))
    #rgb_vis = np.zeros((hist.shape[0], hist.shape[1], 3))

    for i in range(bin_res[0]):
        for j in range(bin_res[1]):
            green = hist[i,j]
            red = green / np.exp(edges[0][i]) 
            blue = green / np.exp(edges[1][j])
    #         print(red)
            hist_vis[i, j, 0] = red
            hist_vis[i, j, 1] = green
            hist_vis[i, j, 2] = blue

    hist_vis = np.clip(hist_vis,0,1)
    hist_vis *= 255
    hist_vis = np.uint8(hist_vis)
    return hist_vis"""

'def chroma_hist_to_rgb_vis(hist, edges, bin_res):\n    # Hist to Image\n    hist_vis = np.zeros((hist.shape[0], hist.shape[1], 3))\n    #rgb_vis = np.zeros((hist.shape[0], hist.shape[1], 3))\n\n    for i in range(bin_res[0]):\n        for j in range(bin_res[1]):\n            green = hist[i,j]\n            red = green / np.exp(edges[0][i]) \n            blue = green / np.exp(edges[1][j])\n    #         print(red)\n            hist_vis[i, j, 0] = red\n            hist_vis[i, j, 1] = green\n            hist_vis[i, j, 2] = blue\n\n    hist_vis = np.clip(hist_vis,0,1)\n    hist_vis *= 255\n    hist_vis = np.uint8(hist_vis)\n    return hist_vis'

In [16]:
def print_ndarray_metrics(m, name=None):
    print(name)
    print(f"Shape: {m.shape}")
    print(f"Dtype: {m.dtype}")
    print(f"Min: {m.min()}")
    print(f"Max: {m.max()}")
    print(f"Mean: {m.mean()}")
    print(f"Std: {m.std()}")
    print(m)

In [17]:
# histogram similarity metrics
def norm_hist(H):
    return H / H.sum()

def hist_correlation(H1, H2):
    H1_bar = H1.mean()
    H2_bar = H2.mean()
    num = ((H1 - H1_bar)*(H2 - H2_bar)).sum()
    denom = ((H1 - H1_bar)**2).sum() * ((H2 - H2_bar)**2).sum()
    denom = denom ** 0.5
    return num / denom

def hist_intersection(H1, H2):
    return np.minimum(H1, H2).sum()

def hist_bhattacharyya(H1, H2):
    return -np.log(np.sqrt(H1*H2).sum())

def compare_histograms(H1, H2, data_dict=None, comparison_functions=[hist_correlation, hist_intersection, hist_bhattacharyya]):
    H1, H2 = norm_hist(H1), norm_hist(H2) # Convert to probability distribution
    if data_dict is None:
        data_dict = {f.__name__: [] for f in comparison_functions} 
    for f in comparison_functions:
        data_dict[f.__name__].append(f(H1, H2))
    return data_dict

In [18]:
class Timer:
    def __init__(self, name=""):
        self.name=name
        self.time = 0
        self.num_calls = 0
        self._last_start_time = 0
    
    def start(self):
        self._last_start_time = time.time()
        
    def stop(self):
        self.time += time.time() - self._last_start_time
        self.num_calls += 1

    def print_avg(self):
        if self.num_calls > 0:
            print(f"Average time for function {self.name}: {self.time / self.num_calls}")

In [19]:
def collect_video_stats(
        vid_file_dir,
        vid_cut_folder,
        lch_vis_folder,
        subsequent_frame_folder,
        subsequent_cut_folder,
        cut_stddev_folder,
        lch_vis_recalculate = False,
        subsequent_frame_recalculate = False,
        subsequent_cut_recalculate = False,
        cut_stddev_recalculate = False
    ):
    # Prep main constants
    vid_file = os.path.basename(vid_file_dir)
    yt_id = vid_file.split(".")[0]
    clip = mp.VideoFileClip(vid_file_dir)
    clip_resized = clip.resize(height=240) # make the height 240px ( According to moviePy documenation The width is then computed so that the width/height ratio is conserved.)
    frame_count = int(clip.fps * clip.duration)
    iter_time = Timer("Total Iteration")
    
    # Prep histogram constants
    bin_size = (50, 50)
    hist_time = Timer("hist")
    
    # Prep video cut constants
    vid_cut_file = f'{yt_id}_cuts.npy'
    vid_cut_dir = os.path.join(vid_cut_folder, vid_cut_file)
    vid_cuts = np.load(vid_cut_dir)
    #print(f"Vid Cuts: {vid_cuts}")
    
    # Prep log-chroma-histogram constants
    lch_vid_file = f"{yt_id}_rgb.avi"
    lch_vid_dir = os.path.join(lch_vis_folder,lch_vid_file)
    do_lch_vid = (not os.path.exists(lch_vid_dir)) or lch_vis_recalculate
    if do_lch_vid:
        vid_time = Timer("vis")
        vid_lch = cv2.VideoWriter(lch_vid_dir, cv2.VideoWriter_fourcc(*"MJPG"), clip.fps, bin_size)
#    else:
#        print("Skipping LCH Vis...")
    
    # Prep subsequent frame analysis constants
    subsequent_frame_file = f"{yt_id}_byFrame.csv"
    subsequent_frame_dir = os.path.join(
        subsequent_frame_folder,
        subsequent_frame_file
    )
    do_subsequent_frame = (not os.path.exists(subsequent_frame_dir)) or subsequent_frame_recalculate
    if do_subsequent_frame:
        H_frame_prev = None
        frame_wise_data = None # Signals to compare_histograms that we need to replace this with a data structure
#    else:
#        print("Skipping Subsequent Frame Analysis...")
    
    # Prep subsequent cut analysis constants
    subsequent_cut_file = f"{yt_id}_byCut.csv"
    subsequent_cut_dir = os.path.join(
        subsequent_cut_folder,
        subsequent_cut_file
    )
    do_subsequent_cut = (not os.path.exists(subsequent_cut_dir)) or subsequent_cut_recalculate
    H_cut = None
    H_cut_prev = None
    if do_subsequent_cut:
        cut_wise_data = None # Signals to compare_histograms that we need to replace this with a data structure
#    else:
#        print("Skipping Subsequent Cut Analysis...")
    
    # Prep cut sttdev analysis constants
    cut_stddev_file = f"{yt_id}_cutStddev.npy"
    cut_stddev_dir = os.path.join(
        cut_stddev_folder,
        cut_stddev_file
    )
    do_cut_stddev = (not os.path.exists(cut_stddev_dir)) or cut_stddev_recalculate
    if do_cut_stddev:
        cuts_for_stddev = []
#    else:
#        print("Skipping Cut StdDev Analysis")
    
    if do_lch_vid or do_subsequent_frame or do_subsequent_cut or do_cut_stddev:
        for i, f_rgb in enumerate(clip_resized.iter_frames()):
            iter_time.start()
            # Print status update if there are more frames to process
            print(f"Processing Frame {i}/{frame_count}", end="\r")

#            if i > 500:
#                break

            # Reset our histogram and collect data if we have encountered a cut
            if i in vid_cuts:
                if do_cut_stddev and H_cut is not None:
                    H_cut_n = norm_hist(H_cut)
                    #print_ndarray_metrics(H_cut_n, "H_cut_n")
                    cuts_for_stddev.append(H_cut_n)
                # Collect cut_wise stats
                if do_subsequent_cut and H_cut_prev is not None:
                    cut_wise_data = compare_histograms(H_cut, H_cut_prev, cut_wise_data)
                # (Re)set aggregation variables
                H_cut_prev = H_cut
                H_cut = np.zeros(bin_size)

            # Make Histogram
            hist_time.start()
            H_frame, edges = rgb_to_chroma_hist(f_rgb, bin_size)
            #print(edges)
            hist_time.stop()

            # Write visualized histogram to video
            if do_lch_vid:
                vid_time.start()
                H_frame_vis = chroma_hist_to_rgb_vis(H_frame, edges, bin_size)
                H_frame_vis = np.flip(H_frame_vis, axis=2) # Switch RGB to BGR (openCV standard)
                vid_time.stop()
                vid_lch.write(H_frame_vis)

            # Collect frame_wise stats
            if do_subsequent_frame and H_frame_prev is not None:
                frame_wise_data = compare_histograms(H_frame, H_frame_prev, frame_wise_data)

            # Increment
            H_cut += H_frame
            H_frame_prev = H_frame
            
            iter_time.stop()

        # Finish last iteration for some analyses:
        if do_cut_stddev:
            cuts_for_stddev.append(norm_hist(H_cut))
        if do_subsequent_cut:
            cut_wise_data = compare_histograms(H_cut, H_cut_prev, cut_wise_data)

        # Finalize log-chroma-histogram vis:
        if do_lch_vid:
            vid_lch.release()
            vid_time.print_avg()

        # Finalize log-chroma-histogram vis:
        hist_time.print_avg()

        # Finalize subsequent frame analysis:
        if do_subsequent_frame:
            pd.DataFrame(frame_wise_data).to_csv(subsequent_frame_dir) # Write DataFrame to file

        # Finalize subsequent cut analysis:
        if do_subsequent_cut:
            pd.DataFrame(cut_wise_data).to_csv(subsequent_cut_dir) # Write DataFrame to file

        # Finalize cut stddev analysis:
        if do_cut_stddev:
            #print(cuts_for_stddev)
            cuts_sttdev_stack = np.stack(cuts_for_stddev)
            #print_ndarray_metrics(cuts_sttdev_stack, "cuts_sttdev_stack")
            cuts_sttdev = np.nanstd(cuts_sttdev_stack, axis=0) # Ignoring
            if np.isnan(cuts_sttdev[0,0]): #Trying to avoid strange error
                print("Stddev Calculation Failed!")
                #print(cuts_for_stddev)
                #print_ndarray_metrics(cuts_sttdev, "cuts_sttdev")
            else:
                np.save(cut_stddev_dir, cuts_sttdev)
        
        # Show total iteration time:
        iter_time.print_avg()

        #plt.imshow(cuts_sttdev)
    #print()


In [20]:
def collect_temporal_stats(vid_files, start_index=0, *args, **kwargs):
    for i, vid_dir in enumerate(vid_files):
        if i < start_index:
            continue
        try:
            vid_file = os.path.basename(vid_dir)
            yt_id = vid_file.split(".")[0]
            print(f"Processing Video {vid_dir} - {i}/{len(vid_files)}")
            collect_video_stats(vid_dir, *args, **kwargs)
        except Exception as e:
            print(e)

In [None]:
collect_temporal_stats(
    cinematic,
    vid_cut_folder="../../data/cuts/",
    subsequent_frame_folder="../../data/analysis/variation_hist_metrics",
    subsequent_cut_folder="../../data/analysis/variation_hist_metrics",
    cut_stddev_folder="../../data/analysis/temporal_hist_std",
    lch_vis_folder="../../data/analysis/variation_hist_videos",
    subsequent_frame_recalculate = True,
    subsequent_cut_recalculate = True,
    cut_stddev_recalculate = True   
)

Processing Video ../../data/xV5udqNpP94.mp4 - 0/1855
Average time for function hist: 0.005293032925342747
Average time for function Total Iteration: 0.0055592226296310296
Processing Video ../../data/TKPmGjVFbrY.mp4 - 1/1855
Average time for function hist: 0.005045327516986046
Average time for function Total Iteration: 0.005290417292548667
Processing Video ../../data/uiA4B5Y63IQ.mp4 - 2/1855
Average time for function hist: 0.005302805093312966
Average time for function Total Iteration: 0.005567818232782892
Processing Video ../../data/LeLsJfGmY_Y.mp4 - 3/1855
Average time for function hist: 0.00480132800888042
Average time for function Total Iteration: 0.0050645900928244295
Processing Video ../../data/0WWzgGyAH6Y.mp4 - 4/1855
Average time for function hist: 0.004954180488847706
Average time for function Total Iteration: 0.005209550647067879
Processing Video ../../data/QJDRdGxGw8U.mp4 - 5/1855
Average time for function hist: 0.004901051437996385
Average time for function Total Iteration: 

In [11]:
collect_temporal_stats(
    casual,
    vid_cut_folder="../../data/casual/cuts/",
    subsequent_frame_folder="../../data/casual/analysis/variation_hist_metrics",
    subsequent_cut_folder="../../data/casual/analysis/variation_hist_metrics",
    cut_stddev_folder="../../data/casual/analysis/temporal_hist_std",
    lch_vis_folder="../../data/casual/analysis/variation_hist_videos"
)

Processing Video ../../data/casual/sCFi2O1vyWQ.mp4 - 0/20
Average time for function hist: 0.007766343483722013
Average time for function Total Iteration: 0.008158449963376228
Processing Video ../../data/casual/rtMSMWm-xw0.mp4 - 1/20
Average time for function hist: 0.0065329199033344046
Average time for function Total Iteration: 0.006826567335636539
Processing Video ../../data/casual/ggNIcxiUv6Q.mp4 - 2/20
Processing Video ../../data/casual/J_EDZOodc14.mp4 - 3/20
Processing Video ../../data/casual/WE5uaL0urok.mp4 - 4/20
Processing Video ../../data/casual/FZ7wBmLvcSA.mp4 - 5/20
Processing Video ../../data/casual/bHKQfui91yc.mp4 - 6/20
Processing Video ../../data/casual/yAFmSm5l8gE.mp4 - 7/20
Processing Video ../../data/casual/I_1oQYpONjs.mp4 - 8/20
'NoneType' object has no attribute 'size'
Processing Video ../../data/casual/x2irNRNLrrI.mp4 - 9/20
Processing Video ../../data/casual/O2VSHC9DFbQ.mp4 - 10/20
Processing Video ../../data/casual/4hUhAFVpip8.mp4 - 11/20
Processing Video ../../da