# Quantitative analysis of HSV histograms over time

In [43]:
import pandas as pd
import numpy as np
import io
import os
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from skimage import color

import cv2

# Prefix of a YouTube watch URL; presumably a video id is appended to it to
# build a full link -- verify against the cells that use it.
yt_url_prefix = 'https://www.youtube.com/watch?v='
# Directory whose entries are listed and then processed one by one as videos.
dataset_folder = "../../data"

In [44]:
# Names (not full paths) of every entry found in the dataset folder.
downloaded = os.listdir(dataset_folder)

In [45]:
# histogram similarity metrics
def norm_hist(H):
    """Normalize a histogram of counts into a probability distribution.

    Every bin is divided by the total count, so the result sums to 1
    regardless of how many bins the histogram has or how many samples
    were counted.

    Parameters
    ----------
    H : array_like
        Histogram of non-negative bin counts, of any shape.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``H``. A histogram with no
        counts at all is returned as all zeros.
    """
    H = np.asarray(H, dtype=float)
    total = H.sum()
    if total == 0:
        # Nothing to distribute; avoid the 0/0 division that would give NaNs.
        return np.zeros_like(H)
    return H / total

def hist_correlation(H1, H2):
    """Return the correlation coefficient between two histograms.

    Both histograms are centred on their own mean bin value; the result is
    the sum of the bin-wise products of those deviations divided by the
    square root of the product of their sums of squares.
    """
    dev1 = H1 - H1.mean()
    dev2 = H2 - H2.mean()
    covariance = (dev1 * dev2).sum()
    scale = ((dev1 ** 2).sum() * (dev2 ** 2).sum()) ** 0.5
    return covariance / scale

def hist_intersection(H1, H2):
    """Return the sum over all bins of the smaller of the two bin values."""
    overlap = np.minimum(H1, H2)
    return overlap.sum()

def hist_bhattacharyya(H1, H2):
    """Return the negative log of the summed square roots of bin products.

    The inner sum is ``sum(sqrt(H1 * H2))`` over all bins; the function
    returns minus its natural logarithm.
    """
    coefficient = np.sqrt(H1 * H2).sum()
    return -np.log(coefficient)

In [47]:
def collect_video_stats(video_file_name, *, bins=(255, 255, 255)):
    """Compare the HSV histograms of consecutive frames of a video.

    Every frame is converted to HSV, binned into a 3-D histogram,
    normalized with ``norm_hist`` and compared against the histogram of
    the previous frame using the three similarity metrics defined in this
    file.

    Parameters
    ----------
    video_file_name : str
        Path of the video file to open with OpenCV.
    bins : int or sequence of int, optional
        Number of histogram bins per HSV channel, passed to
        ``numpy.histogramdd``. Defaults to 255 bins per channel.

    Returns
    -------
    pandas.DataFrame
        One row per pair of consecutive frames, with the columns
        ``correlation``, ``intersection`` and ``bhattacharyya``. The frame
        is empty when fewer than two frames could be read.
    """
    frame_wise_data = {
        'correlation': [],
        'intersection': [],
        'bhattacharyya': [],
    }

    # rgb2hsv returns every channel in [0, 1]. Pinning the histogram range
    # keeps the bin edges identical for all frames; without it histogramdd
    # derives the edges from each frame's own min/max and the bins of two
    # frames would not describe the same colours.
    hsv_range = ((0.0, 1.0), (0.0, 1.0), (0.0, 1.0))

    vid = cv2.VideoCapture(video_file_name)
    try:
        frame_count = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
        i = 0
        H_frame_prev = None

        while True:
            success, im = vid.read()

            # Stop once the capture has no more frames to deliver
            if not success:
                break

            # Status line, overwritten in place thanks to the carriage return
            print(f"Processing Frame {i}/{frame_count}", end="\r")

            im = np.flip(im, axis=2)  # Switch BGR (OpenCV standard) to RGB
            im = color.rgb2hsv(im)

            H_frame, _ = np.histogramdd(
                im.reshape(-1, im.shape[-1]),  # one row per pixel
                bins=bins,
                range=hsv_range,
            )
            H_frame = norm_hist(H_frame)

            # The first frame has no predecessor to compare against
            if H_frame_prev is not None:
                frame_wise_data['correlation'].append(
                    hist_correlation(H_frame, H_frame_prev))
                frame_wise_data['intersection'].append(
                    hist_intersection(H_frame, H_frame_prev))
                frame_wise_data['bhattacharyya'].append(
                    hist_bhattacharyya(H_frame, H_frame_prev))

            H_frame_prev = H_frame
            i += 1
    finally:
        # Always free the decoder / file handle, even if a frame fails
        vid.release()

    return pd.DataFrame(frame_wise_data)

In [None]:
# Analyse every entry of the dataset folder and save its per-frame metrics
# to a CSV file named after the part of the file name before the first dot.
for file in downloaded:
    file_dir = os.path.join(dataset_folder, file)
    print(f"Processing Video {file_dir}")
    csv_name = f"{file.split('.')[0]}.csv"
    stats = collect_video_stats(file_dir)
    stats.to_csv(csv_name)

Processing Video ../../data/xV5udqNpP94.mp4
Processing Frame 39/2954



Processing Video ../../data/TKPmGjVFbrY.mp4
Processing Video ../../data/uiA4B5Y63IQ.mp4
Processing Video ../../data/LeLsJfGmY_Y.mp4
Processing Video ../../data/0WWzgGyAH6Y.mp4
Processing Video ../../data/tAz9Aqbpcco.mp4
Processing Video ../../data/0vh908t2W-8.mp4
Processing Video ../../data/SDofO3P2HpE.webm
Processing Video ../../data/-ziu6JzJTZ0.mp4
Processing Video ../../data/-URkBFAVZqc.mp4
Processing Video ../../data/v2uV0_1C4UM.mp4
Processing Frame 2582/3624