In [1]:
# Install Dependencies
#!pip install scikit-image
#!pip install scenedetect[opencv,progress_bar]

In [2]:
from __future__ import print_function
import io
import os

import pandas as pd
import numpy as np

from skimage import color
import cv2

import scenedetect
from scenedetect.video_manager import VideoManager
from scenedetect.scene_manager import SceneManager
from scenedetect.frame_timecode import FrameTimecode
from scenedetect.stats_manager import StatsManager
from scenedetect.detectors import ContentDetector

STATS_FILE_PATH = 'testvideo.stats.csv'
dataset_folder = "../../data/"
cuts_folder = "../../data/cuts/"
analysis_folder = "../../data/analysis/variation"

In [3]:
downloaded = [f for f in os.listdir(dataset_folder) if os.path.isfile(os.path.join(dataset_folder, f))]

# Find video cuts

In [4]:
for i, vid_file in enumerate(downloaded):
    video_dir = os.path.join(dataset_folder, vid_file)
    print(f"Finding cuts of Video {video_dir}: {i+1}/{len(downloaded)}", end='\r')
    
    # Check for already existing files and skip if already calculated
    yt_id = vid_file.split('.')[0]
    vid_cut_file = f'{yt_id}_cuts.npy'
    vid_cut_dir = os.path.join(cuts_folder, vid_cut_file)
    if os.path.exists(vid_cut_dir):
        continue
    
    # Create a video_manager point to video file testvideo.mp4. Note that multiple
    # videos can be appended by simply specifying more file paths in the list
    # passed to the VideoManager constructor. Note that appending multiple videos
    # requires that they all have the same frame size, and optionally, framerate.
    try:
        video_manager = VideoManager([video_dir])
        stats_manager = StatsManager()
        scene_manager = SceneManager(stats_manager)
        # Add ContentDetector algorithm (constructor takes detector options like threshold).
        scene_manager.add_detector(ContentDetector())
        base_timecode = video_manager.get_base_timecode()

        try:
            # Set downscale factor to improve processing speed (no args means default).
            video_manager.set_downscale_factor()

            # Start video_manager.
            video_manager.start()

            # Perform scene detection on video_manager.
            scene_manager.detect_scenes(frame_source=video_manager)

            # Obtain list of detected scenes.
            scene_list = scene_manager.get_scene_list(base_timecode)
            # Like FrameTimecodes, each scene in the scene_list can be sorted if the
            # list of scenes becomes unsorted.

            times = np.array([s[0].get_frames() for s in scene_list])
            np.save(vid_cut_dir, times)
        finally:
            video_manager.release()
    except:
        pass

Finding cuts of Video ../../data/7TavVZMewpY.mp4: 2002/20022

# Collect histogram variation between frames and cuts

In [5]:
# histogram similarity metrics
def norm_hist(H):
    return H / H.size

def hist_correlation(H1, H2):
    H1_bar = H1.mean()
    H2_bar = H2.mean()
    num = ((H1 - H1_bar)*(H2 - H2_bar)).sum()
    denom = ((H1 - H1_bar)**2).sum() * ((H2 - H2_bar)**2).sum()
    denom = denom ** 0.5
    return num / denom

def hist_intersection(H1, H2):
    return np.minimum(H1, H2).sum()

def hist_bhattacharyya(H1, H2):
    return -np.log(np.sqrt(H1*H2).sum())

def compare_histograms(H1, H2, data_dict=None, comparison_functions=[hist_correlation, hist_intersection, hist_bhattacharyya]):
    H1, H2 = norm_hist(H1), norm_hist(H2) # Convert to probability distribution
    if data_dict is None:
        data_dict = {f.__name__: [] for f in comparison_functions} 
    for f in comparison_functions:
        data_dict[f.__name__].append(f(H1, H2))
    return data_dict

In [6]:
def collect_video_stats(video_file_name):
    vid_file = os.path.basename(video_file_name)
    yt_id = vid_file.split(".")[0]
    vid_cut_file = f'{yt_id}_cuts.npy'
    vid_cut_dir = os.path.join(cuts_folder, vid_cut_file)
    vid_cuts = np.load(vid_cut_dir)
    print(f"Vid Cuts: {vid_cuts}")
    
    vid = cv2.VideoCapture(video_file_name)
    frame_count = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
    
    i = 0
    H_frame_prev = None
    H_cut = None
    H_cut_prev = None
    frame_wise_data = None # Signals to compare_histograms that we need to replace this with a data structure
    cut_wise_data = None # Signals to compare_histograms that we need to replace this with a data structure

    while True:
        # Read image and make histogram
        success, im = vid.read()
        
        # Break if no more frames left
        if not success:# or i > 100:
            break
        
        # Print status update if there are more frames to process
        print(f"Processing Frame {i}/{frame_count}", end="\r")
        
        # Reset our histogram and collect data if we have encountered a cut
        if i in vid_cuts:
            
            # Collect cut_wise stats
            if H_cut_prev is not None:
                cut_wise_data = compare_histograms(H_cut, H_cut_prev, cut_wise_data)
            # (Re)set aggregation variables
            H_cut_prev = H_cut
            H_cut = np.zeros((255, 255, 255))

        # Make Histogram
        im = np.flip(im, axis=2) # Switch BGR (openCV standard) to RGB
        im = color.rgb2hsv(im) # Convert RGB to space for different viewing
        H_frame, _ = np.histogramdd(
            im.reshape(-1, im.shape[-1]),  #flattened image for input to histogramdd
            bins=(255, 255, 255)
        )
        
        # Collect frame_wise stats
        if H_cut_prev is not None:
            frame_wise_data = compare_histograms(H_frame, H_frame_prev, frame_wise_data)

        # Increment
        H_cut += H_frame
        H_frame_prev = H_frame
        i += 1
    
    # Make DataFrame
    return pd.DataFrame(frame_wise_data), pd.DataFrame(cut_wise_data)

In [None]:
for vid_file in downloaded:
    yt_id = vid_file.split(".")[0]
    file_dir = os.path.join(dataset_folder, vid_file)
    print(f"Processing Video {file_dir}")
    frame_wise_stats, cut_wise_stats = collect_video_stats(file_dir)
    frame_wise_stats.to_csv(os.path.join(analysis_folder, f"{yt_id}_byFrame.csv"))
    cut_wise_stats.to_csv(os.path.join(analysis_folder, f"{yt_id}_byCut.csv"))

Processing Video ../../data/xV5udqNpP94.mp4
Vid Cuts: [   0   39   89  207  251  290  341  368  389  423  459  480  615  646
  663  687  711  744  775  827  878  915  971  991 1014 1040 1064 1095
 1115 1141 1193 1227 1271 1300 1339 1365 1402 1463 1503 1535 1599 1620
 1643 1687 1747 1823 1838 1877 1942 1982 2001 2041 2082 2099 2139 2154
 2173 2194 2213 2231 2252 2299 2324 2341 2360 2383 2408 2427 2451 2470
 2491 2515 2535 2555 2659 2680 2757]
Processing Frame 0/2954

  out[idx, 0] = (arr[idx, 1] - arr[idx, 2]) / delta[idx]
  out[idx, 0] = 2. + (arr[idx, 2] - arr[idx, 0]) / delta[idx]
  out[idx, 0] = 4. + (arr[idx, 0] - arr[idx, 1]) / delta[idx]


Processing Frame 39/2954



Processing Frame 42/2954

  out_s = delta / out_v


Processing Video ../../data/TKPmGjVFbrY.mp4
Vid Cuts: [   0  546  644  662  682  783  958 1005 1073 1147 1218 1279 1327 1414
 1509 1552 1716 1786 1813 1920 1968 1983 2005 2027 2046 2080 2172 2211
 2228 2255 2278 2304 2462 2595 2628 2670 2709 2806 2821 2860 2961 3104
 3120 3147 3207 3238 3253 3291 3323 3342 3393 3412 3434 3470 3760 3866
 4117 4211]
Processing Video ../../data/uiA4B5Y63IQ.mp4
Vid Cuts: [   0   48   78  208  247  304  334  374  406  452  517  560  636  709
  731  748  773  811  849  937  985 1016 1040 1071 1096 1118 1146 1168
 1191 1222 1248 1278 1303 1332 1358 1388 1414 1466 1489 1518 1542 1569
 1595 1653 1692 1719 1747 1777 1807 1825 1849 1882 1898 1914 1929 1945
 1982 2003 2018 2047 2078 2107 2140 2165 2182 2243 2288 2322 2341 2389
 2437 2500 2549 2598 2624 2643 2673 2700 2725 2747 2773 2790 2817 2842
 2868 2900 2916 2931 2947 2997 3018 3077 3103 3119 3134 3154 3174 3189
 3330]
Processing Video ../../data/LeLsJfGmY_Y.mp4
Vid Cuts: [   0   22   42   62   92  120  253  3

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Processing Video ../../data/QJDRdGxGw8U.mp4
Vid Cuts: [   0   23   68  104  146  176  197  232  254  275  304  332  350  367
  382  417  467  483  514  533  553  586  601  625  653  708  725  750
  778  802  849  869  895  913  931  960 1010 1033 1060 1081 1106 1121
 1138 1163 1186 1205 1224 1253 1268 1296 1352 1390 1426 1448 1472 1490
 1506 1525 1564 1595 1625 1642 1677 1719 1735 1750 1795 1830 1846 1874
 1899 1922 1960 1977 2004 2033 2048 2095 2111 2132 2151 2173 2195 2226
 2241 2292 2320 2337 2402 2468 2488 2504 2543 2613]
Processing Frame 1551/2861

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Processing Video ../../data/h2_iEZtLEUs.mp4
Vid Cuts: [   0   75  434  742  912 1010 1082 1175 1552 1609 1688 1720 1764 1803
 1840 1883 1942 1960 1980 1998 2019 2047 2063 2078 2095 2140 2164 2224
 2260 2295 2315 2337 2359 2420 2455 2473 2488 2505 2523 2539 2565 2588
 2625 2647 2662 2690 2769 2792 2835 2883 2967 3003 3044 3105 3120 3140
 3167 3183 3214 3230 3245 3264 3305 3328 3343 3362 3389 3410 3429 3495
 3552 3653 3677 3699 3720 3744 3765 3785 3820 3835 3862 3918 3957 3980
 4094 4357]
Processing Frame 1051/4390

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Processing Video ../../data/3zebJ_NoduE.mp4
Vid Cuts: [   0   24   88 1432 1450 1469 1506 1547 1568 1618 1637 1661 1692 1716
 1732 1763 1789 1805 1820 1836 1857 1872 1888 1913 1947 1976 1997 2018
 2042 2057 2092 2127 2145 2169 2186 2210 2227 2256 2272 2292 2307 2386
 2434 2481 2499 2521 2544 2563 2583 2611 2703 2742]
Processing Frame 2611/2934

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Processing Video ../../data/8NNQqHsIc-4.mp4
Vid Cuts: [   0  121  157  176  275  301  326  371  415  497  668  695  724  754
  771  788  807  822  840  868  887  902  917  935  951  966  987 1004
 1029 1049 1073 1094 1119 1137 1153 1169 1185 1221 1297 1339 1405 1425
 1453 1469 1489 1518 1537 1559 1600 1623 1638 1672 1694 1728 1748 1770
 1791 1827 1843 1858 1886 1952 1969 1987 2036 2056 2177 2196 2220 2246
 2265 2287 2304 2354 2389 2423 2524 2553 2641 2661 2696 2733 2784 2799
 2814 2829 2846 2878 2900 2924 2944 2959 2980 3010 3118 3140]
Processing Frame 2123/3168

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Processing Video ../../data/Gkh_fL_8IzQ.mp4
Vid Cuts: [   0   25   94  130  162  216  241  260  281  301  330  351  389  413
  494  539  562  585  609  630  651  666  682  697  712  729  748  767
  790  810  839  871  898  921  939  957  991 1014 1031 1050 1071 1089
 1115 1139 1168 1189 1205 1235 1260 1284 1315 1347 1372 1398 1430 1458
 1494 1510 1533 1558 1574 1589 1606 1621 1636 1654 1677 1694 1710 1744
 1761 1776 1804 1822 1838 1853 1877 1895 1923 1945 1971 1996 2018 2049
 2073 2097 2122 2141 2165 2187 2207 2244 2259 2281 2307 2326 2353 2394
 2411 2432 2453 2468 2483 2500 2518 2537 2552 2568 2583 2601 2616 2650
 2672 2687 2704 2720 2737 2767 2782 2797 2813 2830 2857 2903 2928 2973
 2988 3014 3036 3064 3117 3132 3168 3257]
Processing Frame 1465/3288

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Processing Video ../../data/xGEJwJ32oBs.mp4
Vid Cuts: [   0   31  181  569  592  701  716  970 1314 1333 1370 1385 1408 1427
 1442 1458 1476 1491 1506 1528 1547 1567 1597 1648 1663 1680 1701 1717
 1732 1762 1778 1803 1828 1848 1867 1893 1923 1943 1958 1986 2001 2028
 2043 2089 2182 2198 2243]
Processing Frame 2153/2444

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Processing Video ../../data/Yt7ofokzn04.mp4
Vid Cuts: [   0   33  129  166  220  268  306  358  385  412  468  580  596  622
  656  714  744  787  830  878  951  986 1044 1077 1094 1115 1139 1154
 1171 1186 1201 1218 1277 1300 1329 1360 1393 1423 1487 1571 1595 1617
 1638 1670 1694 1714 1754 1791 1855 1909 1946 1970 2004 2028 2050 2078
 2107 2126 2150 2165 2181 2206 2225 2260 2291 2332 2354 2379 2402 2420
 2443 2459 2474 2504 2556 2588 2645 2700 2772 2849 2882 2912 2952 2983
 2999 3014 3029 3047 3075 3118 3138 3159 3182 3213 3292 3346 3380 3401]
Processing Video ../../data/4SerZm7DheA.mp4
Vid Cuts: [   0  110  158  189  278  487  522  553  598  635  657  690  721  751
  791  824  865  935  985 1019 1047 1079 1104 1154 1193 1223 1238 1263
 1278 1306 1321 1338 1358 1377 1404 1426 1448 1484 1500 1567 1582 1614
 1650 1672 1705 1753 1850 1881 1939 1961 1992 2009 2031 2048 2070 2086
 2111 2129 2156 2171 2186 2226 2257 2288 2335 2371 2393 2410 2430 2449
 2469 2501 2529 2555 2575 2592 2619 263

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Processing Video ../../data/Kq02sBe5xko.mp4
Vid Cuts: [   0   14   36   55   72   91  126  155  183  205  220  261  284  309
  355  379  410  431  469  491  527  550  598  640  696  739  761  804
  834  871  893  919  941  960  982 1016 1053 1089 1105 1134 1165 1189
 1238 1276 1338 1355 1396 1423 1451 1469 1499 1554 1601 1768 1795 1827
 1863 1904 1991 2011 2033 2050 2089 2131 2152 2176 2210 2237 2252 2277
 2292 2307 2417 2470 2499 2534 2551 2642 2713 2736 2757 2779 2864 2900
 2981 2999 3081 3112 3171 3217]
Processing Frame 1577/3369

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Processing Video ../../data/0poXFSvX0_4.mp4
Vid Cuts: [   0   49  133  161  183  210  225  328  347  365  388  403  424  448
  508  536  551  614  631  654  722  745  785  813  829  862  893  942
  967  992 1007 1023 1055 1119 1143 1163 1230 1258 1279 1296 1321 1343
 1366 1386 1403 1427 1450 1473 1490 1508 1527 1544 1559 1578 1609 1624
 1648 1665 1682 1722 1745 1779 1795 1823 1838 1853 1868 1909 1932 1947
 1967 1982 2028]
Processing Video ../../data/pOvfmpgbzHI.mp4
Vid Cuts: [   0   24  144  168  184  200  217  233  295  310  356  428  556  594
  660  693  716  733  754  781  807  858  879  917  942  959 1040 1059
 1076 1103 1164 1188 1203 1220 1235 1267 1296 1336 1412 1440 1480 1496
 1519 1535 1561 1579 1606 1645 1664 1687 1706 1725 1744 1763 1786 1816
 1841 1895 1913 1928 1967 1988 2007 2032 2064 2088 2116 2131 2147 2162
 2177 2196 2266 2282 2298 2320 2347 2382 2402 2424 2457 2492 2513 2534
 2572 2602 2619 2648 2667 2682 2703 2723 2789 2912 2931 2983 3006 3037
 3066 3154]
Processing 