### Import Libraries
OpenCV, NumPy and Matplotlib 

In [1]:
import cv2
import joblib
import random
import numpy as np
import matplotlib.pyplot as plt

### Define Constants
A set of constants which we will be using throughout the notebook

In [6]:
# Size, in pixels, that every video frame is resized to before windows are cut
# from it (used by get_frames and get_window_triples).
frame_width = 360
frame_height = 240

# Side length of the square windows cropped from the frames to build the dataset.
window_dim = 80

# Important directories: where the source clips live and where the dataset is written.
videos_directory = '../Clips/Train/'
dataset_directory = './'

### Define window sequence analyzers
Here, I define a set of functions which will provide us some information about any given triple of windows, which in turn will help us in deciding whether to include the given triple within our dataset or not.

Usually, we want only those triples within our dataset for which intermediate window generation is neither too easy nor too difficult. The first function, `average_window_evaluator`, calculates a value which indicates how well the average window fares as the intermediate window. We can use the performance of the average window (as the intermediate window) as a measure of how difficult or easy intermediate window generation is for a given triple. For any given triple, if the function `average_window_evaluator` calculates a relatively small value, it indicates that very little movement of objects takes place within the window. 

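Whenever we use a continuous sequence of frames for generating a set of triples for our dataset, we are bound to run into triples across which a scene change or a scene break occurs. These triples are pretty much useless, and we would not want to include them in our dataset. The second function, `window_triple_break_evaluator`, measures how unevenly the middle window differs from the first and the last window of a triple. The third function, `detect_broken_window_triple`, makes use of the first two functions to identify such triples: it returns `False` for a triple that appears to span a scene break and `True` otherwise, so it can be used directly as a filter predicate. 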

In [7]:
def average_window_evaluator(example):
    """Score how well the average of the outer windows predicts the middle one.

    Args:
        example: Indexable triple ``(first, middle, last)`` of NumPy arrays
            with matching shapes.

    Returns:
        The mean squared difference between the floor-averaged outer windows
        and the middle window. A small value means the plain average is
        already a good stand-in for the middle window.
    """
    first, middle, last = example[0], example[1], example[2]

    # Widen before adding so the sum of two windows cannot overflow.
    blended = (first.astype(np.uint64) + last.astype(np.uint64)) // 2

    # NOTE: with an unsigned-integer middle window (the crops built by
    # get_window_triples are uint8) this subtraction stays in uint64, so
    # negative differences wrap around. Squaring in uint64 still produces the
    # true squared difference modulo 2**64, so the result is unaffected.
    squared_error = (blended - middle) ** 2
    return np.mean(squared_error)

def window_triple_break_evaluator(example):
    """Measure how lopsided the middle window's distance to its neighbours is.

    Args:
        example: Indexable triple ``(first, middle, last)`` of integer NumPy
            arrays with matching shapes.

    Returns:
        ``(d1 - d2) / (d1 + d2)`` where ``d1`` and ``d2`` are the mean squared
        differences between the middle window and the first / last window.
        The value lies in ``[-1, 1]``; values near the extremes mean the
        middle window resembles one neighbour far more than the other.
        Returns ``0.0`` when all three windows are identical (both distances
        are zero), instead of dividing zero by zero.
    """
    # Signed arithmetic keeps negative pixel differences well defined rather
    # than relying on unsigned wrap-around being cancelled by the squaring.
    middle = example[1].astype(np.int64)
    first_mid_diff = np.mean((example[0].astype(np.int64) - middle) ** 2)
    last_mid_diff = np.mean((example[2].astype(np.int64) - middle) ** 2)

    total = first_mid_diff + last_mid_diff
    if total == 0:
        # No difference on either side: perfectly balanced, nothing to divide.
        return 0.0
    return (first_mid_diff - last_mid_diff) / total

def detect_broken_window_triple(example):
    """Tell whether a window triple looks intact.

    Despite the name, the return value is ``True`` when the triple is NOT
    judged broken and ``False`` when it is, so the function can be used
    directly as a keep-predicate.

    A triple is judged broken when its asymmetry score (absolute value of
    ``window_triple_break_evaluator``) and its average-window error
    (``average_window_evaluator``) both exceed one of the paired limits below.
    The more asymmetric the triple, the smaller the error needed to reject it.

    Args:
        example: Indexable triple ``(first, middle, last)`` of windows.

    Returns:
        ``False`` if any rejection rule fires, otherwise ``True``.
    """
    blend_error = average_window_evaluator(example)
    asymmetry = abs(window_triple_break_evaluator(example))

    # (minimum asymmetry, average-window error that must be exceeded)
    rejection_rules = (
        (0.20, 5000),
        (0.35, 3000),
        (0.50, 2000),
        (0.60, 1200),
        (0.70, 600),
        (0.80, 100),
    )
    looks_broken = any(
        asymmetry >= min_asymmetry and blend_error > error_limit
        for min_asymmetry, error_limit in rejection_rules
    )
    return not looks_broken

### Define functions to detect noisy frames

The two functions defined below help in deciding whether a frame contains a lot of noise, or a large number of small objects which could be hard to keep track of in any kind of motion. Both functions have the same goal; however, the first is more precise but slower, and the second is less precise but faster.   

The first function detects noise (or a large collection of small objects) in a given frame by measuring how similar the neighboring pixels of any given pixel are to itself, and then aggregating this measure over all pixels within the frame. 
The second function detects noise within a given frame by first downscaling the frame, and then upscaling it back to its original resolution, and comparing the resulting frame against the original frame. 

In [8]:
def frame_speckiness_evaluator(frame, translation):
    """Score how much a frame differs from slightly shifted copies of itself.

    For every offset ``(i, j)`` with ``-translation <= i, j <= translation``
    the frame is compared with itself shifted by that offset. Each pixel pair
    whose summed absolute channel difference exceeds 80 contributes 200 to
    the score, which is normalised by ``width * height * translation ** 2``.

    Args:
        frame: Image array indexed ``(row, column, channel)`` with at least
            three channels; only channels ``0:3`` are compared.
        translation: Maximum shift, in pixels, along each axis. The
            normaliser is zero when this is 0, so use a value >= 1.

    Returns:
        The accumulated, normalised difference score (higher means noisier).
    """
    # NumPy images are (rows, columns, ...): axis 0 is the height and axis 1
    # the width. Using them the right way round keeps the two shifted views
    # the same shape for non-square frames as well.
    frame_height, frame_width = frame.shape[0], frame.shape[1]

    # Signed type so that pixel differences can go negative before abs().
    frame = frame.astype(np.int32)
    normaliser = frame_width * frame_height * (translation ** 2)

    total_diff = 0
    for i in range(-translation, translation + 1):
        for j in range(-translation, translation + 1):
            # Two overlapping views of the frame, offset from each other by (i, j).
            window_1 = frame[max(0, i):min(frame_height, frame_height + i),
                             max(0, j):min(frame_width, frame_width + j), 0:3]
            window_2 = frame[max(0, -i):min(frame_height, frame_height - i),
                             max(0, -j):min(frame_width, frame_width - j), 0:3]
            differs = np.sum(np.abs(window_1 - window_2), axis=2) > 80
            diff = differs.astype(np.uint32) * 200
            total_diff += np.sum(diff) / normaliser
    return total_diff

def frame_bloom_evaluator(frame, factor):
    """Score the detail a frame loses when it is downscaled and upscaled again.

    The frame is resized by ``factor``, resized back to its original size and
    compared element-wise with the original. Every element whose absolute
    difference exceeds 5 contributes 20 to the score, which is normalised by
    ``width * height``.

    Args:
        frame: Image array indexed ``(row, column[, channel])``.
        factor: Scale applied to both dimensions for the intermediate
            downscaled image (e.g. 0.27).

    Returns:
        The normalised difference score (higher means more fine detail/noise).
    """
    # NumPy images are (rows, columns, ...): axis 0 is the height and axis 1
    # the width.
    frame_height, frame_width = frame.shape[0], frame.shape[1]

    # cv2.resize expects dsize as (width, height). Clamp to one pixel so that
    # very small frames or factors do not request an empty image.
    small_size = (max(1, int(frame_width * factor)),
                  max(1, int(frame_height * factor)))
    downscaled = cv2.resize(frame, small_size)
    restored = cv2.resize(downscaled, (frame_width, frame_height))

    # Signed type so that the difference can go negative before abs().
    changed = np.abs(frame.astype(np.int32) - restored.astype(np.int32)) > 5
    diff = changed.astype(np.uint64) * 20
    return np.sum(diff) / (frame_width * frame_height)

### Define threshold values which will be used for filtering
These values may change slightly, depending on the window size used for cropping.

In [20]:
# Exclusive lower/upper bounds on average_window_evaluator(triple); triples
# whose score falls outside this open interval are dropped by
# extract_useful_window_triples.
WINDOW_TRIPLE_AVERAGE_DIFF_LOWER_THRESHOLD_VAL = 100
WINDOW_TRIPLE_AVERAGE_DIFF_UPPER_THRESHOLD_VAL = 15000
# Highest frame_speckiness_evaluator score a window may have and still be kept.
FRAME_SPECKINESS_THRESHOLD_VALUE = 420
# Value passed as the `translation` argument of frame_speckiness_evaluator.
FRAME_SPECKINESS_TRANSLATION_FACTOR = 1
# NOTE(review): the two bloom settings are not referenced by the code in this
# section; presumably a score threshold and the `factor` argument for
# frame_bloom_evaluator - confirm before relying on them.
FRAME_BLOOM_THRESHOLD_VALUE = 30
FRAME_BLOOM_FACTOR = 0.27

### Define function to extract useful window triples from a list of triples
The following function filters out all unnecessary window triples from a given list of window triples.

In [17]:
def extract_useful_window_triples(triple_list):
    """Keep only the window triples that pass every quality check.

    A triple is kept when, in this order:
      1. its average-window error lies strictly between the lower and upper
         WINDOW_TRIPLE_AVERAGE_DIFF thresholds,
      2. detect_broken_window_triple reports it as intact, and
      3. the speckiness scores of its first and middle windows do not exceed
         FRAME_SPECKINESS_THRESHOLD_VALUE.

    NOTE(review): the last window (index 2) is never checked for speckiness -
    confirm whether that is intended.

    Args:
        triple_list: Iterable of ``(first, middle, last)`` window triples.

    Returns:
        A list with the triples that passed, in their original order. The
        number of survivors is also printed.
    """
    def _is_useful(triple):
        blend_error = average_window_evaluator(triple)
        in_range = (WINDOW_TRIPLE_AVERAGE_DIFF_UPPER_THRESHOLD_VAL > blend_error
                    > WINDOW_TRIPLE_AVERAGE_DIFF_LOWER_THRESHOLD_VAL)
        if not in_range:
            return False
        if not detect_broken_window_triple(triple):
            return False
        for window in (triple[0], triple[1]):
            score = frame_speckiness_evaluator(window, FRAME_SPECKINESS_TRANSLATION_FACTOR)
            # Written as "not <=" so that a NaN score is rejected as well.
            if not score <= FRAME_SPECKINESS_THRESHOLD_VALUE:
                return False
        return True

    useful_triples = [triple for triple in triple_list if _is_useful(triple)]
    print("Useful sequence count =", len(useful_triples))

    return useful_triples

### Define function to get a set of candidate window triples from a series of continuous frames
The following function retrieves a set of candidate window triples from a series of continuous frames.

In [18]:
def get_window_triples(frame_list, max_sequence_count, crop_image_dist, gap_3_prob, gap_5_prob, gap_7_prob):
    """Cut candidate (first, middle, last) window triples out of a frame sequence.

    Walks through ``frame_list`` in steps of a randomly chosen gap of 3, 5 or
    7 frames. For each step the first, centre and last frame of the span are
    cropped on a regular grid of ``window_dim`` x ``window_dim`` windows, and
    each grid position yields one triple.

    Args:
        frame_list: Sequence of frames, each indexed (row, column, channel)
            and at least ``frame_height`` x ``frame_width`` in size.
        max_sequence_count: Stop starting new frame spans once this many
            triples were collected. It is only checked between spans, so the
            result may exceed it by up to one span's worth of crops.
        crop_image_dist: Stride, in pixels, between neighbouring crop origins.
        gap_3_prob: Probability of using a span of 3 frames.
        gap_5_prob: Probability of using a span of 5 frames.
        gap_7_prob: Unused; a span of 7 frames is chosen with the remaining
            probability ``1 - gap_3_prob - gap_5_prob``.

    Returns:
        List of ``(X1_cropped, Y_cropped, X2_cropped)`` tuples of uint8
        arrays. The number of triples is also printed.
    """
    cropped_sequences = []

    # "+ 1" so that the last window that still fits entirely inside the frame
    # (origin at frame size minus window size) is part of the grid.
    row_origins = range(0, frame_height - window_dim + 1, crop_image_dist)
    col_origins = range(0, frame_width - window_dim + 1, crop_image_dist)

    start = 0
    # The margin of 10 keeps every index used below (at most start + 6) valid.
    while len(cropped_sequences) < max_sequence_count and start + 10 < len(frame_list):
        gap_estimator = random.random()
        if gap_estimator <= gap_3_prob:
            gap = 3
        elif gap_estimator <= gap_3_prob + gap_5_prob:
            gap = 5
        else:
            gap = 7

        X1 = frame_list[start]
        Y = frame_list[start + gap // 2]
        X2 = frame_list[start + gap - 1]
        for y in row_origins:
            for x in col_origins:
                X1_cropped = X1[y:y + window_dim, x:x + window_dim, :].astype(np.uint8)
                Y_cropped = Y[y:y + window_dim, x:x + window_dim, :].astype(np.uint8)
                X2_cropped = X2[y:y + window_dim, x:x + window_dim, :].astype(np.uint8)
                cropped_sequences.append((X1_cropped, Y_cropped, X2_cropped))

        start = start + gap

    print("Cropped sequence count =", len(cropped_sequences))
    return cropped_sequences

### Define function to get continuous frames from a clip
The following function retrieves a given number of continuous frames from a video clip, given a starting frame index. 

In [12]:
def get_frames(file_path, start_frame, num_frames):
    """Read a run of consecutive frames from a video file.

    Args:
        file_path: Path of the video clip to open.
        start_frame: Zero-based index of the first frame to return; earlier
            frames are decoded and discarded.
        num_frames: Maximum number of frames to return.

    Returns:
        A list of at most ``num_frames`` frames, each resized to
        ``frame_width`` x ``frame_height``. The list is shorter when the clip
        ends early and empty when the clip cannot be opened.
    """
    frames = []
    video = cv2.VideoCapture(file_path)
    try:
        frame_index = 0
        # Stop as soon as enough frames were collected, so exactly num_frames
        # frames are returned and no extra frame is decoded.
        while video.isOpened() and len(frames) < num_frames:
            ret, frame = video.read()
            if not ret:
                # End of stream or read failure.
                break
            if frame_index >= start_frame:
                # cv2.resize takes the target size as (width, height).
                frames.append(cv2.resize(frame, (frame_width, frame_height)))
                if len(frames) % 1000 == 0:
                    print("Frame Count = ", len(frames))
            frame_index += 1
    finally:
        # Release the capture even if decoding or resizing raised.
        video.release()
        cv2.destroyAllWindows()
    return frames

### Define function to convert a frame sequence list to dataset
The following function converts a given list of window triples to an X-Y dataset.

In [13]:
def window_triple_to_dataset(frame_sequences):
    """Pack window triples into input/target arrays.

    Args:
        frame_sequences: Sequence of ``(first, middle, last)`` triples whose
            windows are ``window_dim`` x ``window_dim`` with 3 channels.

    Returns:
        Tuple ``(X, Y)`` of uint8 arrays. ``X`` has shape
        ``(n, window_dim, window_dim, 6)`` with the first window in channels
        0-2 and the last window in channels 3-5; ``Y`` has shape
        ``(n, window_dim, window_dim, 3)`` and holds the middle window.
    """
    count = len(frame_sequences)
    X = np.zeros((count, window_dim, window_dim, 6), dtype=np.uint8)
    Y = np.zeros((count, window_dim, window_dim, 3), dtype=np.uint8)
    for row, triple in enumerate(frame_sequences):
        # Outer windows are stacked along the channel axis as the input ...
        X[row, :, :, :3] = triple[0]
        X[row, :, :, 3:] = triple[2]
        # ... and the middle window is the target.
        Y[row] = triple[1]
    return X, Y

### Get useful dataset
The following piece of code creates a dataset of window triples and saves it to file

In [24]:
# Accumulator that the following cells extend with useful window triples.
final_window_triple_list = []

In [25]:
# Load the clip's frames, cut candidate window triples from them, then keep
# only the triples that pass the quality filters.
frame_list = get_frames(videos_directory + 'clip1.mp4', 0, 15000)
window_triples = get_window_triples(frame_list, 100000, 30, 0.14, 0.62, 0.24)
# Filter the triples produced on the previous line; `cropped_sequences` only
# exists as a local inside get_window_triples.
useful_window_triples = extract_useful_window_triples(window_triples)

Cropped sequence count = 3000
Useful sequence count = 101


In [26]:
# Append every useful triple from this run to the accumulated list.
final_window_triple_list.extend(useful_window_triples)

In [27]:
# Order the triples by their average_window_evaluator score, highest first.
final_window_triple_list = sorted(final_window_triple_list, key=average_window_evaluator, reverse=True)

In [28]:
# Pack the triples into input (X) and target (Y) arrays and write both to
# .npy files inside dataset_directory.
X, Y = window_triple_to_dataset(final_window_triple_list)
np.save(dataset_directory + 'X_dummy.npy', X)
np.save(dataset_directory + 'Y_dummy.npy', Y)

In [None]:
# Periodically delete variables that are no longer needed to keep memory usage low.
# The names must match the variables created by the extraction cell.
del frame_list
del window_triples
del useful_window_triples

In [None]:
# Number of window triples accumulated so far.
len(final_window_triple_list)