In [1]:
import os
import sys
# Normalised paths of the dataset folders, resolved against the current working
# directory. All three are then appended to the module search path.
# NOTE(review): these folders appear to hold data rather than importable
# modules, so the sys.path entries may be unnecessary - confirm.
annotations_dir = os.path.normpath(os.sep.join((os.getcwd(), "annotations")))
train_dir = os.path.normpath(os.sep.join((os.getcwd(), "video", "train")))
val_dir = os.path.normpath(os.sep.join((os.getcwd(), "video", "val")))
sys.path.extend((annotations_dir, train_dir, val_dir))

import json
import numpy as np
import shutil


In [7]:
transition_times = {}
def read_transition_times(path='./annotations/transition_times.json'):
    '''
    Load the transition-time annotations used by the later processing steps.

    The parsed JSON replaces the module-level ``transition_times`` and is also
    returned, so callers do not have to go through the global.

    Args:
        path: Location of the JSON file. Defaults to the annotations folder
            relative to the current working directory.

    Returns:
        The object parsed from the JSON file.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    '''
    global transition_times

    with open(path, 'r', encoding='utf-8') as file:
        transition_times = json.load(file)
    return transition_times

# Populate the module-level transition_times before any cell below reads it.
read_transition_times()

In [5]:
def _delete_unqualified(list_path, video_dir):
    '''
    Remove every file in ``video_dir`` whose name is not listed in ``list_path``.

    ``list_path`` is read as one video name per line (without extension);
    ".mp4" is appended to each name to form the file name to keep.

    Args:
        list_path: Text file with the qualified video names.
        video_dir: Folder whose entries are checked against that list.

    Returns:
        The list of qualified file names (with ".mp4"), in file order.

    Raises:
        FileNotFoundError: If ``list_path`` or ``video_dir`` does not exist.
    '''
    with open(list_path, 'r', encoding='utf-8') as file:
        # Skip blank lines: they would otherwise become a bogus ".mp4" entry.
        qualified = [line.strip() + ".mp4" for line in file if line.strip()]
    print(len(qualified))

    keep = set(qualified)  # constant-time membership instead of scanning the list
    cnt = 0
    for filename in os.listdir(video_dir):
        if filename in keep:
            continue
        file_path = os.path.join(video_dir, filename)
        # os.remove cannot delete a directory; leave real sub-directories alone
        # rather than aborting half-way through the clean-up.
        if os.path.isdir(file_path) and not os.path.islink(file_path):
            continue
        os.remove(file_path)
        cnt += 1
    print("Unqualified video count: ", cnt)
    return qualified


def deleteTrainUnqualified():
    '''
    Delete every file in ./video/train that is not listed in
    ./annotations/train_filtered.txt.

    Returns the list of qualified file names (each with ".mp4" appended).
    '''
    return _delete_unqualified('./annotations/train_filtered.txt', './video/train')


def deleteValUnqualified():
    '''
    Delete every file in ./video/val that is not listed in
    ./annotations/val_filtered.txt.

    Returns the list of qualified file names (each with ".mp4" appended).
    '''
    return _delete_unqualified('./annotations/val_filtered.txt', './video/val')

In [6]:
def count_files_with_n_notfound_3():
    '''
    Collect the videos labelled "Intentional/No oops" (n_notfound == 3) whose
    length is between 5 and 10 seconds, both bounds included.

    Names are read from the train and val filtered lists and looked up in the
    module-level transition_times; names without an entry are ignored.

    Returns a (train, val) pair of lists of video names (no extension).
    '''
    def _select(list_path):
        # Names from one filtered list that pass the label and length checks.
        with open(list_path, 'r', encoding='utf-8') as file:
            names = [raw.strip() for raw in file.readlines()]

        picked = []
        for name in names:
            if name not in transition_times:
                continue
            entry = transition_times[name]
            if entry["n_notfound"] == 3 and 5 <= entry["len"] <= 10:
                picked.append(name)
        return picked

    train_intentional = _select('./annotations/train_filtered.txt')
    val_intentional = _select('./annotations/val_filtered.txt')
    return train_intentional, val_intentional

In [18]:
# Select the n_notfound == 3 videos for both splits and print how many were kept
# (train count first, then val).
train_intentional, val_intentional = count_files_with_n_notfound_3()
print(len(train_intentional), len(val_intentional))

347 283


In [13]:
def count_files_with_n_notfound_0():
    '''
    Collect the videos unanimously labelled "Unintentional/Transitional/Oops"
    (n_notfound == 0) whose length is between 5 and 10 seconds, both bounds
    included, and whose annotated transition times pass two further checks:

    * the mean of the "t" values lies in [1, 4], which drops transitions that
      happen too early or too late for the later trimming step;
    * the largest and smallest "t" values differ by less than 0.8.

    Names are read from the train and val filtered lists and looked up in the
    module-level transition_times; names without an entry are ignored.

    Returns a (train, val) pair of lists of video names (no extension).
    '''
    def _select(list_path):
        # Names from one filtered list that pass every check above.
        with open(list_path, 'r', encoding='utf-8') as file:
            names = [raw.strip() for raw in file.readlines()]

        picked = []
        for name in names:
            if name not in transition_times:
                continue
            entry = transition_times[name]
            if entry["n_notfound"] != 0 or not (5 <= entry["len"] <= 10):
                continue

            accident = np.mean(entry["t"])
            earliest = np.min(entry["t"])
            latest = np.max(entry["t"])
            if 1 <= accident <= 4 and latest - earliest < 0.8:
                picked.append(name)
        return picked

    train_oops = _select('./annotations/train_filtered.txt')
    val_oops = _select('./annotations/val_filtered.txt')
    return train_oops, val_oops

In [17]:
# Select the n_notfound == 0 videos for both splits and print how many were kept
# (train count first, then val).
train_oops, val_oops = count_files_with_n_notfound_0()
print(len(train_oops), len(val_oops))

668 450


In [20]:
def create_new_folder():
    '''
    Create the folder structure for the selected videos:
    video_small -> (train, val)
    train -> (oops, no_oops)
    val -> (oops, no_oops)

    Folders that already exist are left untouched, so the function can be
    called repeatedly.

    Raises:
        FileExistsError: If one of the target paths exists but is not a
            directory.
    '''
    main_folder = "./video_small"

    # os.makedirs creates the missing parents as well, so asking for the four
    # leaf folders is enough. exist_ok=True replaces the separate
    # "exists?" check, which could race with another process.
    for split in ("train", "val"):
        for label in ("no_oops", "oops"):
            os.makedirs(os.path.join(main_folder, split, label), exist_ok=True)


def copy_files(source_dir, file_list, dest_dir):
    '''
    Copy "<name>.mp4" from source_dir to dest_dir for every name in file_list.

    A video that already exists in dest_dir is reported and left as it is.
    A single summary line is printed once the whole list has been processed.
    '''
    for file_name in file_list:
        video = file_name + ".mp4"
        destination_file = os.path.join(dest_dir, video)
        if os.path.exists(destination_file):
            print(f"File '{file_name}.mp4' already exists in '{dest_dir}'")
            continue
        shutil.copyfile(os.path.join(source_dir, video), destination_file, follow_symlinks=True)
    print(f"Copied files from {source_dir} to {dest_dir}")

In [16]:
# Make sure the ./video_small folder tree exists before any video is copied into it.
create_new_folder()

In [19]:
# Rebuild each list by encoding every name to UTF-8 and decoding it again.
# NOTE(review): for a str that can be encoded, this round trip yields an equal
# str, so these four lines look like a no-op. If the goal was to make names
# match the files on disk, Unicode normalisation may have been intended - confirm.
train_intentional = [name.encode('utf-8').decode('utf-8') for name in train_intentional]
train_oops = [name.encode('utf-8').decode('utf-8') for name in train_oops]
val_intentional = [name.encode('utf-8').decode('utf-8') for name in val_intentional]
val_oops = [name.encode('utf-8').decode('utf-8') for name in val_oops]

In [21]:
# Train split, "no_oops" class: copy the selected intentional videos.
copy_files("./video/train", train_intentional, "./video_small/train/no_oops")

Copied files from ./video/train to ./video_small/train/no_oops


In [22]:
# Train split, "oops" class: copy the selected unintentional videos.
copy_files("./video/train", train_oops, "./video_small/train/oops")

Copied files from ./video/train to ./video_small/train/oops


In [23]:
# Val split, "no_oops" class: copy the selected intentional videos.
copy_files("./video/val", val_intentional, "./video_small/val/no_oops")

Copied files from ./video/val to ./video_small/val/no_oops


In [24]:
# Val split, "oops" class: copy the selected unintentional videos.
copy_files("./video/val", val_oops, "./video_small/val/oops")

Copied files from ./video/val to ./video_small/val/oops


In [6]:
small_transition_times = {}

def update_small_transition_times(folder_path, category, label):
    '''
    Add the annotations of every ".mp4" file in folder_path to the module-level
    small_transition_times.

    For each video whose name (file name without extension) has an entry in
    transition_times, a copy of that entry is stored under the same name with
    two extra keys, "category" and "label". Videos without an entry and files
    with another extension are ignored.

    Args:
        folder_path: Folder to scan for ".mp4" files.
        category: Value stored under the "category" key.
        label: Value stored under the "label" key.

    Raises:
        FileNotFoundError: If folder_path does not exist.
    '''
    for filename in os.listdir(folder_path):
        if not filename.endswith(".mp4"):
            continue
        video_name = os.path.splitext(filename)[0]
        if video_name in transition_times:
            # Store a copy: writing the tags into the original entry would also
            # change transition_times, because both names would share one dict.
            small_transition_times[video_name] = {
                **transition_times[video_name],
                "category": category,
                "label": label,
            }

In [3]:
# Class folders of the reduced dataset, one per (split, label) pair.
train_no_oops_folder = "./video_small/train/no_oops"
train_oops_folder = "./video_small/train/oops"
val_no_oops_folder = "./video_small/val/no_oops"
val_oops_folder = "./video_small/val/oops"

In [4]:
# Trim the val folders so that they contain fewer videos
import os
import random
import shutil

# Paths of the two val class folders that get trimmed below.
# These repeat the values assigned to the same names in the previous cell.
val_no_oops_folder = "./video_small/val/no_oops"
val_oops_folder = "./video_small/val/oops"

# Function to randomly select and delete videos from a folder
def select_and_delete_videos(folder_path, num_to_keep, seed=None):
    '''
    Randomly delete files from folder_path until at most num_to_keep remain.

    Only regular files are counted and deleted; sub-directories are ignored.
    When the folder already holds num_to_keep files or fewer, nothing is
    deleted. Each deletion is reported with a printed line.

    Args:
        folder_path: Folder to trim.
        num_to_keep: Number of files to leave in the folder; must be >= 0.
        seed: Optional seed. With a seed, the same folder content always
            yields the same selection. Without one, the module-level random
            generator is used.

    Raises:
        ValueError: If num_to_keep is negative.
        FileNotFoundError: If folder_path does not exist.
    '''
    if num_to_keep < 0:
        raise ValueError("num_to_keep must be non-negative")

    # Sorted so that the sampled population does not depend on the order in
    # which the filesystem happens to list the entries.
    all_videos = sorted(
        name for name in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, name))
    )

    # Clamp at zero: random.sample rejects a negative sample size, which is
    # what a folder that is already small enough would otherwise produce.
    num_to_delete = max(0, len(all_videos) - num_to_keep)

    rng = random if seed is None else random.Random(seed)
    videos_to_delete = rng.sample(all_videos, num_to_delete)

    for video in videos_to_delete:
        video_path = os.path.join(folder_path, video)
        os.remove(video_path)
        print(f"Deleted {video} from {folder_path}")

# Trim val/no_oops by deleting randomly chosen videos; 80 are kept.
# Note: the deletions are permanent and act on the files inside ./video_small.
select_and_delete_videos(val_no_oops_folder, num_to_keep=80)

# Trim val/oops the same way; 160 are kept.
select_and_delete_videos(val_oops_folder, num_to_keep=160)


Deleted We're Back! Fails of the Week (May 2019) _ FailArmy29.mp4 from ./video_small/val/no_oops
Deleted Field Goal Fail - Fails of the Week (January 2019)11.mp4 from ./video_small/val/no_oops
Deleted Fails of the Week - Lookout for That Fence! (March 2017) _ FailArmy16.mp4 from ./video_small/val/no_oops
Deleted Best Fails of the Week 3 May 2016 _ FailArmy64.mp4 from ./video_small/val/no_oops
Deleted We're Back! Fails of the Week (May 2019) _ FailArmy32.mp4 from ./video_small/val/no_oops
Deleted FailFactory - Derpy Dogs (March 2019) _ FailArmy141.mp4 from ./video_small/val/no_oops
Deleted The Master Swordsmen - Fails You Missed #17 _ FailArmy76.mp4 from ./video_small/val/no_oops
Deleted The Ultimate Funny Monkey Compilation _ Monkey Fails by FailArmy39.mp4 from ./video_small/val/no_oops
Deleted Father's Day Fails _ 'Dad Fails' By FailArmy 201636.mp4 from ./video_small/val/no_oops
Deleted Get Out Of The Way!! - FailArmy After Dark (Ep. 12)40.mp4 from ./video_small/val/no_oops
Deleted Fa

In [8]:


# Register the train videos found on disk, tagged with their split ("train")
# and the class folder they sit in.
update_small_transition_times(train_no_oops_folder, "train", "no_oops")
update_small_transition_times(train_oops_folder, "train", "oops")

# Same for the val videos found on disk.
update_small_transition_times(val_no_oops_folder, "val", "no_oops")
update_small_transition_times(val_oops_folder, "val", "oops")

In [10]:
# Number of videos collected in small_transition_times.
print(len(small_transition_times))

1255


In [11]:
output_json_path = "./video_small/smaller_transition_times.json"

# exist_ok=True makes this safe to re-run and avoids a separate existence check.
output_directory = os.path.dirname(output_json_path)
os.makedirs(output_directory, exist_ok=True)

# Serialise before opening the file: if a value cannot be converted to JSON, the
# error is raised here and an existing output file is not truncated.
# ensure_ascii=False keeps non-ASCII characters in video names readable.
serialized = json.dumps(small_transition_times, indent=4, ensure_ascii=False)
with open(output_json_path, 'w', encoding='utf-8') as output_json_file:
    output_json_file.write(serialized)

print("Small transition times JSON file saved successfully at:", output_json_path)

Small transition times JSON file saved successfully at: ./video_small/smaller_transition_times.json
