## Extract videos to frames

This function uses the OpenCV library to read the video files and extract the frames. It creates a subdirectory in the output folder for each video file, and saves the frames as JPEG images in that subdirectory with names like frame_0000.jpg, frame_0001.jpg, etc.

In [1]:
import cv2
import numpy as np
import os

def extract_frames(input_folder, output_folder):
    """Extract every frame of each video in ``input_folder`` to JPEG files.

    For each ``.mp4`` or ``.avi`` file (extension matched case-insensitively),
    a subdirectory named after the video (file name without extension) is
    created in ``output_folder`` and the decoded frames are written there as
    ``frame_0000.jpg``, ``frame_0001.jpg``, ... in the order they are read.

    Args:
        input_folder: Directory containing the video files. Only its direct
            entries are examined; subdirectories are not searched.
        output_folder: Directory that receives one subdirectory per video.
            It is created on demand.

    Note:
        Videos that share a base name (e.g. ``a.mp4`` and ``a.avi``) map to
        the same output subdirectory, so the later one overwrites the
        earlier one's frames.
    """
    video_extensions = ('.mp4', '.avi')

    # Sorted so the processing order is deterministic across platforms.
    for filename in sorted(os.listdir(input_folder)):
        video_path = os.path.join(input_folder, filename)

        # Skip anything that is not a regular file with a video extension.
        # Lower-casing lets files such as CLIP.MP4 be picked up as well.
        if not filename.lower().endswith(video_extensions):
            continue
        if not os.path.isfile(video_path):
            continue

        # Create a directory for the frames of this video in the output folder
        output_subfolder = os.path.join(output_folder, os.path.splitext(filename)[0])
        os.makedirs(output_subfolder, exist_ok=True)

        # Open the video file
        cap = cv2.VideoCapture(video_path)
        try:
            frame_count = 0
            while True:
                # Read a frame; ret is False once no further frame is available
                # (this is also the case when the video could not be opened).
                ret, frame = cap.read()
                if not ret:
                    break
                # Save the frame as a JPEG image in the output directory
                output_filename = os.path.join(output_subfolder, f'frame_{frame_count:04d}.jpg')
                cv2.imwrite(output_filename, frame)
                frame_count += 1
        finally:
            # Always release the capture handle, even if reading or writing fails
            cap.release()

In [2]:
# Extract the frames of every video found in ./videos/in into per-video
# subfolders of ./videos/out (paths are relative to the working directory).
extract_frames('./videos/in', './videos/out')

---

## Edge detection

This function loops through all subdirectories (i.e., the per-video frame directories created by extract_frames()) in the input folder. For each one, it creates a matching subdirectory in the output folder. It then reads every .jpg or .png frame in the subdirectory, converts it to grayscale, detects its edges with the Canny edge detection algorithm, and saves the resulting edge image to the output subdirectory under the same filename as the original frame.

In [3]:
def detect_edges(input_folder, output_folder, low_threshold=100, high_threshold=200):
    """Run Canny edge detection on every frame image below ``input_folder``.

    Each subdirectory of ``input_folder`` is mirrored in ``output_folder``.
    Every ``.jpg`` or ``.png`` file inside it (extension matched
    case-insensitively) is read, converted to grayscale, passed through
    ``cv2.Canny`` and written to the mirrored subdirectory under the same
    filename. Files that OpenCV cannot decode are skipped.

    Args:
        input_folder: Directory whose subdirectories contain the frames.
        output_folder: Directory that receives the edge images; the
            subdirectories are created on demand.
        low_threshold: First threshold passed to ``cv2.Canny``.
        high_threshold: Second threshold passed to ``cv2.Canny``.
    """
    image_extensions = ('.jpg', '.png')

    # Loop through all subdirectories (i.e., video frame directories) in the input folder
    for subfolder in os.listdir(input_folder):
        frames_dir = os.path.join(input_folder, subfolder)
        if not os.path.isdir(frames_dir):
            continue

        # Create a corresponding subdirectory in the output folder
        output_subfolder = os.path.join(output_folder, subfolder)
        os.makedirs(output_subfolder, exist_ok=True)

        # Loop through all files (i.e., frames) in the subdirectory
        for filename in os.listdir(frames_dir):
            # Check if the file is an image
            if not filename.lower().endswith(image_extensions):
                continue

            # cv2.imread returns None instead of raising when the file cannot
            # be decoded; skip such files rather than crash in cvtColor.
            img = cv2.imread(os.path.join(frames_dir, filename))
            if img is None:
                continue

            # Convert the image to grayscale
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # Detect edges using the Canny edge detection algorithm
            edges = cv2.Canny(gray, low_threshold, high_threshold)
            # Save the edges image to the output subdirectory
            cv2.imwrite(os.path.join(output_subfolder, filename), edges)

In [4]:
# Run edge detection on the frame folders in ./videos/out and write the
# edge images to matching subfolders of ./videos/edge.
detect_edges('./videos/out', './videos/edge')

---

## Edge to dataframes

This function takes the input_folder and output_folder as arguments. It first iterates over all the subfolders in the input_folder. For each subfolder, it creates the corresponding output folder in the output_folder.

It then iterates over all the edge images in the subfolder, loads each image in grayscale, scales its width and height to 1/5 using OpenCV's resize function, and binarizes it into a 0/1 NumPy array by thresholding the pixel values at 128 (values below 128 become 0, all others become 1). It flattens the array into a single row and appends that row to a list of rows.

Once it has processed all the edge images in the subfolder, it creates a DataFrame with all the rows and writes it to a CSV file in the corresponding output folder.

Note that this function assumes that the edge images are stored as grayscale images, and it uses OpenCV's IMREAD_GRAYSCALE flag to load them. If the edge images are stored in a different format, you may need to modify the code accordingly.

In [7]:
import pandas as pd
import numpy as np

def reduce_resolution_and_convert_to_df(input_folder, output_folder, scale=0.2, threshold=128):
    """Downscale and binarize edge images, writing one CSV per subfolder.

    For every subdirectory of ``input_folder`` a subdirectory of the same
    name is created in ``output_folder``. Each ``.jpg`` or ``.png`` image in
    it (extension matched case-insensitively) is loaded as grayscale, resized
    by ``scale`` along both axes, turned into a 0/1 array and flattened into
    one row. The rows are written, in filename order, to
    ``<output_folder>/<subfolder>/<subfolder>.csv`` without index or header.
    A subfolder with no usable images still gets a CSV file with no data rows.

    Args:
        input_folder: Directory whose subdirectories contain the edge images.
        output_folder: Directory that receives the CSV files; the
            subdirectories are created on demand.
        scale: Resize factor applied to width and height (0.2 = 1/5).
        threshold: Pixels below this value become 0, all others become 1.
    """
    image_extensions = ('.jpg', '.png')

    # Iterate over all the subfolders in the input folder
    for subfolder in os.listdir(input_folder):
        subfolder_path = os.path.join(input_folder, subfolder)

        # Skip non-directories
        if not os.path.isdir(subfolder_path):
            continue

        # Create the output folder if it doesn't exist
        output_subfolder = os.path.join(output_folder, subfolder)
        os.makedirs(output_subfolder, exist_ok=True)

        # os.listdir returns entries in arbitrary order. Sorting by
        # (length, name) keeps rows in frame order and stays numeric even
        # when the frame index grows past its zero padding
        # (frame_9999.jpg comes before frame_10000.jpg).
        image_files = sorted(
            (name for name in os.listdir(subfolder_path)
             if name.lower().endswith(image_extensions)),
            key=lambda name: (len(name), name),
        )

        df_rows = []
        for edge_image_file in image_files:
            edge_image_path = os.path.join(subfolder_path, edge_image_file)

            # cv2.imread returns None instead of raising when the file cannot
            # be decoded; skip such files rather than crash in cv2.resize.
            edge_image = cv2.imread(edge_image_path, cv2.IMREAD_GRAYSCALE)
            if edge_image is None:
                continue

            # Reduce the resolution; dsize (0, 0) makes OpenCV derive the
            # output size from the fx/fy factors.
            edge_image_small = cv2.resize(edge_image, (0, 0), fx=scale, fy=scale)

            # Binarize to 0/1 and flatten into a single row
            edge_image_binary = np.where(edge_image_small < threshold, 0, 1)
            df_rows.append(edge_image_binary.flatten())

        # Create the DataFrame for this subfolder
        df = pd.DataFrame(df_rows)

        # Write the DataFrame to a CSV file in the output folder
        output_file = os.path.join(output_subfolder, f"{subfolder}.csv")
        df.to_csv(output_file, index=False, header=False)

In [8]:
# Convert the edge images in ./videos/edge into one CSV per video folder,
# written under ./videos/dataframes.
reduce_resolution_and_convert_to_df('./videos/edge', './videos/dataframes')