# Retrieve the Frames from the video database

In [None]:
import os
import cv2

# Directory containing the source videos.
videos_dir = "Dataset"

# Root directory under which one sub-directory of frames is created per video.
frames_dir = "Retrieved Frames"

# Extensions treated as video files. They are matched case-insensitively so
# that a file such as "clip.MP4" is not silently skipped.
video_extensions = (".mp4", ".avi")

# Walk every entry of the video directory and dump each video's frames as
# sequentially numbered JPEG files.
for video_filename in os.listdir(videos_dir):
    video_path = os.path.join(videos_dir, video_filename)

    # Ignore sub-directories and anything that is not a recognised video file.
    if not os.path.isfile(video_path):
        continue
    if not video_filename.lower().endswith(video_extensions):
        continue

    video = cv2.VideoCapture(video_path)
    try:
        # A file OpenCV cannot decode would otherwise just yield zero frames
        # and leave an empty output directory behind; report it instead.
        if not video.isOpened():
            print(f"Could not open video, skipping: {video_path}")
            continue

        # Frames of "<name>.<ext>" go to "<frames_dir>/<name>/".
        video_name = os.path.splitext(video_filename)[0]
        video_frames_dir = os.path.join(frames_dir, video_name)
        os.makedirs(video_frames_dir, exist_ok=True)

        # Read until the capture reports that no further frame is available.
        frame_count = 0
        while True:
            success, frame = video.read()
            if not success:
                break

            # Zero-padded index keeps the files in order when sorted by name.
            frame_path = os.path.join(video_frames_dir, f"frame_{frame_count:06d}.jpg")

            # cv2.imwrite signals failure through its return value rather
            # than an exception, so check it explicitly.
            if not cv2.imwrite(frame_path, frame):
                print(f"Failed to write frame: {frame_path}")

            # Count every decoded frame, written or not, so that file names
            # always reflect the frame's position in the video.
            frame_count += 1
    finally:
        # Always free the capture handle, even if saving a frame raised.
        video.release()


# Preprocess the Frames

In [None]:
import os

import cv2
import numpy as np

# Directory holding the extracted frames of the video being preprocessed.
frames_dir = 'D:\\major\\MJ23\\Frames\\video194'

# Load every readable image from the directory.
# The listing is sorted because os.listdir returns entries in arbitrary
# order, which would make the frame indices below non-deterministic.
frames = []
for file_name in sorted(os.listdir(frames_dir)):
    frame = cv2.imread(os.path.join(frames_dir, file_name))
    # cv2.imread returns None (it does not raise) for files it cannot decode;
    # skip those so the arithmetic below never sees a None entry.
    if frame is None:
        continue
    frames.append(frame)

# Two frames whose mean squared error is below this value count as duplicates.
mse_threshold = 10  # Adjust this value based on your needs

# Mean squared error for every unordered pair (i, j) with i < j.
# NOTE: this is quadratic in the number of frames.
mse_dict = {}
for i in range(len(frames)):
    # Frames are loaded as uint8, and uint8 arithmetic wraps modulo 256, so
    # both the difference and its square would be wrong if computed directly.
    # Promote the reference frame to float once per outer iteration; the
    # subtraction then promotes the second operand as well.
    reference = frames[i].astype(np.float64)
    for j in range(i + 1, len(frames)):
        mse = np.mean((reference - frames[j]) ** 2)
        mse_dict[(i, j)] = mse

# A frame is dropped when it is similar to ANY earlier frame; the earlier
# frame of each similar pair is the one that is kept.
similar_frames_indices = {j for (i, j), mse in mse_dict.items() if mse < mse_threshold}

# Keep only the frames that were not flagged as duplicates, in original order.
filtered_frames = [frame for idx, frame in enumerate(frames) if idx not in similar_frames_indices]

In [None]:
# Echo the de-duplicated frame list so the notebook renders it as cell output.
filtered_frames

[array([[[ 0,  0,  0],
         [ 0,  0,  0],
         [ 0,  0,  0],
         ...,
         [ 0,  0,  0],
         [ 0,  0,  0],
         [ 0,  0,  0]],
 
        [[ 0,  0,  0],
         [ 0,  0,  0],
         [ 0,  0,  0],
         ...,
         [ 0,  0,  0],
         [ 0,  0,  0],
         [ 0,  0,  0]],
 
        [[ 0,  0,  0],
         [ 0,  0,  0],
         [ 0,  0,  0],
         ...,
         [ 0,  0,  0],
         [ 0,  0,  0],
         [ 0,  0,  0]],
 
        ...,
...
         [ 5, 12, 31],
         ...,
         [ 0,  5,  4],
         [ 0,  5,  4],
         [ 0,  5,  4]]], dtype=uint8)]

         Output is truncated. View as a scrollable element or open in a text editor. Adjust cell output settings...


In [None]:
import cv2
import os

# Directory that receives the de-duplicated frames.
output_dir = 'PreprocessDataset'

# Create the directory if needed. exist_ok=True replaces the separate
# "exists?" check, which is race-prone, and matches how the frame-extraction
# cell creates its directories.
os.makedirs(output_dir, exist_ok=True)

# Write each remaining frame as a PNG, numbered from 1 in list order.
for i, frame in enumerate(filtered_frames):
    # You can customize the file name format as needed
    output_file = os.path.join(output_dir, f'filtered_frame_video194_{i+1}.png')

    # cv2.imwrite reports failure through its return value rather than an
    # exception; surface it so missing output files do not go unnoticed.
    if not cv2.imwrite(output_file, frame):
        print(f"Failed to write frame: {output_file}")
