In [5]:
import cv2
import os
import threading
import concurrent.futures

In [6]:
def extract_frames_from_video(video_file_path, convert_path, start_frame_count):
    """
    Extracts frames from a single video file.

    Args:
        video_file_path (str): The path to the video file.
        convert_path (str): The path to save the extracted frames.
        start_frame_count (int): The starting frame count for this video.

    Returns:
        int: The number of frames extracted from this video.
    """
    print(f"Processing video: {video_file_path}")
    cap = cv2.VideoCapture(video_file_path)

    if not cap.isOpened():
        print(f"Error: Could not open video file: {video_file_path}")
        return 0

    frame_count = start_frame_count
    extracted_count = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break  # End of video

        frame_file_name = f"frame_{frame_count:06d}.png"
        frame_file_path = os.path.join(convert_path, frame_file_name)

        cv2.imwrite(frame_file_path, frame)
        frame_count += 1
        extracted_count += 1

    cap.release()
    print(f"Finished processing video: {video_file_path}, extracted {extracted_count} frames")
    return extracted_count


In [7]:
def extract_frames(base_video_path, base_image_path, data_type):
    """
    Extracts frames from videos using multithreading.
    """
    if data_type.lower() == "train":
        video_path = os.path.join(base_video_path, "train/")
        convert_path = os.path.join(base_image_path, "train/")
    elif data_type.lower() == "test":
        video_path = os.path.join(base_video_path, "test/")
        convert_path = os.path.join(base_image_path, "test/")
    elif data_type.lower() == "val":
        video_path = os.path.join(base_video_path, "val/")
        convert_path = os.path.join(base_image_path, "val/")
    else:
        raise ValueError("Invalid data_type. Must be 'train', 'test', or 'val'.")

    os.makedirs(convert_path, exist_ok=True)

    video_files = sorted([
        os.path.join(video_path, f)
        for f in os.listdir(video_path)
        if f.endswith(('.mp4', '.avi', '.mov'))
    ])

    # Determine the number of worker threads
    num_workers = os.cpu_count()  # Use all available CPU cores
    if num_workers is None:
        num_workers = 1 # Use 1 if cannot determine number of CPUs
    print(f"Using {num_workers} worker threads.")


    total_frames = 0
    frame_count = 0
    with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
        futures = []
        for video_file in video_files:
            future = executor.submit(extract_frames_from_video, video_file, convert_path, frame_count)
            futures.append(future)
            frame_count += sum([f.result() for f in futures]) #Update the frame_count in time.
            futures = [] # Clean the futures, so we can update frame_count correctly.

        # Wait for all tasks to complete and get the results
        for future in concurrent.futures.as_completed(futures):
            try:
                total_frames += future.result()
            except Exception as exc:
                print(f"Generated an exception: {exc}")


    print(f"Finished extracting frames for {data_type}. Total frames: {total_frames}")
    return total_frames

In [None]:
base_video_path = "../datasets/yolo_obb/videos/"
base_image_path = "../datasets/yolo_obb/images/"

# Check if base directories exist, create them if necessary
os.makedirs(base_video_path, exist_ok=True)
os.makedirs(base_image_path, exist_ok=True)

# Extract frames for train, test and validation sets
total_train_frames = extract_frames(base_video_path, base_image_path, "train")
total_test_frames = extract_frames(base_video_path, base_image_path, "test")
total_val_frames = extract_frames(base_video_path, base_image_path, "val")
print(f"Total extracted frames:\n images: {total_train_frames}\n images: {total_test_frames}\n images:{total_val_frames}")

Using 24 worker threads.
Processing video: ../datasets/yolo_obb/videos/train/video_train_001.mp4
