In [1]:
from google.colab import drive #we're fetching data from drive
import cv2 #for video processing
import os #for path manipulations and creating directories
#import sys #(optional) to modify Python's search path if you want to import your own modules from the src directory

#mount Google Drive so the project folders become reachable from the Colab VM
drive.mount('/content/drive')

#every other path in the project hangs off this Drive folder
PROJECT_ROOT = '/content/drive/My Drive/car-behaviour-project'

#input videos and the folders the notebooks write to
RAW_VIDEOS_DIR = os.path.join(PROJECT_ROOT, 'data/raw_videos')
EXTRACTED_FRAMES_DIR = os.path.join(PROJECT_ROOT, 'data/extracted_frames')
TRACKING_OUTPUTS_DIR = os.path.join(PROJECT_ROOT, 'data/tracking_outputs') #used by notebook 2
#BEHAVIOUR_ANNOTATIONS_DIR = os.path.join(PROJECT_ROOT, 'data/behaviour_annotations') #used by notebook 4
MODEL_DIR = os.path.join(PROJECT_ROOT, 'models/yolov8') #YOLO model files

#optional: expose your own modules to Python once a 'src' folder exists under PROJECT_ROOT
# SRC_DIR = os.path.join(PROJECT_ROOT, 'src')
# sys.path.append(SRC_DIR) #or sys.path.append(PROJECT_ROOT) if the modules sit directly under it

#make sure the output folders exist; RAW_VIDEOS_DIR is an input and is never created here
for output_dir in (EXTRACTED_FRAMES_DIR, TRACKING_OUTPUTS_DIR, MODEL_DIR):
    os.makedirs(output_dir, exist_ok=True)
#os.makedirs(BEHAVIOUR_ANNOTATIONS_DIR, exist_ok=True)

#echo the resolved paths so a wrong Drive layout is easy to spot
for path_label, path_value in (
    ("Project Root", PROJECT_ROOT),
    ("Raw Videos Dir", RAW_VIDEOS_DIR),
    ("Extracted Frames Dir", EXTRACTED_FRAMES_DIR),
    ("Tracking Outputs Dir", TRACKING_OUTPUTS_DIR),
    ("Model Dir", MODEL_DIR),
):
    print(f"{path_label}: {path_value}")

#show what is available in raw_videos as a sanity check
print("\nFiles in raw_videos directory:")
if not os.path.exists(RAW_VIDEOS_DIR):
    print(f"Directory not found: {RAW_VIDEOS_DIR}. Please check the path and your Drive setup.")
else:
    for video_entry in os.listdir(RAW_VIDEOS_DIR):
        print(f"- {video_entry}")

Mounted at /content/drive
Project Root: /content/drive/My Drive/car-behaviour-project
Raw Videos Dir: /content/drive/My Drive/car-behaviour-project/data/raw_videos
Extracted Frames Dir: /content/drive/My Drive/car-behaviour-project/data/extracted_frames
Tracking Outputs Dir: /content/drive/My Drive/car-behaviour-project/data/tracking_outputs
Model Dir: /content/drive/My Drive/car-behaviour-project/models/yolov8

Files in raw_videos directory:
- video1.MOV
- video2.MOV
- video4.MOV
- video3.MOV


In [3]:
#Frame Extraction Logic
import sys

#extraction settings
#how many frames to keep for every second of source video
TARGET_FPS = 10

#name of the clip to process; must be one of the files in data/raw_videos
VIDEO_FILENAME = "video1.MOV"
VIDEO_PATH = os.path.join(RAW_VIDEOS_DIR, VIDEO_FILENAME)

#frame extraction function
def extract_frames_from_video(video_path, output_base_folder, target_fps_extraction):
    """
    Extracts frames from a video file at a specified target FPS.

    Frames are written as PNG files into a subfolder of output_base_folder
    named after the video (file name without its extension). Each file is named
    '<video>_f<savedCounter:05d>_orig<originalFrameId:06d>.png'.

    Args:
        video_path (str): Path to the input video file.
        output_base_folder (str): Base directory to save extracted frames.
                                  A subfolder named after the video will be created here.
        target_fps_extraction (int | float): The desired number of frames to extract
                                  per second of video. Must be positive. If it exceeds
                                  the video's own FPS, every frame is extracted.

    Returns:
        bool: False if the video file is missing, the target FPS is not positive,
              the output folder cannot be created, or the video cannot be opened.
              True once the video has been read to the end (individual frame
              write failures are reported but do not change the return value).
    """
    if not os.path.exists(video_path):
        print(f"Error: Video file not found at {video_path}")
        print("Please ensure VIDEO_FILENAME is set correctly and the file exists in RAW_VIDEOS_DIR.")
        return False

    #reject a bad request before creating folders or opening the video
    if target_fps_extraction <= 0:
        print("Error: Target FPS must be positive.")
        return False

    #computed outside the try so the error message below can always name the folder
    video_name = os.path.splitext(os.path.basename(video_path))[0]
    video_output_folder = os.path.join(output_base_folder, video_name)
    try:
        os.makedirs(video_output_folder, exist_ok=True)
    except OSError as e:
        print(f"Error creating output directory {video_output_folder}: {e}")
        return False

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f"Error: Could not open video file {video_path}")
            return False

        original_fps = cap.get(cv2.CAP_PROP_FPS)
        #'not > 0' also catches NaN and negative values, not just a literal 0
        if not original_fps > 0: #could happen if metadata is missing or video is corrupted
            print(f"Warning: Could not read original FPS from video '{video_name}'. Assuming 30 FPS for frame selection.")
            original_fps = 30.0 #default assumption

        #number of original frames per saved frame; kept fractional so that
        #non-integer ratios (e.g. 25 FPS -> 10 FPS) still average out to the target rate
        frame_skip_interval = original_fps / target_fps_extraction
        if frame_skip_interval < 1: #if target FPS is higher than original, extract every frame
            print(f"Warning: Target FPS ({target_fps_extraction}) is higher than or equal to original FPS ({original_fps}). Extracting all frames.")
            frame_skip_interval = 1.0

        frame_id_in_video = 0
        saved_frame_count = 0
        next_frame_to_capture = 0.0

        print(f"\nProcessing video: {video_name}")
        print(f"Original FPS (or assumed): {original_fps:.2f}")
        print(f"Target extraction FPS: {target_fps_extraction}")
        print(f"Will attempt to save 1 frame approximately every {frame_skip_interval:g} original frames.")

        while True:
            ret, frame = cap.read()
            if not ret:
                break  #end of video or error reading frame

            #check if this is a frame we want to save
            if frame_id_in_video >= next_frame_to_capture:
                #naming convention: videoName_f<savedFrameCounter>_orig<originalFrameId>.png
                #the saved counter gives sequential numbering of saved frames;
                #the original frame ID helps when comparing against the source video
                frame_filename = os.path.join(video_output_folder, f"{video_name}_f{saved_frame_count:05d}_orig{frame_id_in_video:06d}.png")
                try:
                    #imwrite reports most failures by returning False rather than raising
                    frame_written = cv2.imwrite(frame_filename, frame)
                except Exception as e:
                    frame_written = False
                    print(f"Error writing frame {frame_filename}: {e}")
                else:
                    if not frame_written:
                        print(f"Error writing frame {frame_filename}: cv2.imwrite returned False")

                if frame_written:
                    saved_frame_count += 1
                    #derive the next target from the counter to avoid accumulating float error
                    next_frame_to_capture = saved_frame_count * frame_skip_interval
                #on failure the target is left unchanged, so the next frame is tried instead

            frame_id_in_video += 1

            #print progress
            if frame_id_in_video % 100 == 0:
                print(f"Processed {frame_id_in_video} original frames. Saved {saved_frame_count} frames...")
    finally:
        #always free the decoder, even if reading or writing raised unexpectedly
        cap.release()

    print(f"Finished processing. Extracted {saved_frame_count} frames to {video_output_folder}")
    return True

#main Execution
#only run when executed as the notebook's main module inside Google Colab
if 'google.colab' in sys.modules and __name__ == "__main__":
    print("\nRunning Frame Extraction")
    #VIDEO_FILENAME must name a real file under PROJECT_ROOT/data/raw_videos/ in Google Drive;
    #the placeholder name or a missing file falls through to the help text below
    if VIDEO_FILENAME != "your_video.mp4" and os.path.exists(VIDEO_PATH):
        print(f"Attempting to extract frames from: {VIDEO_PATH}")
        print(f"Outputting to a subfolder within: {EXTRACTED_FRAMES_DIR}")
        success = extract_frames_from_video(VIDEO_PATH, EXTRACTED_FRAMES_DIR, TARGET_FPS)
        if success:
            video_name_no_ext = os.path.splitext(VIDEO_FILENAME)[0]
            final_output_folder = os.path.join(EXTRACTED_FRAMES_DIR, video_name_no_ext)
            print(f"Frames should be in: {final_output_folder}")
            #preview a few of the written files as a quick verification
            if os.path.exists(final_output_folder):
                extracted_listing = sorted(os.listdir(final_output_folder))
            else:
                extracted_listing = []
            if extracted_listing:
                print(f"\nFirst 5 extracted frames in {final_output_folder}:")
                for frame_file in extracted_listing[:5]:
                    print(f"- {frame_file}")
            else:
                print(f"No frames found in {final_output_folder}. Check for errors above.")
    else:
        print("IMPORTANT")
        print("Please edit the 'VIDEO_FILENAME' variable in the cell above.")
        print(f"Set it to the name of a video file that exists in: '{RAW_VIDEOS_DIR}'.")
        print("Available videos (if any, check output of first cell):")
        if os.path.exists(RAW_VIDEOS_DIR):
            #hidden files (leading dot) are left out of the suggestions
            visible_videos = [
                candidate
                for candidate in sorted(os.listdir(RAW_VIDEOS_DIR))
                if not candidate.startswith('.')
            ]
            for candidate in visible_videos:
                print(f"  - {candidate}")


Running Frame Extraction
Attempting to extract frames from: /content/drive/My Drive/car-behaviour-project/data/raw_videos/video1.MOV
Outputting to a subfolder within: /content/drive/My Drive/car-behaviour-project/data/extracted_frames

Processing video: video1
Original FPS (or assumed): 30.00
Target extraction FPS: 10
Will attempt to save 1 frame approximately every 3 original frames.
Processed 100 original frames. Saved 34 frames...
Processed 200 original frames. Saved 67 frames...
Processed 300 original frames. Saved 100 frames...
Processed 400 original frames. Saved 134 frames...
Processed 500 original frames. Saved 167 frames...
Processed 600 original frames. Saved 200 frames...
Processed 700 original frames. Saved 234 frames...
Processed 800 original frames. Saved 267 frames...
Processed 900 original frames. Saved 300 frames...
Processed 1000 original frames. Saved 334 frames...
Processed 1100 original frames. Saved 367 frames...
Processed 1200 original frames. Saved 400 frames..