In [56]:
import cv2
import os
import pandas as pd
from pathlib import Path
from ultralytics import YOLO

In [57]:
# Dataset layout: one root directory with sub-directories for the
# annotations, the videos and the generated YOLO files.
# Every path keeps its trailing slash.
data_path = "../data/"
ann_path = f"{data_path}annotations/"
video_path = f"{data_path}video/"
yolo_path = f"{data_path}yolo/"

# Class name -> YOLO class id. The keys are compared verbatim against the
# `label` column of the annotation table, so they include the double quotes.
label_map = {
    class_name: class_id
    for class_id, class_name in enumerate(
        ['"Biker"', '"Pedestrian"', '"Skater"', '"Cart"', '"Car"', '"Bus"']
    )
}

In [76]:
# Data-formatting helpers: video discovery, annotation loading and YOLO conversion
def get_mp4_paths_pathlib(root_dir):
    """
    Recursively collect all MP4 file paths below a directory using pathlib.

    Args:
        root_dir: Root directory to search (str or Path)

    Returns:
        Sorted list of Path objects pointing to MP4 files
    """
    # rglob yields entries in whatever order the filesystem returns them,
    # which can differ between machines. Sorting makes index-based
    # selection of videos reproducible.
    return sorted(Path(root_dir).rglob("*.mp4"))

def get_ann_path_from_video(video_path):
    """
    Derive the annotation file path that belongs to a video file.

    The path is rewritten textually: the file name ``video.mp4`` becomes
    ``annotations.txt`` and the ``/video/`` directory component becomes
    ``/annotations/``. The resulting path is also printed.

    Args:
        video_path: Path of the video file (str or Path), e.g.
            ``../data/video/gates/video3/video.mp4``

    Returns:
        The annotation path as a str with forward slashes, e.g.
        ``../data/annotations/gates/video3/annotations.txt``
    """
    # as_posix() yields forward slashes on every platform, so the "/video/"
    # match below also works for paths that use backslash separators.
    posix_video_path = Path(video_path).as_posix()

    annotation_file = posix_video_path.replace("video.mp4", "annotations.txt")
    annotation_file = annotation_file.replace("/video/", "/annotations/")
    print(f'ann_path: {annotation_file}')
    return annotation_file

def convert_txt_to_df(txt_file):
    """
    Load a whitespace-separated annotation file into a DataFrame.

    The file has no header row; the ten columns are named here.

    Args:
        txt_file: Path or file-like object accepted by ``pd.read_csv``

    Returns:
        DataFrame with the columns 'Track ID', 'xmin', 'ymin', 'xmax',
        'ymax', 'frame', 'lost', 'occluded', 'generated' and 'label'
    """
    column_names = [
        'Track ID',
        'xmin', 'ymin', 'xmax', 'ymax',
        'frame',
        'lost', 'occluded', 'generated',
        'label',
    ]
    return pd.read_csv(
        txt_file,
        names=column_names,
        header=None,
        sep=r'\s+',
        engine='python',
    )

def bbox_to_yolo(xmin, ymin, xmax, ymax, img_width, img_height):
    """
    Turn a corner-based pixel box into a YOLO-style box.

    Args:
        xmin, ymin, xmax, ymax: Box corners in pixels
        img_width, img_height: Image size in pixels

    Returns:
        Tuple (x_center, y_center, width, height), each expressed as a
        fraction of the corresponding image dimension
    """
    # Pixel-space extent and midpoint of the box
    box_w_px = xmax - xmin
    box_h_px = ymax - ymin
    mid_x_px = (xmin + xmax) / 2
    mid_y_px = (ymin + ymax) / 2

    # Scale horizontal quantities by the width, vertical ones by the height
    return (
        mid_x_px / img_width,
        mid_y_px / img_height,
        box_w_px / img_width,
        box_h_px / img_height,
    )

    
def create_yolo_files(output_dir, video_path, frame_cnt):
    """
    Build the label (.txt) and image (.png) output paths for one frame.

    Nothing is written to disk here; only the two paths are computed. Both
    share the stem ``<scene>_<video>_frame_<frame_cnt>``, where ``<video>``
    is the directory holding the video file and ``<scene>`` is that
    directory's parent.

    Args:
        output_dir: Directory the files are meant to go into
        video_path: Path of the video file, e.g.
            ``../data/video/gates/video3/video.mp4``
        frame_cnt: Index of the frame within the video

    Returns:
        Tuple (label_path, image_path) of str

    Raises:
        IndexError: If ``video_path`` has fewer than three components
    """
    # as_posix() gives forward slashes on every platform, so splitting on
    # '/' works regardless of the separator the caller's path used.
    parts = Path(video_path).as_posix().split('/')
    scene = parts[-3]
    video_base = parts[-2]

    stem = f"{scene}_{video_base}_frame_{frame_cnt}"

    # os.path.join does not add a second separator when output_dir already
    # ends with one (plain concatenation produced ".../yolo//name.txt").
    frame_ann = os.path.join(output_dir, stem + ".txt")
    frame_png = os.path.join(output_dir, stem + ".png")

    return frame_ann, frame_png

def create_yolo_ann(output_file, yolo_data):
    """
    Write the boxes of one frame to a text file, one line per box.

    Each line has the form ``label x_center y_center width height`` with the
    four box values formatted to six decimal places. An empty ``yolo_data``
    produces an empty file.

    Args:
        output_file: Path of the file to (over)write
        yolo_data: Iterable of dicts with the keys 'x_center', 'y_center',
            'width', 'height' and 'label'
    """
    field_order = ("x_center", "y_center", "width", "height", "label")

    with open(output_file, "w") as handle:
        for box in yolo_data:
            x_center, y_center, width, height, label = (box[name] for name in field_order)
            handle.write(f"{label} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n")

In [77]:
# Collect every MP4 file below the configured video directory.
videos = get_mp4_paths_pathlib(root_dir=video_path)

In [81]:
# Walk through each video and write YOLO label files for the sampled frames
def extract_frames(videos, label_map, output_dir, frame_interval=1,
                   max_videos=1, save_images=False):
    """
    Write YOLO label files (and optionally frame images) for sampled frames.

    For every processed video the matching annotation file is loaded, the
    video is decoded frame by frame, and for each ``frame_interval``-th frame
    a label file is written. It contains one line per annotation row whose
    ``frame`` column equals the frame index (an empty file if there is none).

    Args:
        videos: Sliceable sequence of video paths (str or Path)
        label_map: Mapping from the annotation ``label`` value to a class id
        output_dir: Directory that receives the output files; created if
            it does not exist
        frame_interval: Process every N-th frame (1 = every frame)
        max_videos: How many leading entries of ``videos`` to process. The
            default of 1 keeps the earlier hard-coded ``videos[0:1]``
            behaviour; pass None to process all videos
        save_images: If True, also write each processed frame as a PNG next
            to its label file. Off by default because earlier versions of
            this function never wrote the image

    Raises:
        KeyError: If an annotation row has a label missing from ``label_map``
        OSError: If ``save_images`` is set and a frame image cannot be written
    """
    # create_yolo_ann opens the label file with open(), which does not create
    # missing directories, so make sure the target exists up front.
    os.makedirs(output_dir, exist_ok=True)

    for video_path in videos[:max_videos]:

        # Load the annotation table that belongs to this video
        ann = get_ann_path_from_video(video_path=video_path)
        ann_df = convert_txt_to_df(ann)

        # Pass a plain str: not every OpenCV build accepts Path objects.
        cap = cv2.VideoCapture(str(video_path))
        try:
            frame_count = 0

            while cap.isOpened():
                ret, frame = cap.read()

                if not ret:
                    break

                if frame_count % frame_interval == 0:
                    # Image arrays are indexed (row, column), so the shape is
                    # (height, width[, channels]) - height comes first.
                    frame_h, frame_w = frame.shape[:2]

                    frame_ann = ann_df[ann_df['frame'] == frame_count]

                    frame_ann_file, frame_png_file = create_yolo_files(output_dir, str(video_path), frame_count)

                    yolo_data = _frame_rows_to_yolo(frame_ann, frame_w, frame_h, label_map)
                    create_yolo_ann(frame_ann_file, yolo_data)

                    if save_images and not cv2.imwrite(frame_png_file, frame):
                        raise OSError(f"Could not write frame image: {frame_png_file}")

                frame_count += 1
        finally:
            # Release the capture even if a frame fails to convert.
            cap.release()


def _frame_rows_to_yolo(frame_rows, img_width, img_height, label_map):
    """Convert the annotation rows of one frame into a list of YOLO box dicts."""
    # NOTE(review): rows are converted regardless of their 'lost' and
    # 'occluded' flags - confirm that this is intended.
    yolo_data = []

    for _, row in frame_rows.iterrows():
        x_center, y_center, width, height = bbox_to_yolo(
            row["xmin"], row["ymin"], row["xmax"], row["ymax"],
            img_width=img_width, img_height=img_height,
        )

        raw_label = row["label"]
        if raw_label not in label_map:
            raise KeyError(f"Unknown label {raw_label!r}; known labels: {list(label_map)}")

        yolo_data.append({
            'x_center': x_center,
            'y_center': y_center,
            'width': width,
            'height': height,
            'label': label_map[raw_label]
        })

    return yolo_data

In [82]:
# Run the export on the collected videos, sampling every 100th frame.
extract_frames(
    videos=videos,
    label_map=label_map,
    output_dir=yolo_path,
    frame_interval=100,
)

ann_path: ../data/annotations/gates/video3/annotations.txt
