# Configuration

## Import packages

In [5]:
import cv2
import os
import xml.etree.ElementTree as ET
import json
import math
import concurrent.futures

# Data Preprocessing

## Extract frames from CVAT annotation

In [6]:
def process_frame(frame_count, frame, annotated_frames, frame_subdir):
    """
    Processes a single frame: saves it as a JPEG if it's annotated.  This
    function is designed to be run in a separate thread.

    Args:
        frame_count (int): The frame number.
        frame (numpy.ndarray): The frame data (image).
        annotated_frames (set): Set of annotated frame numbers.
        frame_subdir (str): Path to the subdirectory to save frames.

    Raises:
        OSError: If the frame is annotated but OpenCV reports that it could
            not be written.
    """
    # Guard clause: unannotated frames are simply ignored.
    if frame_count not in annotated_frames:
        return

    frame_filename = f"frame{frame_count:05d}.jpg"
    frame_path = os.path.join(frame_subdir, frame_filename)

    # cv2.imwrite signals some failures (e.g. an unwritable path) by returning
    # False instead of raising, so the result has to be checked explicitly or
    # the frame would be lost silently.
    if not cv2.imwrite(frame_path, frame):
        raise OSError(f"Failed to write frame {frame_count} to {frame_path}")

def convert_video_to_frames(video_dir, frame_dir, annotation_dir):
    """
    Converts video files to individual frames, saving them to the specified
    directory. Only frames that are annotated in the corresponding CVAT XML
    file are saved. Frame writing is handed to a thread pool.

    For a video named ``<name>.mp4`` the annotation file is expected at
    ``annotation_<name with 'video' replaced by 'cvat'>.xml`` inside
    ``annotation_dir``. Videos whose frame subdirectory already contains
    files, or whose annotation file is missing, are skipped with a message.

    Args:
        video_dir (str): Path to the directory containing video files.
        frame_dir (str): Path to the directory where frames will be saved.
        annotation_dir (str): Path to the directory containing CVAT XML
            annotations.
    """
    # Sorted so videos are processed in a reproducible order.
    for video_file in sorted(os.listdir(video_dir)):
        if not video_file.endswith(".mp4"):  # Or other video extensions
            continue

        video_path = os.path.join(video_dir, video_file)
        video_name = os.path.splitext(video_file)[0]
        frame_subdir = os.path.join(frame_dir, video_name)

        # Check if the frames for this video have already been extracted
        if os.path.exists(frame_subdir) and os.listdir(frame_subdir):
            print(f"Frames for {video_file} already extracted. Skipping.")
            continue

        os.makedirs(frame_subdir, exist_ok=True)

        # Find corresponding annotation file
        annotation_file = f"annotation_{video_name.replace('video', 'cvat')}.xml"
        annotation_path = os.path.join(annotation_dir, annotation_file)

        if not os.path.exists(annotation_path):
            print(f"Annotation file not found for {video_file}. Skipping.")
            continue

        # Parse annotation file to get annotated frame numbers
        root = ET.parse(annotation_path).getroot()
        annotated_frames = {
            int(box.get("frame"))
            for track in root.findall(".//track")
            for box in track.findall(".//box")
        }

        # Frames are decoded sequentially, so nothing after the last
        # annotated frame can be needed. -1 turns the read loop into a no-op
        # when the XML contains no boxes at all.
        last_annotated = max(annotated_frames, default=-1)

        cap = cv2.VideoCapture(video_path)
        try:
            if not cap.isOpened():
                # Without this check an unreadable video would be reported
                # as successfully extracted.
                print(f"Could not open {video_file}. Skipping.")
                continue

            with concurrent.futures.ThreadPoolExecutor() as executor:
                futures = []
                frame_count = 0
                while frame_count <= last_annotated:
                    ret, frame = cap.read()
                    if not ret:
                        break

                    # Only annotated frames are handed to the pool; queuing
                    # every decoded frame would keep unneeded images alive
                    # while they wait for a worker.
                    if frame_count in annotated_frames:
                        futures.append(
                            executor.submit(
                                process_frame,
                                frame_count,
                                frame,
                                annotated_frames,
                                frame_subdir,
                            )
                        )

                    frame_count += 1

                # Wait for all frame processing tasks to complete
                for future in concurrent.futures.as_completed(futures):
                    try:
                        future.result()  # Re-raises any exception from the worker
                    except Exception as e:
                        # Best effort: one bad frame must not abort the video.
                        print(f"Error processing frame: {e}")
        finally:
            # Release the capture even if decoding or the pool raised.
            cap.release()

        print(f"Frames extracted from {video_file} to {frame_subdir}")

## Create COCO JSON from CVAT annotation and extracted frames

In [7]:
def rotate_point(x, y, angle, width, height):
    """
    Rotate the point (x, y) about the center of a width-by-height image.

    The pivot is (width / 2, height / 2) and the rotation uses the standard
    2D rotation matrix for the given angle.

    Args:
        x (float): X-coordinate of the point.
        y (float): Y-coordinate of the point.
        angle (float): Rotation angle in degrees.
        width (int): Width of the image.
        height (int): Height of the image.

    Returns:
        tuple: Rotated (x, y) coordinates.
    """
    pivot_x = width / 2
    pivot_y = height / 2

    theta = math.radians(angle)
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)

    # Offset of the point relative to the pivot.
    dx = x - pivot_x
    dy = y - pivot_y

    # Rotate the offset, then shift back into image coordinates.
    return (
        dx * cos_t - dy * sin_t + pivot_x,
        dx * sin_t + dy * cos_t + pivot_y,
    )

def _rotated_box_envelope(xtl, ytl, xbr, ybr, angle):
    """
    Return the axis-aligned box enclosing a rectangle rotated about its center.

    Args:
        xtl (float): Left edge of the unrotated box.
        ytl (float): Top edge of the unrotated box.
        xbr (float): Right edge of the unrotated box.
        ybr (float): Bottom edge of the unrotated box.
        angle (float): Rotation angle in degrees.

    Returns:
        tuple: (xtl, ytl, xbr, ybr) of the enclosing axis-aligned box.
    """
    rad = math.radians(angle)
    # Absolute values make the result independent of the rotation direction.
    cos_a = abs(math.cos(rad))
    sin_a = abs(math.sin(rad))

    half_w = abs(xbr - xtl) / 2
    half_h = abs(ybr - ytl) / 2
    center_x = (xtl + xbr) / 2
    center_y = (ytl + ybr) / 2

    # Half-extents of the rotated rectangle projected onto the image axes.
    extent_x = half_w * cos_a + half_h * sin_a
    extent_y = half_w * sin_a + half_h * cos_a

    return (
        center_x - extent_x,
        center_y - extent_y,
        center_x + extent_x,
        center_y + extent_y,
    )


def convert_cvat_to_coco(cvat_dir, coco_dir):
    """
    Converts CVAT XML annotations to COCO JSON format.  Creates a subdirectory
    in the COCO directory for each CVAT XML file and writes one JSON file per
    annotated frame into it. JSON files that already exist are left untouched.

    Boxes carrying a non-zero ``rotation`` attribute are exported as the
    axis-aligned box that encloses the rectangle rotated about its own center,
    so the exported box stays where the annotation was drawn.

    Args:
        cvat_dir (str): Path to the directory containing CVAT XML annotations.
        coco_dir (str): Path to the directory where COCO JSON files will be saved.

    Raises:
        ValueError: If a track uses a label that the XML file does not declare.
    """
    os.makedirs(coco_dir, exist_ok=True)

    for cvat_file in os.listdir(cvat_dir):
        if not cvat_file.endswith(".xml"):
            continue

        cvat_path = os.path.join(cvat_dir, cvat_file)
        root = ET.parse(cvat_path).getroot()

        # Extract image width and height from CVAT metadata
        width = int(root.find(".//original_size/width").text)
        height = int(root.find(".//original_size/height").text)

        # Get label names
        labels = [label.text for label in root.findall(".//label/name")]

        # COCO category IDs start from 1. setdefault keeps the first position
        # of a repeated name, matching a first-match list lookup.
        label_ids = {}
        for category_id, name in enumerate(labels, start=1):
            label_ids.setdefault(name, category_id)

        # Create a dictionary to store COCO data for each frame
        coco_data_per_frame = {}

        # --- Create subdirectory for this CVAT file ---
        cvat_name = os.path.splitext(cvat_file)[0]
        coco_subdir = os.path.join(coco_dir, cvat_name)
        os.makedirs(coco_subdir, exist_ok=True)

        # Process annotations for each track
        for track in root.findall(".//track"):
            label = track.get("label")
            if label not in label_ids:
                raise ValueError(
                    f"Track label {label!r} is not declared in {cvat_file}"
                )
            label_id = label_ids[label]

            # NOTE(review): every <box> is exported, including any that carry
            # outside="1" -- confirm whether those should be filtered out.
            for box in track.findall(".//box"):
                frame_num = int(box.get("frame"))
                xtl = float(box.get("xtl"))
                ytl = float(box.get("ytl"))
                xbr = float(box.get("xbr"))
                ybr = float(box.get("ybr"))
                rotation = float(box.get("rotation", 0.0))  # Default to 0 if not present

                # Replace a rotated box by its axis-aligned envelope
                if rotation != 0:
                    xtl, ytl, xbr, ybr = _rotated_box_envelope(
                        xtl, ytl, xbr, ybr, rotation
                    )

                # Calculate bounding box width and height
                bbox_width = xbr - xtl
                bbox_height = ybr - ytl

                # Get attributes
                attributes = {
                    attribute.get("name"): attribute.text
                    for attribute in box.findall(".//attribute")
                }

                # Prepare COCO data for this frame (if not already created)
                if frame_num not in coco_data_per_frame:
                    coco_data_per_frame[frame_num] = {
                        "info": {},
                        "licenses": [],
                        "categories": [
                            {"id": i + 1, "name": name, "supercategory": ""}
                            for i, name in enumerate(labels)
                        ],
                        # One image entry per frame file
                        "images": [
                            {
                                "id": frame_num,
                                "width": width,
                                "height": height,
                                "file_name": f"frame{frame_num:05d}.jpg",  # Use consistent naming
                            }
                        ],
                        "annotations": [],
                    }

                # Add annotation; IDs are numbered from 1 within each frame file
                frame_annotations = coco_data_per_frame[frame_num]["annotations"]
                frame_annotations.append({
                    "id": len(frame_annotations) + 1,
                    "image_id": frame_num,
                    "category_id": label_id,
                    "bbox": [xtl, ytl, bbox_width, bbox_height],
                    "area": bbox_width * bbox_height,
                    "iscrowd": 0,
                    "attributes": attributes
                })

        # Save COCO data for each frame to separate files *within the subdirectory*
        for frame_num, coco_data in coco_data_per_frame.items():
            coco_filename = f"{cvat_name}_coco_frame{frame_num:05d}.json"
            coco_path = os.path.join(coco_subdir, coco_filename)

            # Check if COCO file already exists
            if os.path.exists(coco_path):
                print(f"COCO file {coco_filename} already exists. Skipping.")
                continue

            # Explicit encoding so the output does not depend on the platform default.
            with open(coco_path, "w", encoding="utf-8") as f:
                json.dump(coco_data, f, indent=4)
            print(f"Converted {cvat_file} (frame {frame_num}) to {coco_filename}")

## Execution

In [8]:
if __name__ == "__main__":
    # Inputs: raw videos and their CVAT XML annotations.
    video_directory = "../data/video"
    annotation_directory = "../data/annotation/cvat"

    # Outputs: extracted frames and per-frame COCO JSON files.
    frame_directory = "../data/frame"
    coco_annotation_directory = "../data/annotation/coco"

    # Step 1: save the annotated frames of every video.
    convert_video_to_frames(
        video_dir=video_directory,
        frame_dir=frame_directory,
        annotation_dir=annotation_directory,
    )

    # Step 2: convert the CVAT annotations into COCO JSON.
    convert_cvat_to_coco(
        cvat_dir=annotation_directory,
        coco_dir=coco_annotation_directory,
    )

    print("Done!")


Frames extracted from video_001.mp4 to ../data/frame\video_001
Frames extracted from video_002.mp4 to ../data/frame\video_002
Frames extracted from video_003.mp4 to ../data/frame\video_003
Frames extracted from video_004.mp4 to ../data/frame\video_004
Frames extracted from video_005.mp4 to ../data/frame\video_005
Frames extracted from video_006.mp4 to ../data/frame\video_006
Frames extracted from video_007.mp4 to ../data/frame\video_007
Frames extracted from video_008.mp4 to ../data/frame\video_008
Frames extracted from video_009.mp4 to ../data/frame\video_009
Frames extracted from video_010.mp4 to ../data/frame\video_010
COCO file annotation_cvat_001_coco_frame00000.json already exists. Skipping.
COCO file annotation_cvat_001_coco_frame00001.json already exists. Skipping.
COCO file annotation_cvat_001_coco_frame00002.json already exists. Skipping.
COCO file annotation_cvat_001_coco_frame00003.json already exists. Skipping.
COCO file annotation_cvat_001_coco_frame00004.json already exi