This notebook builds upon `eda_preliminary.ipynb`. After training the object detection model on the curated data, <br>
I realised that the validation loss was gradually increasing. This prompted me to revisit the dataset and check the annotations manually. <br>
The following videos have bad annotations (each listed with a description of the problem):
<ol>
<li> P03_11 -> Bounding box annotations are not aligned with the hands; the boxes are placed elsewhere in the frame </li>
<li> P03_12 -> Bounding box annotations are not aligned with the hands; the boxes are placed elsewhere in the frame </li>
<li> P04_11 -> Bad quality annotations </li>
<li> P04_18 -> Bad quality annotations</li>
<li> P04_23 -> Bad quality annotations </li>

</ol> 

In [1]:
import pandas as pd
import os
import cv2
from glob import glob
import json
import os

In [2]:
# helper function
def convert_json_to_yolo_txt(
    json_path: str,
    output_dir: str,
    original_img_size: tuple[int, int],
    resize_img_size: tuple[int, int] = (1280, 920),
    one_category: bool = False,
) -> bool:
    """
    Convert one bbox annotation JSON file into a YOLO-format .txt label file.

    Every usable annotation becomes one line of the form
    ``class x_center y_center width height`` where all four coordinates are
    normalised by the image size and clamped to [0, 1].

    The label file is written to
    ``<output_dir>/<parent folder name of json_path>_<json file name without .json>.txt``.

    Args:
        json_path: Path to the annotation JSON. Boxes are read from
            ``data["annotations"][i]["bbox"]`` as ``[x, y, w, h]`` in pixels of the
            original image; ``x, y`` are treated as the top-left corner.
        output_dir: Directory that receives the .txt file (must already exist).
        original_img_size: ``(width, height)`` of the image the boxes refer to.
        resize_img_size: ``(width, height)`` the frame is resized to elsewhere.
            Kept for interface compatibility: scaling a box and its image by the
            same per-axis factor leaves the normalised coordinates unchanged, so
            this value does not influence the written labels.
        one_category: If True every box is written with class ``0``; otherwise the
            annotation's ``"id"`` field is used as the class index.

    Returns:
        True if at least one box was converted and the label file was written,
        False if the JSON held no usable annotation (no file is written then).
    """
    img_name = os.path.basename(json_path).replace(".json", "")
    video_name = os.path.basename(os.path.dirname(json_path))
    txt_path = os.path.join(output_dir, f"{video_name}_{img_name}.txt")

    with open(json_path, "r") as f:
        data = json.load(f)

    img_width = original_img_size[0]
    img_height = original_img_size[1]

    lines = []
    # "or []" also covers a JSON where "annotations" is present but null
    for ann in data.get("annotations") or []:
        bbox = ann.get("bbox")
        if bbox is None or len(bbox) != 4:
            continue
        class_id = 0 if one_category else ann.get("id")
        if class_id is None:
            # without a class index the line would start with the text "None",
            # which is not a valid YOLO label
            continue
        x, y, w, h = bbox
        # YOLO wants the box centre and size relative to the image dimensions
        normalised = (
            (x + w / 2) / img_width,
            (y + h / 2) / img_height,
            w / img_width,
            h / img_height,
        )
        coords = " ".join(f"{min(max(value, 0), 1):.6f}" for value in normalised)
        lines.append(f"{class_id} {coords}")

    # only write a label file when there is at least one valid annotation
    if not lines:
        return False
    with open(txt_path, "w") as f:
        f.write("\n".join(lines))
    return True

def get_video_frame(
    video_path: str,
    frame_index: int,
    output_dir: str,
    resize_img_size: tuple[int, int] = (1280, 920),
) -> None:
    """
    Read a single frame from a video, resize it and save it as a .jpg file.

    The image is written to
    ``<output_dir>/<video file name without ".mp4">_frame_<frame_index:06d>.jpg``.
    Only .jpg output is supported, for simplicity.

    Args:
        video_path: Path to the video file.
        frame_index: Index of the frame to extract (passed to
            ``cv2.CAP_PROP_POS_FRAMES``).
        output_dir: Directory that receives the image.
        resize_img_size: Target size handed to ``cv2.resize`` as ``dsize``.

    Raises:
        RuntimeError: If the video cannot be opened, the requested frame cannot
            be read, or the image cannot be written.
    """
    video_name = os.path.basename(video_path).replace(".mp4", "")
    output_path = os.path.join(output_dir, f"{video_name}_frame_{frame_index:06d}.jpg")

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise RuntimeError(f"Could not open video: {video_path}")
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
        ret, frame = cap.read()
    finally:
        # always give the capture handle back, even when reading fails
        cap.release()

    if not ret or frame is None:
        raise RuntimeError(f"Could not read frame {frame_index} from video: {video_path}")

    frame = cv2.resize(frame, resize_img_size)
    # cv2.imwrite reports failure through its return value, not an exception
    if not cv2.imwrite(output_path, frame):
        raise RuntimeError(f"Could not write image: {output_path}")

In [5]:
# list out all relevant file paths and directories
dataset_dir = "/home/kevinteng/Desktop/dataset/trauma_thompson_dataset/task2_hands"
output_dir = os.path.join(dataset_dir, "ultralytics_training")
csv_pth = "./outputs/info_df.csv"
anno_dir = os.path.join(dataset_dir, "train", "bbox")
video_dir = os.path.join(dataset_dir, "train", "videos")
save_img_dir = os.path.join(output_dir, "images")
save_anno_dir = os.path.join(output_dir, "labels")
resize_img_size = (1280, 920)

df = pd.read_csv(csv_pth)
# do not include videos with bad annotations
video_id_to_skip = ["P03_11", "P03_12", "P04_11", "P04_18", "P04_23"]

# one single-row DataFrame per frame that could not be extracted
error_df = []
print("Starting to process videos...")
for video_id in df["video_id"]:
    # skip videos with bad annotations
    if video_id in video_id_to_skip:
        print(f"Skipping video: {video_id} due to bad annotations")
        continue
    video_path = os.path.join(video_dir, f"{video_id}.mp4")
    anno_path = os.path.join(anno_dir, f"{video_id}")
    print(f"Processing video from: {video_path}")
    print(f"Annotations found in folder: {anno_path}")
    # look the video's row up once; it holds the original size and the split
    video_info = df.loc[df["video_id"] == video_id].iloc[0]
    width = video_info["width"]
    height = video_info["height"]
    split = video_info["split"]
    # create the per-split save directories
    save_img_dir_split = os.path.join(save_img_dir, split)
    save_anno_dir_split = os.path.join(save_anno_dir, split)
    os.makedirs(save_img_dir_split, exist_ok=True)
    os.makedirs(save_anno_dir_split, exist_ok=True)
    # loop through the annotations that belong to the video id
    # the annotations need to be sorted so the frame indices are processed in order
    for anno_json in sorted(glob(os.path.join(anno_path, "*.json"))):
        anno_name = os.path.basename(anno_json).replace(".json", "")
        frame_index = int(anno_name.split("_")[-1])
        # convert the annotation to yolo format
        anno_exist = convert_json_to_yolo_txt(
            anno_json,
            save_anno_dir_split,
            original_img_size=(width, height),
            resize_img_size=resize_img_size,
            one_category=False,
        )
        # skip retrieving the video frame when the annotation is not valid
        if not anno_exist:
            continue
        try:
            # get the video frame and resize it
            get_video_frame(
                video_path,
                frame_index,
                output_dir=save_img_dir_split,
                resize_img_size=resize_img_size,
            )
        except Exception as e:
            print(f"Error processing video {video_id} at frame {frame_index}: {e}")
            # log the error frames for inspection
            log_df = pd.DataFrame({"video_id": [video_id], "frame_index": [frame_index]})
            error_df.append(log_df)
            # the converted .txt file is useless without its frame, so remove it;
            # the name mirrors the one built inside convert_json_to_yolo_txt
            label_path = os.path.join(save_anno_dir_split, f"{video_id}_{anno_name}.txt")
            if os.path.exists(label_path):
                os.remove(label_path)

    print(f"Finished processing video: {video_id}")
    print("-"*100)


Starting to process videos...
Skipping video: P03_11 due to bad annotations
Processing video from: /home/kevinteng/Desktop/dataset/trauma_thompson_dataset/task2_hands/train/videos/P03_10.mp4
Annotations found in folder: /home/kevinteng/Desktop/dataset/trauma_thompson_dataset/task2_hands/train/bbox/P03_10
Finished processing video: P03_10
----------------------------------------------------------------------------------------------------
Processing video from: /home/kevinteng/Desktop/dataset/trauma_thompson_dataset/task2_hands/train/videos/P05_28.mp4
Annotations found in folder: /home/kevinteng/Desktop/dataset/trauma_thompson_dataset/task2_hands/train/bbox/P05_28
Finished processing video: P05_28
----------------------------------------------------------------------------------------------------
Processing video from: /home/kevinteng/Desktop/dataset/trauma_thompson_dataset/task2_hands/train/videos/P03_08.mp4
Annotations found in folder: /home/kevinteng/Desktop/dataset/trauma_thompson_da