## Import

In [10]:
import os
import sys
import argparse
import numpy as np
import supervision as sv
from pathlib import Path
from ultralytics import YOLO
from dotenv import load_dotenv

## Path & Env

In [11]:
# path
current_path = Path(os.getcwd())
sys.path.insert(0, current_path)

In [12]:
# env
dotenv_path = f"{current_path}/.env"
load_dotenv(dotenv_path=dotenv_path, override=True) 

False

## Zone Annotator

In [13]:
frame_width, frame_height = 1280, 720 
zone_polygon = np.array([
    [363, 270],
    [1016, 268],
    [1046, 788],
    [432, 757],
])

zone = sv.PolygonZone(
    polygon=zone_polygon
)

In [14]:
zone_annotator = sv.PolygonZoneAnnotator(
    zone=zone,
    color=sv.Color.WHITE,
    text_color=sv.Color.BLACK,
    thickness=2,
    text_thickness=2,
    text_scale=0.5
)

## Model

In [15]:
model = YOLO("yolov8n.pt")
box_annotator = sv.BoxAnnotator()
label_annotator = sv.LabelAnnotator()
tracker = sv.ByteTrack()



## Frame Processing

In [36]:
def process_frame(frame: np.ndarray, frame_index: int) -> np.ndarray:
    results = model(frame)[0]
    detections = sv.Detections.from_ultralytics(results)

    # filter person obj
    person_class_id = 0 
    detections = detections[detections.class_id == person_class_id]

    # ID tracking
    detections = tracker.update_with_detections(detections)

    # zone tracking
    is_in_zone = zone.trigger(detections=detections)
    detections_in_zone = detections[is_in_zone]

    # get person in zone data
    class_ids = detections_in_zone.class_id
    tracker_ids = detections_in_zone.tracker_id
    xyxys = detections_in_zone.xyxy
    masks = detections_in_zone.mask if detections_in_zone.mask is not None else [None] * len(detections_in_zone)
    frame_data = []
    for class_id, tracker_id, bbox, mask in zip(class_ids, tracker_ids, xyxys, masks):
        info = {
            "frame_index": frame_index,
            "class_id": class_id,
            "class_name": model.model.names[class_id],
            "tracker_id": tracker_id,
            "bbox": bbox,
            "mask": mask
        }
        frame_data.append(info)
    print(f"Total {len(frame_data)} PERSON IN ZONE: {frame_data}")

    # label with class name + ID
    labels = []
    for class_id, tracker_id in zip(detections_in_zone.class_id, detections_in_zone.tracker_id):
        class_name = model.model.names[class_id] if class_id is not None else "unknown"
        label = f"{class_name} #{tracker_id}" if tracker_id is not None else class_name
        labels.append(label)

    # draw boxing
    annotated_frame = box_annotator.annotate(
        scene=frame.copy(),
        detections=detections_in_zone
    )

    # draw labels
    annotated_frame = label_annotator.annotate(
        scene=annotated_frame,
        detections=detections_in_zone,
        labels=labels
    )

    # zone annotation
    annotated_frame = zone_annotator.annotate(
        scene=annotated_frame,
        label=f"Count: {zone.current_count}"
    )

    return annotated_frame

## Main Process

In [37]:
def main(source_video_path: str):
    target_video_path = f"{current_path.parent}/result/result_zone_tracking.mp4"
    print(f"Processing video from '{source_video_path}' and saving to '{target_video_path}'...")
    sv.process_video(
        source_path=source_video_path,
        target_path=target_video_path,
        callback=process_frame
    )
    print("Video processing complete.")

In [38]:
if __name__ == "__main__":
    video_path = f"{current_path.parent}/videos/finance_20251001_1000_1001.mp4"
    main(video_path)

Processing video from '/mnt/batch/tasks/shared/LS_root/mounts/clusters/gpu-8-56-352-16-jh/code/Users/jeremy.heng/opencv_yolo_supervision/videos/finance_20251001_1000_1001.mp4' and saving to '/mnt/batch/tasks/shared/LS_root/mounts/clusters/gpu-8-56-352-16-jh/code/Users/jeremy.heng/opencv_yolo_supervision/result/result_zone_tracking.mp4'...

0: 384x640 3 persons, 2 bottles, 1 cup, 1 chair, 1 laptop, 6.1ms
Speed: 1.8ms preprocess, 6.1ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)
Total 2 PERSON IN ZONE: [{'frame_index': 0, 'class_id': np.int64(0), 'class_name': 'person', 'tracker_id': np.int64(66), 'bbox': array([     700.63,      336.99,      853.11,      531.06], dtype=float32), 'mask': None}, {'frame_index': 0, 'class_id': np.int64(0), 'class_name': 'person', 'tracker_id': np.int64(2), 'bbox': array([     863.32,      269.22,      975.15,      420.52], dtype=float32), 'mask': None}]

0: 384x640 4 persons, 2 bottles, 1 cup, 1 chair, 1 laptop, 6.1ms
Speed: 1.8ms prep