# Video-inference to CSV

In [11]:
from ultralytics import YOLO
import glob
import os
import pandas as pd
import cv2
from pathlib import Path

In [2]:
# Load the YOLO detector from the trained weights file.
weights_path = "runs/detect/train4/weights/best.pt"
model = YOLO(weights_path)

In [3]:
# Directory that is searched for the input .mp4 videos.
videos_dir = "Arma3 Tracking/Video"

In [6]:
# Collect the paths of all .mp4 files located directly inside videos_dir.
mp4_pattern = os.path.join(videos_dir, '*.mp4')
videos_path = glob.glob(mp4_pattern)

In [17]:
# Run the detector on every frame of every video and write one CSV of
# detections per video into the current working directory.
csv_columns = [
    "frame_index",
    "class_id",
    "top_left_x",
    "top_left_y",
    "bottom_right_x",
    "bottom_right_y",
    "conf_score",
]

for video_path in videos_path:
    cap = cv2.VideoCapture(video_path)
    results_list = []
    frame_index = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No more frames could be read: stop processing this video.
                break
            # A single frame is passed in, so only the first result is used.
            results = model(frame)[0]
            for box in results.boxes:
                cls_id = int(box.cls.item())
                conf = float(box.conf.item())
                # xyxy format (left, top, right, bottom), truncated to ints
                x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
                results_list.append({
                    "frame_index": frame_index,
                    "class_id": cls_id,
                    "top_left_x": x1,
                    "top_left_y": y1,
                    "bottom_right_x": x2,
                    "bottom_right_y": y2,
                    "conf_score": round(conf, 4),
                })
            # Incremented for every frame read, with or without detections.
            frame_index += 1
    finally:
        # Release the capture handle even if inference raises part-way.
        cap.release()

    # Explicit columns keep the header row when a video has no detections,
    # so code reading the CSV still finds the expected column names.
    df = pd.DataFrame(results_list, columns=csv_columns)
    # Path.stem drops only the final extension; str.replace('.mp4', '')
    # would also strip '.mp4' occurring in the middle of a file name.
    videoname = Path(video_path).stem
    output_path = Path(f"{videoname}_for_linh.csv")
    df.to_csv(output_path, index=False)


0: 736x1280 2 soldiers, 3.0ms
Speed: 17.7ms preprocess, 3.0ms inference, 0.6ms postprocess per image at shape (1, 3, 736, 1280)

0: 736x1280 2 soldiers, 2.7ms
Speed: 2.5ms preprocess, 2.7ms inference, 1.1ms postprocess per image at shape (1, 3, 736, 1280)

0: 736x1280 2 soldiers, 2.4ms
Speed: 2.5ms preprocess, 2.4ms inference, 0.5ms postprocess per image at shape (1, 3, 736, 1280)

0: 736x1280 2 soldiers, 2.6ms
Speed: 2.1ms preprocess, 2.6ms inference, 0.5ms postprocess per image at shape (1, 3, 736, 1280)

0: 736x1280 2 soldiers, 2.5ms
Speed: 2.6ms preprocess, 2.5ms inference, 0.5ms postprocess per image at shape (1, 3, 736, 1280)

0: 736x1280 2 soldiers, 2.5ms
Speed: 2.1ms preprocess, 2.5ms inference, 0.5ms postprocess per image at shape (1, 3, 736, 1280)

0: 736x1280 2 soldiers, 3.1ms
Speed: 3.1ms preprocess, 3.1ms inference, 0.9ms postprocess per image at shape (1, 3, 736, 1280)

0: 736x1280 2 soldiers, 2.6ms
Speed: 2.8ms preprocess, 2.6ms inference, 2.0ms postprocess per image at shape (1, 3, 736, 1280)

# CSV to Video-visualization

In [18]:
import cv2
import pandas as pd
from pathlib import Path

# Draw the detections stored in a CSV onto the source video and save the
# annotated copy as a new .mp4 file.
video_path = "Arma3 Tracking/Video/G_00001_0_2_1_rec_0.mp4"
csv_path = "G_00001_0_2_1_rec_0_for_linh.csv"
output_video_path = "G_00001_0_2_1_rec_0_labeling.mp4"

df = pd.read_csv(csv_path)

# Group the detections once so that each frame is a dictionary lookup
# instead of a boolean-mask scan over the whole table.
boxes_by_frame = {
    int(idx): group for idx, group in df.groupby('frame_index')
}
# Used for frames that have no rows in the CSV.
no_boxes = df.iloc[0:0]

cap = cv2.VideoCapture(video_path)
out = None
try:
    # Reuse the input's frame rate and size for the output video.
    fps = cap.get(cv2.CAP_PROP_FPS)
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (w, h))

    frame_index = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No more frames could be read: stop.
            break

        frame_boxes = boxes_by_frame.get(frame_index, no_boxes)

        for row in frame_boxes.itertuples(index=False):
            x1, y1 = int(row.top_left_x), int(row.top_left_y)
            x2, y2 = int(row.bottom_right_x), int(row.bottom_right_y)
            cls_id = int(row.class_id)
            conf = float(row.conf_score)

            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            # The label is placed 8 px above the box's top-left corner.
            label = f"Class {cls_id} ({conf:.2f})"
            cv2.putText(frame, label, (x1, y1 - 8), cv2.FONT_HERSHEY_SIMPLEX,
                        0.6, (0, 255, 0), 2)

        # Every frame is written, including those without detections.
        out.write(frame)
        frame_index += 1
finally:
    # Always release both handles, even if drawing or writing raises.
    cap.release()
    if out is not None:
        out.release()