In [1]:
import os
from ultralytics import YOLO

# Frame-by-frame extraction of the original video

In [None]:
import cv2
import os
from tqdm import tqdm

# Dump every frame of each .mp4 under `origin_videos_path` to
# `<output_root_path>/<video stem>/<video stem>_<6-digit index>.jpg`.
# Frame indices start at 1. The cell is resumable: frames whose JPEG already
# exists on disk are not rewritten.
origin_videos_path = "origin_videos"
output_root_path = "predict_datasets"

os.makedirs(output_root_path, exist_ok=True)

# Sorted so that videos are processed in the same order on every run
# (os.listdir returns entries in arbitrary order).
for video_file in sorted(os.listdir(origin_videos_path)):
    # Case-insensitive match so "clip.MP4" is picked up as well.
    if not video_file.lower().endswith(".mp4"):
        continue

    video_path = os.path.join(origin_videos_path, video_file)
    video_stem = os.path.splitext(video_file)[0]
    output_dir = os.path.join(output_root_path, video_stem)
    os.makedirs(output_dir, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    try:
        # An unreadable file would otherwise report 0 frames and be skipped
        # without any indication that something went wrong.
        if not cap.isOpened():
            print(f"{video_file} could not be opened, skipping.")
            continue

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Fast path: the output folder already holds at least as many files as
        # the reported frame count, so treat the video as done. Only trusted
        # when the backend actually reported a positive count.
        if total_frames > 0 and len(os.listdir(output_dir)) >= total_frames:
            continue

        frame_idx = 1

        # Read until the decoder runs out of frames instead of stopping at the
        # reported count: the count is whatever the backend reports and may not
        # match the number of frames that actually decode. It is used only as
        # the progress-bar total.
        with tqdm(total=total_frames if total_frames > 0 else None, desc=video_stem) as progress:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                frame_filename = f"{video_stem}_{frame_idx:06d}.jpg"
                frame_path = os.path.join(output_dir, frame_filename)

                # Skip frames written by a previous (possibly interrupted) run.
                if not os.path.exists(frame_path):
                    # cv2.imwrite signals failure through its return value;
                    # surface it rather than silently losing frames.
                    if not cv2.imwrite(frame_path, frame):
                        raise OSError(f"Failed to write frame to {frame_path}")

                frame_idx += 1
                progress.update(1)
    finally:
        # Always free the capture handle, including on early `continue`,
        # exceptions, and keyboard interrupts.
        cap.release()

    print(f"{video_file} completed! \n")

# Perform prediction on the extracted images frame by frame

In [None]:
# Run the trained detector on every image of every camera folder under
# `predict_datasets_root` and, for each image with a detection, write a
# one-line label file `<predict_results_root>/<camera>/<image stem>.txt`
# containing: class x_center y_center width height confidence
# (box values normalised by the image width/height).

# --- Configuration -----------------------------------------------------------
predict_datasets_root = "predict_datasets"
predict_results_root = "predict_results"
model_path = "runs_UAV/yolov12n_bs48_img960/weights/best.pt"
image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')
# Inference settings passed to model.predict for every image.
# max_det=1 keeps at most one box per image, so only boxes[0] is ever saved.
predict_kwargs = dict(conf=0.25, iou=0.7, max_det=1, device="cuda:0", verbose=False)


def format_yolo_label(class_id, xyxy, img_width, img_height, confidence):
    """Build one label line from a pixel-space corner box.

    Args:
        class_id: Integer class index of the detection.
        xyxy: Sequence ``(x_min, y_min, x_max, y_max)`` in pixels.
        img_width: Width of the source image in pixels.
        img_height: Height of the source image in pixels.
        confidence: Detection confidence score.

    Returns:
        ``"<class> <x_center> <y_center> <width> <height> <confidence>\\n"``
        where the four box values are divided by the image width/height.
    """
    x_min, y_min, x_max, y_max = xyxy
    x_center = (x_min + x_max) / 2 / img_width
    y_center = (y_min + y_max) / 2 / img_height
    width = (x_max - x_min) / img_width
    height = (y_max - y_min) / img_height
    return f"{class_id} {x_center} {y_center} {width} {height} {confidence}\n"


# Every sub-directory of the dataset root is treated as one camera device.
# Sorted so cameras are processed in a reproducible order.
camera_devices = sorted(
    d for d in os.listdir(predict_datasets_root)
    if os.path.isdir(os.path.join(predict_datasets_root, d))
)

# Load the trained YOLOv12 model
model = YOLO(model_path)

# Iterate over each camera device for prediction
for camera_device in camera_devices:
    image_folder = os.path.join(predict_datasets_root, camera_device)
    output_folder = os.path.join(predict_results_root, camera_device)

    # Ensure the output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Filter to image files once, here; no per-file re-check is needed below.
    image_files = sorted(
        f for f in os.listdir(image_folder)
        if f.lower().endswith(image_extensions)
    )

    print(f"📷 {camera_device} is being processed")

    for image_name in tqdm(image_files, desc=f"{camera_device}"):
        image_path = os.path.join(image_folder, image_name)

        # Perform object detection
        results = model.predict(image_path, **predict_kwargs)

        # Nothing detected: no label file is written for this image.
        if not results or len(results[0].boxes) == 0:
            continue

        result = results[0]
        img_height, img_width = result.orig_img.shape[:2]
        obj = result.boxes[0]

        # .item() / .tolist() turn the box fields into plain Python numbers so
        # the label text is built from ordinary ints and floats.
        label = format_yolo_label(
            int(obj.cls.item()),
            obj.xyxy[0].tolist(),
            img_width,
            img_height,
            float(obj.conf.item()),
        )

        # Save the detection to a TXT file named after the image
        output_file = os.path.join(output_folder, f"{os.path.splitext(image_name)[0]}.txt")
        with open(output_file, 'w') as f:
            f.write(label)

print("All predictions have been processed and saved to TXT files.")