Usage:
- Change the runtime from CPU to one of the provided GPUs
- Run the cells one by one

In [None]:
# Install necessary libraries
!pip install opencv-python-headless pyqt5 pyqtgraph ultralytics
from google.colab import files

By running this script, you will be able to upload a video and choose a YOLO model from the provided list.

Two files will be downloaded automatically:
- A .json file containing the bounding boxes detected by the YOLO model
- The annotated video showing all the detections that were made


In [None]:
import sys
import cv2
import numpy as np
import json
import time
import os
from ultralytics import YOLO


def _box_to_record(box, frame_index):
    """Convert one detected box into a JSON-serializable dict."""
    bbox = box.xywh.cpu().numpy()[0]
    # The id is None when the tracker has not assigned one to this box.
    track_id = getattr(box, 'id', None)
    return {
        'frame': frame_index,
        'class': int(box.cls.cpu().numpy()[0]),
        'confidence': float(box.conf.cpu().numpy()[0]),
        'id': None if track_id is None else int(track_id.cpu().numpy()[0]),
        'x': float(bbox[0]),
        'y': float(bbox[1]),
        'width': float(bbox[2]),
        'height': float(bbox[3]),
    }


def process_video_colab(video_path, model_name, fallback_fps=20.0):
    """Run YOLO tracking over a video and download the annotated results.

    Every frame of ``video_path`` is passed through ``model.track``. The
    annotated frames are written to an ``.mp4`` file and one record per
    detected box is collected into a ``.json`` file. Both files are written
    with relative names and then handed to ``files.download``.

    Args:
        video_path: Path of the video file to read.
        model_name: Model name or weights path passed to ``YOLO``; its base
            name is also embedded in the output file names.
        fallback_fps: Frame rate used for the output video when the source
            does not report a positive one.

    Raises:
        RuntimeError: If the input video cannot be opened or the output
            video writer cannot be created.
    """
    base_name = os.path.splitext(os.path.basename(video_path))[0]
    # Only the file-name part of the model goes into the output names, so a
    # model given as a path cannot inject directories into them.
    model_tag = os.path.basename(model_name)
    output_video = f"{base_name}_annotated_{model_tag}.mp4"
    output_json = f"{base_name}_detection_data_{model_tag}.json"

    data = []
    out = None
    cap = cv2.VideoCapture(video_path)
    try:
        # Fail early instead of silently producing empty output files.
        if not cap.isOpened():
            raise RuntimeError(f"Could not open video: {video_path}")

        # Keep the source frame rate so the annotated video plays at the
        # original speed; `not fps > 0` also catches NaN.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = fallback_fps

        frame_size = (
            int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
        )
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_video, fourcc, fps, frame_size)
        if not out.isOpened():
            raise RuntimeError(f"Could not create output video: {output_video}")

        # Load the YOLO model
        model = YOLO(model_name)

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Queried once per frame; every box of the frame shares it.
            frame_index = int(cap.get(cv2.CAP_PROP_POS_FRAMES))

            # Perform object detection and tracking on the frame
            result = model.track(frame, persist=True)[0]

            # Write the annotated frame to the output video
            out.write(result.plot())

            data.extend(_box_to_record(box, frame_index) for box in result.boxes)
    finally:
        # Release the capture and writer even if tracking fails midway.
        # No GUI windows are created here, so there is nothing to destroy.
        cap.release()
        if out is not None:
            out.release()

    # Save detection data to JSON file
    with open(output_json, 'w') as f:
        json.dump(data, f)

    print(f"Output video saved to: {output_video}")
    print(f"Detection data saved to: {output_json}")

    # Provide download links for the processed files
    files.download(output_video)
    files.download(output_json)

# Upload video file
uploaded = files.upload()

# Match extensions case-insensitively so that e.g. "clip.MP4" is accepted.
video_files = [
    fn for fn in uploaded
    if fn.lower().endswith(('.mp4', '.avi', '.mov'))
]

# When several videos are uploaded, the last one is used.
video_path = video_files[-1] if video_files else None

# Raise instead of assert: assertions are stripped when Python runs with -O.
if video_path is None:
    raise ValueError("Please upload a video file.")
print(f"Video uploaded: {video_path}")

# Dropdown menu for model selection
import ipywidgets as widgets
from IPython.display import display

# Selectable weights files; the first entry is pre-selected in the dropdown.
model_choices = [f'yolov8{suffix}.pt' for suffix in 'nsmlx']

model_dropdown = widgets.Dropdown(
    description='Select Model:',
    options=model_choices,
    value=model_choices[0],
)
process_button = widgets.Button(description="Process Video")


def on_process_button_clicked(b):
    """Process the uploaded video with the model currently selected."""
    process_video_colab(video_path, model_dropdown.value)


# Wire the click handler, then render the dropdown above the button.
process_button.on_click(on_process_button_clicked)

display(model_dropdown)
display(process_button)