In [1]:
import cv2
from ultralytics import YOLO

model = YOLO("yolov8x-obb.pt")

In [5]:
def predict(chosen_model, img, classes=[], conf=0.5):
    if classes:
        results = chosen_model.predict(img, classes=classes, conf=conf, persist=True)
    else:
        results = chosen_model.predict(img, conf=conf, persist=True)

    return results

def predict_and_detect(chosen_model, img, classes=[], conf=0.5, rectangle_thickness=2, text_thickness=1):
    results = predict(chosen_model, img, classes, conf=conf)
    for result in results:
        for box in result.boxes:
            cv2.rectangle(img, (int(box.xyxy[0][0]), int(box.xyxy[0][1])),
                          (int(box.xyxy[0][2]), int(box.xyxy[0][3])), (255, 0, 0), rectangle_thickness)
            cv2.putText(img, f"{result.names[int(box.cls[0])]}",
                        (int(box.xyxy[0][0]), int(box.xyxy[0][1]) - 10),
                        cv2.FONT_HERSHEY_PLAIN, 1, (255, 0, 0), text_thickness)
    return img, results

In [6]:
# defining function for creating a writer (for mp4 videos)
def create_video_writer(video_cap, output_filename):
    # grab the width, height, and fps of the frames in the video stream.
    frame_width = int(video_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(video_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(video_cap.get(cv2.CAP_PROP_FPS))
    # initialize the FourCC and a video writer object
    fourcc = cv2.VideoWriter_fourcc(*'MP4V')
    writer = cv2.VideoWriter(output_filename, fourcc, fps,
                             (frame_width, frame_height))
    return writer

In [7]:
output_filename = "Annoted_video.mp4"

cap = cv2.VideoCapture(output_filename)

writer = create_video_writer(cap, output_filename)

video_path = r"./DJI_0015.MOV"
cap = cv2.VideoCapture(video_path)
while True:
    success, img = cap.read()
    if not success:
        break
    result_img, _ = predict_and_detect(model, img, classes=[], conf=0.1)
    writer.write(result_img)
    cv2.imshow("Image", cv2.resize(result_img, (1920, 1080)) )

    cv2.waitKey(1)
writer.release()

SyntaxError: '[31m[1mpersist[0m' is not a valid YOLO argument. 

    Arguments received: ['yolo', '--f=/home/arthur/.local/share/jupyter/runtime/kernel-v2-1015791bzW9r5Lfbkh.json']. Ultralytics 'yolo' commands use the following syntax:

        yolo TASK MODE ARGS

        Where   TASK (optional) is one of {'detect', 'classify', 'obb', 'pose', 'segment'}
                MODE (required) is one of {'train', 'export', 'predict', 'track', 'val', 'benchmark'}
                ARGS (optional) are any number of custom 'arg=value' pairs like 'imgsz=320' that override defaults.
                    See all ARGS at https://docs.ultralytics.com/usage/cfg or with 'yolo cfg'

    1. Train a detection model for 10 epochs with an initial learning_rate of 0.01
        yolo train data=coco8.yaml model=yolov8n.pt epochs=10 lr0=0.01

    2. Predict a YouTube video using a pretrained segmentation model at image size 320:
        yolo predict model=yolov8n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320

    3. Val a pretrained detection model at batch-size 1 and image size 640:
        yolo val model=yolov8n.pt data=coco8.yaml batch=1 imgsz=640

    4. Export a YOLOv8n classification model to ONNX format at image size 224 by 128 (no TASK required)
        yolo export model=yolov8n-cls.pt format=onnx imgsz=224,128

    6. Explore your datasets using semantic search and SQL with a simple GUI powered by Ultralytics Explorer API
        yolo explorer

    5. Run special commands:
        yolo help
        yolo checks
        yolo version
        yolo settings
        yolo copy-cfg
        yolo cfg

    Docs: https://docs.ultralytics.com
    Community: https://community.ultralytics.com
    GitHub: https://github.com/ultralytics/ultralytics
     (<string>)

In [5]:
import cv2
import numpy as np
from ultralytics import YOLO


from collections import defaultdict



# object classes
classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
              "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
              "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
              "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
              "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
              "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
              "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
              "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
              "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
              "teddy bear", "hair drier", "toothbrush"
              ]

# Load the YOLOv8 model
model = YOLO('yolov9c.pt')

# Open the video file
video_path = "DJI_0015.MOV"
cap = cv2.VideoCapture(video_path)

# Store the track history
track_history = defaultdict(lambda: [])
# Store the class history
cls_history = defaultdict(lambda: [])

frame_number = 0

# Loop through the video frames
while cap.isOpened():
    # Read a frame from the video
    success, frame = cap.read()

    if success:
        frame_number += 1
        # Run YOLOv8 tracking on the frame, persisting tracks between frames
        # detect only classes=[2,5,7], car, bus, truck
        results = model.track(frame, persist=True, classes=[], conf=0.1)
        

        # Visualize the results on the frame
        annotated_frame = results[0].plot()
        # Get the boxes and track IDs
        
        if results[0].boxes.id is not None:
        
            boxes = results[0].boxes.xywh.cpu()
            clss = results[0].boxes.cls
            track_ids = results[0].boxes.id.int().cpu().tolist()


            # Plot the tracks
            for box, track_id, cls in zip(boxes, track_ids, clss):
                x, y, w, h = box
                track = track_history[track_id]
                ch = cls_history[track_id]
                track.append((float(x), float(y)))  # x, y center point
                ch.append((classNames[int(cls)], frame_number))
                # class name
                # print("Class name -->", classNames[int(cls)])

                # Draw the tracking lines
                points = np.hstack(track).astype(np.int32).reshape((-1, 1, 2))
                cv2.polylines(annotated_frame, [points], isClosed=False, color=(255, 255, 0), thickness=2)

        # Display the annotated frame
        cv2.imshow("YOLOv8 Tracking", cv2.resize(frame, (1920, 1080)))

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    else:
        # Break the loop if the end of the video is reached
        break

# Release the video capture object and close the display window
cap.release()
cv2.destroyAllWindows()


0: 352x640 (no detections), 405.8ms
Speed: 2.0ms preprocess, 405.8ms inference, 0.7ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 (no detections), 350.2ms
Speed: 2.0ms preprocess, 350.2ms inference, 1.1ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 (no detections), 397.8ms
Speed: 1.9ms preprocess, 397.8ms inference, 1.1ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 (no detections), 312.8ms
Speed: 2.1ms preprocess, 312.8ms inference, 1.0ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 (no detections), 292.8ms
Speed: 2.5ms preprocess, 292.8ms inference, 1.0ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 (no detections), 317.4ms
Speed: 2.0ms preprocess, 317.4ms inference, 1.0ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 (no detections), 319.3ms
Speed: 2.1ms preprocess, 319.3ms inference, 1.0ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 (no detections), 321.8ms
Speed: 2.3ms prepr

KeyboardInterrupt: 

In [26]:
from collections import defaultdict

import cv2
import numpy as np

from ultralytics import YOLO

writer = create_video_writer(cap, output_filename)

# Load the YOLOv8 model
model = YOLO('yolov9c.pt')

# Open the video file
video_path = "DJI_0015.MOV"
cap = cv2.VideoCapture(video_path)

# Store the track history
track_history = defaultdict(lambda: [])

# Loop through the video frames
while cap.isOpened():
    # Read a frame from the video
    success, frame = cap.read()

    if success:
        # Run YOLOv8 tracking on the frame, persisting tracks between frames
        results = model.track(frame, persist=True, conf=0.2)

        # Get the boxes and track IDs
        boxes = results[0].boxes.xywh.cpu()
        track_ids = results[0].boxes.id.int().cpu().tolist()

        # Visualize the results on the frame
        annotated_frame = results[0].plot()

        # # Plot the tracks
        # for box, track_id in zip(boxes, track_ids):
        #     x, y, w, h = box
        #     track = track_history[track_id]
        #     track.append((float(x), float(y)))  # x, y center point
        #     if len(track) > 30:  # retain 90 tracks for 90 frames
        #         track.pop(0)

        #     # Draw the tracking lines
        #     points = np.hstack(track).astype(np.int32).reshape((-1, 1, 2))
        #     cv2.polylines(annotated_frame, [points], isClosed=False, color=(230, 230, 230), thickness=10)
        writer.write(result_img)
        # Display the annotated frame
        cv2.imshow("YOLOv8 Tracking", cv2.resize(annotated_frame, (1920, 1080)))

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    else:
        # Break the loop if the end of the video is reached
        break

# Release the video capture object and close the display window
cap.release()
cv2.destroyAllWindows()


0: 352x640 2 trucks, 853.3ms
Speed: 15.2ms preprocess, 853.3ms inference, 3.8ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 2 trucks, 739.7ms
Speed: 4.1ms preprocess, 739.7ms inference, 3.3ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 1 truck, 754.8ms
Speed: 3.8ms preprocess, 754.8ms inference, 3.5ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 1 person, 2 trucks, 888.8ms
Speed: 56.0ms preprocess, 888.8ms inference, 3.5ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 2 trucks, 793.5ms
Speed: 6.6ms preprocess, 793.5ms inference, 3.2ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 1 person, 1 truck, 760.5ms
Speed: 4.3ms preprocess, 760.5ms inference, 3.1ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 1 person, 1 truck, 760.0ms
Speed: 3.5ms preprocess, 760.0ms inference, 3.2ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 1 person, 2 trucks, 782.4ms
Speed: 50.1ms preprocess, 782.4ms i

KeyboardInterrupt: 