In [9]:
import cv2
import numpy as np
from ultralytics import YOLO


# defining function for creating a writer (for mp4 videos)
def create_video_writer(video_cap, output_filename):
    # grab the width, height, and fps of the frames in the video stream.
    frame_width = int(video_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(video_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(video_cap.get(cv2.CAP_PROP_FPS))
    # initialize the FourCC and a video writer object
    fourcc = cv2.VideoWriter_fourcc(*'MP4V')
    writer = cv2.VideoWriter(output_filename, fourcc, fps,
                             (frame_width, frame_height))
    return writer

In [10]:
from collections import defaultdict
import pandas as pd

# Load the YOLOv8 model
model = YOLO('weights/yolov8x-obb.pt' )

classNames = [
    'plane',
    'ship',
    'storage tank',
    'baseball diamond',
    'tennis court',
    'basketball court',
    'ground track field',
    'harbor',
    'bridge',
    'large vehicle',
    'small vehicle',
    'helicopter',
    'roundabout',
    'soccer ball field',
    'swimming pool'
]
positions = []
# Open the video file
video_path = "video/Puente_guambra2.mp4"
video_name = video_path.split(".")[0]


cap = cv2.VideoCapture(video_path)

# Store the track history
track_history = defaultdict(lambda: [])
# Store the class history
cls_history = defaultdict(lambda: [])

frame_number = 0

writer = create_video_writer(cap, f"{video_name}_annoted.mp4")

# Loop through the video frames
while cap.isOpened():
    # Read a frame from the video
    success, frame = cap.read()

    if success:
        frame_number += 1

        results = model.predict(frame, conf=0.001, classes=[9,10])
        

        # Visualize the results on the frame
        annotated_frame = results[0].plot(line_width=2, font_size=0.2)
        
        writer.write(annotated_frame)
        # Get the boxes and track IDs
        
        if results[0].obb.xywhr is not None:
        
            boxes = results[0].obb.xywhr.cpu().tolist()
            clss = results[0].obb.cls.cpu().tolist()
            confs = results[0].obb.conf.cpu().tolist()


            # Plot the tracks
            for i, (box, cls, conf) in enumerate(zip(boxes, clss,confs)):
                x, y, w, h, r = box
                
                positions.append([int(frame_number), int(i+1), str(classNames[int(cls)]), float(conf), float(x), float(y),float(w), float(h), float(r)])  # x, y center point

        # Display the annotated frame
        cv2.imshow("YOLOv8 Obb", cv2.resize(annotated_frame, (1920, 1080)))

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    else:
        # Break the loop if the end of the video is reached
        break

df = pd.DataFrame(positions, columns=["frame", "id", "class", "conf", "x", "y", "w", "h", "r"])

# Release the video capture object and close the display window
cap.release()
writer.release()
cv2.destroyAllWindows()

OpenCV: FFMPEG: tag 0x5634504d/'MP4V' is not supported with codec id 12 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'



0: 544x1024 64.6ms
Speed: 2.8ms preprocess, 64.6ms inference, 4.1ms postprocess per image at shape (1, 3, 544, 1024)

0: 544x1024 60.4ms
Speed: 3.0ms preprocess, 60.4ms inference, 3.7ms postprocess per image at shape (1, 3, 544, 1024)

0: 544x1024 62.3ms
Speed: 3.1ms preprocess, 62.3ms inference, 4.1ms postprocess per image at shape (1, 3, 544, 1024)

0: 544x1024 60.8ms
Speed: 3.3ms preprocess, 60.8ms inference, 4.4ms postprocess per image at shape (1, 3, 544, 1024)

0: 544x1024 62.5ms
Speed: 3.3ms preprocess, 62.5ms inference, 3.6ms postprocess per image at shape (1, 3, 544, 1024)

0: 544x1024 62.4ms
Speed: 3.1ms preprocess, 62.4ms inference, 3.7ms postprocess per image at shape (1, 3, 544, 1024)

0: 544x1024 61.2ms
Speed: 3.5ms preprocess, 61.2ms inference, 4.4ms postprocess per image at shape (1, 3, 544, 1024)

0: 544x1024 61.8ms
Speed: 2.9ms preprocess, 61.8ms inference, 4.5ms postprocess per image at shape (1, 3, 544, 1024)

0: 544x1024 60.9ms
Speed: 3.4ms preprocess, 60.9ms infe

In [13]:
df.to_csv(f"annot/{video_name.split('/')[-1]}_positions.csv", index=False)