In [2]:
import cv2
import numpy as np
from ultralytics import YOLO


# defining function for creating a writer (for mp4 videos)
def create_video_writer(video_cap, output_filename):
    # grab the width, height, and fps of the frames in the video stream.
    frame_width = int(video_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(video_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(video_cap.get(cv2.CAP_PROP_FPS))
    # initialize the FourCC and a video writer object
    fourcc = cv2.VideoWriter_fourcc(*'MP4V')
    writer = cv2.VideoWriter(output_filename, fourcc, fps,
                             (frame_width, frame_height))
    return writer

In [3]:
from collections import defaultdict
import pandas as pd

# Load the YOLOv8 model
model = YOLO('yolov8s-obb.pt')

classNames = [
    'plane',
    'ship',
    'storage tank',
    'baseball diamond',
    'tennis court',
    'basketball court',
    'ground track field',
    'harbor',
    'bridge',
    'large vehicle',
    'small vehicle',
    'helicopter',
    'roundabout',
    'soccer ball field',
    'swimming pool'
]
positions = []
# Open the video file
video_path = "Puente_guambra2.mp4"
cap = cv2.VideoCapture(video_path)

# Store the track history
track_history = defaultdict(lambda: [])
# Store the class history
cls_history = defaultdict(lambda: [])

frame_number = 0

writer = create_video_writer(cap, f"{video_path}_annoted.mp4")

# Loop through the video frames
while cap.isOpened():
    # Read a frame from the video
    success, frame = cap.read()

    if success:
        frame_number += 1

        results = model.predict(frame, conf=0.01, classes=[9,10])
        

        # Visualize the results on the frame
        annotated_frame = results[0].plot(line_width=2, font_size=0.01)
        
        writer.write(annotated_frame)
        # Get the boxes and track IDs
        
        if results[0].obb.xywhr is not None:
        
            boxes = results[0].obb.xywhr.cpu().tolist()
            clss = results[0].obb.cls.cpu().tolist()
            confs = results[0].obb.conf.cpu().tolist()


            # Plot the tracks
            for i, (box, cls, conf) in enumerate(zip(boxes, clss,confs)):
                x, y, w, h, r = box
                
                positions.append([int(frame_number), int(i+1), str(classNames[int(cls)]), float(conf), float(x), float(y),float(w), float(h), float(r)])  # x, y center point

        # Display the annotated frame
        cv2.imshow("YOLOv8 Obb", cv2.resize(annotated_frame, (1920, 1080)))

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    else:
        # Break the loop if the end of the video is reached
        break

df = pd.DataFrame(positions, columns=["frame_number", "id", "class","conf", "cx", "cy", "w", "h", "r"])

# Release the video capture object and close the display window
cap.release()
writer.release()
cv2.destroyAllWindows()

OpenCV: FFMPEG: tag 0x5634504d/'MP4V' is not supported with codec id 12 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'



0: 544x1024 841.2ms
Speed: 4.8ms preprocess, 841.2ms inference, 437.0ms postprocess per image at shape (1, 3, 544, 1024)



qt.qpa.plugin: Could not find the Qt platform plugin "wayland" in "/home/arthur/.local/lib/python3.10/site-packages/cv2/qt/plugins"


0: 544x1024 756.1ms
Speed: 4.8ms preprocess, 756.1ms inference, 22.4ms postprocess per image at shape (1, 3, 544, 1024)

0: 544x1024 795.4ms
Speed: 3.8ms preprocess, 795.4ms inference, 38.2ms postprocess per image at shape (1, 3, 544, 1024)

0: 544x1024 768.2ms
Speed: 3.3ms preprocess, 768.2ms inference, 9.8ms postprocess per image at shape (1, 3, 544, 1024)

0: 544x1024 241.3ms
Speed: 3.0ms preprocess, 241.3ms inference, 9.7ms postprocess per image at shape (1, 3, 544, 1024)

0: 544x1024 256.5ms
Speed: 4.1ms preprocess, 256.5ms inference, 8.9ms postprocess per image at shape (1, 3, 544, 1024)

0: 544x1024 236.5ms
Speed: 3.7ms preprocess, 236.5ms inference, 9.5ms postprocess per image at shape (1, 3, 544, 1024)

0: 544x1024 183.8ms
Speed: 4.4ms preprocess, 183.8ms inference, 9.7ms postprocess per image at shape (1, 3, 544, 1024)

0: 544x1024 212.4ms
Speed: 3.6ms preprocess, 212.4ms inference, 9.6ms postprocess per image at shape (1, 3, 544, 1024)

0: 544x1024 196.1ms
Speed: 3.7ms prepr

ValueError: 8 columns passed, passed data had 9 columns

In [5]:
df = pd.DataFrame(positions, columns=["frame_number", "id", "conf", "class", "x", "y", "w", "h", "r"])
df.to_csv(f"{video_path.split(".")[0]}_positions.csv", index=False)
df

Unnamed: 0,frame_number,id,conf,class,x,y,w,h,r
0,1,1,small vehicle,0.802636,3148.414062,1927.137207,64.585747,30.545033,0.689725
1,1,2,small vehicle,0.795741,3054.048584,463.554932,71.559853,32.174553,2.819362
2,1,3,large vehicle,0.775441,3079.295166,1646.043091,190.205017,43.182365,0.649792
3,1,4,small vehicle,0.775234,3848.507080,297.390717,67.181137,28.472355,2.855587
4,1,5,small vehicle,0.774091,238.860870,1675.974121,68.550430,31.932125,0.058043
...,...,...,...,...,...,...,...,...,...
682993,2787,257,small vehicle,0.010771,1617.497070,981.535706,58.366802,25.187868,0.528911
682994,2787,258,small vehicle,0.010659,1522.585083,474.937256,87.213348,34.550034,2.684876
682995,2787,259,small vehicle,0.010522,3660.509521,1577.630249,36.204979,23.738024,2.863581
682996,2787,260,small vehicle,0.010248,3945.224609,213.176300,64.669098,28.852522,2.828396


291

: 