In [2]:
import os
import cv2
from ultralytics import YOLO

In [12]:
# Input
test_vid = 'IMG_3455_yt.mp4'                               # pick test video
test_model = 'train9_987_img--val_333_img_240epochs'    # pick model
VIDEOS_DIR = os.path.join('.', 'data', 'test_video')
video_path = os.path.join(VIDEOS_DIR, test_vid)

# Classification
threshold = 0.35
class_name_dict = {
    0: 'Neon tetra',
    1: 'Siamese Flying Fox',
    2: 'Ember Tetra',
    3: 'African Lampeye',
    4: 'Rummy Nose Tetra'
}

# Appearance
codec = 'MP4V'                                          # 'MP4V' for faster/low quality, 'H264' for slower/high quality
font_size = 0.3                                         # Assumes video to be 720p
font = cv2.FONT_HERSHEY_SIMPLEX
rect_thickness = 1
text_thickness = 1

# Output
video_path_out = f'{video_path[:-4]}_987_333_240epochs_{threshold}thresh.mp4'

In [13]:
cap = cv2.VideoCapture(video_path)
ret, frame = cap.read()
H, W, _ = frame.shape
print(H, W)

if H >= 1080:                                           # if video is 1080p or better
    font_size = 0.7

out = cv2.VideoWriter(video_path_out, cv2.VideoWriter_fourcc(*codec), int(cap.get(cv2.CAP_PROP_FPS)), (W, H))

model_path = os.path.join('.', 'runs', 'detect', test_model, 'weights', 'best.pt')

# Load the model
model = YOLO(model_path)                                # load a custom model

720 1280


OpenCV: FFMPEG: tag 0x5634504d/'MP4V' is not supported with codec id 12 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'


In [14]:
while ret:
    results = model(frame)[0]

    for result in results.boxes.data.tolist():
        x1, y1, x2, y2, score, class_id = result

        if score > threshold:
            confidence = f'{score:.2f}'                 # Format the confidence score to two decimal places
            pred_class = class_name_dict[int(class_id)].upper()
            text = f'{pred_class}: {confidence}'        # Include confidence in the text
            
            if pred_class == "NEON TETRA":
                color = (255, 0, 0)                     # blue
            elif pred_class == "SIAMESE FLYING FOX":
                color = (255, 255, 255)                 # white
            elif pred_class == "EMBER TETRA":
                color = (0, 0, 255)                     # red
            elif pred_class == "AFRICAN LAMPEYE":
                color = (220, 220, 0)                   # cyan
            elif pred_class == "RUMMY NOSE TETRA":
                color = (0, 220, 220)                   # yellow

            cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), color, rect_thickness)
            cv2.putText(frame, text, (int(x1), int(y1 - 10)),
                        font, font_size, color, text_thickness, cv2.LINE_AA)

    out.write(frame)
    ret, frame = cap.read()

cap.release()
out.release()
cv2.destroyAllWindows()



0: 384x640 12 Neon tetras, 1 Ember Tetra, 1 Rummy Nose Tetra, 163.2ms
Speed: 2.4ms preprocess, 163.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 Neon tetras, 1 Ember Tetra, 2 Rummy Nose Tetras, 150.5ms
Speed: 1.1ms preprocess, 150.5ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 Neon tetras, 1 Ember Tetra, 2 Rummy Nose Tetras, 147.4ms
Speed: 1.1ms preprocess, 147.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 12 Neon tetras, 1 Ember Tetra, 145.1ms
Speed: 1.1ms preprocess, 145.1ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 13 Neon tetras, 1 Ember Tetra, 148.6ms
Speed: 1.0ms preprocess, 148.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 Neon tetras, 1 Ember Tetra, 145.4ms
Speed: 1.0ms preprocess, 145.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 Neon tetras, 1 Ember Tetra, 14