In [2]:
import os
import cv2
import numpy as np
from ultralytics import YOLO

test_model = 'train9_987_img--val_333_img_240epochs'

# Classification
threshold = 0.30
class_name_dict = {
    0: 'Neon tetra',
    1: 'Siamese Flying Fox',
    2: 'Ember Tetra',
    3: 'African Lampeye',
    4: 'Rummy Nose Tetra'
}

# Appearance
codec = 'MP4V'                                          # 'MP4V' for faster/low quality, 'H264' for slower/high quality
font_size = 0.3                                         # Assumes video to be 720p
font = cv2.FONT_HERSHEY_SIMPLEX
rect_thickness = 1
text_thickness = 1
frame_rate = 10                                         # To get original frame_rate: int(cap.get(cv2.CAP_PROP_FPS))

# Output
video_path_out = f'camera_04_987_333_240epochs_{threshold}thresh.mp4'
cap = cv2.VideoCapture(0)

ret, frame = cap.read()
H, W, _ = frame.shape
print(H, W)

if H >= 1080:                                           # if video is 1080p or better
    font_size = 0.7

out = cv2.VideoWriter(video_path_out, cv2.VideoWriter_fourcc(*codec), frame_rate, (W,H))

model_path = os.path.join('.', 'runs', 'detect', test_model, 'weights', 'best.pt')

# Load the model
model = YOLO(model_path)                                # load a custom model


# Real-time camera feed
while ret:
    results = model(frame)[0]

    for result in results.boxes.data.tolist():
        x1, y1, x2, y2, score, class_id = result

        if score > threshold:
            confidence = f'{score:.2f}'                 # Format the confidence score to two decimal places
            pred_class = class_name_dict[int(class_id)].upper()
            text = f'{pred_class}: {confidence}'        # Include confidence in the text
            
            if pred_class == "NEON TETRA":
                color = (255, 0, 0)                     # blue
            elif pred_class == "SIAMESE FLYING FOX":
                color = (255, 255, 255)                 # white
            elif pred_class == "EMBER TETRA":
                color = (0, 0, 255)                     # red
            elif pred_class == "AFRICAN LAMPEYE":
                color = (220, 220, 0)                   # cyan
            elif pred_class == "RUMMY NOSE TETRA":
                color = (0, 220, 220)                   # yellow

            cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), color, rect_thickness)
            cv2.putText(frame, text, (int(x1), int(y1 - 10)),
                        font, font_size, color, text_thickness, cv2.LINE_AA)

    out.write(frame)
            
    cv2.imshow('Tropical Fish Detector', frame)              # Display the frame with object detection labels

    ret, frame = cap.read()
    
    if cv2.waitKey(1) == 27:
        break

cap.release()
out.release()
cv2.destroyAllWindows()

OpenCV: FFMPEG: tag 0x5634504d/'MP4V' is not supported with codec id 12 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'



1080 1920


0: 384x640 (no detections), 150.7ms
Speed: 1.9ms preprocess, 150.7ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 Neon tetras, 1 Rummy Nose Tetra, 150.6ms
Speed: 1.7ms preprocess, 150.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 Neon tetras, 145.4ms
Speed: 1.4ms preprocess, 145.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Neon tetra, 143.7ms
Speed: 1.4ms preprocess, 143.7ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 Neon tetras, 144.1ms
Speed: 1.5ms preprocess, 144.1ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Neon tetra, 144.3ms
Speed: 1.5ms preprocess, 144.3ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Neon tetras, 144.3ms
Speed: 1.6ms preprocess, 144.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Neon tetras, 143.6ms
Speed: 1.6ms pr

: 