In [5]:
# --- IMPORTS ---
import cv2
from ultralytics import YOLO
import easyocr
import numpy as np

In [6]:
# --- INITIALIZATION ---

# Load the YOLO model
# Replace 'path/best.pt' with the actual path to the trained model file
model = YOLO('best.pt')

# Initialize EasyOCR Reader
reader = easyocr.Reader(['en'], gpu=True) # Use gpu=False if there is no CUDA-enabled GPU

# Open the video file
video_path = 'test.MOV'
cap = cv2.VideoCapture(video_path)

In [7]:
# --- VIDEO WRITER SETUP ---
# Get video properties
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))

# Define the codec and create VideoWriter object
output_path = 'output_video.mp4'
fourcc = cv2.VideoWriter_fourcc(*'mp4v') # Codec for .mp4 file
out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

In [8]:
# --- VIDEO PROCESSING LOOP ---

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Perform object detection
    results = model(frame, stream=True)

    # Process results
    for result in results:
        boxes = result.boxes
        for box in boxes:
            # Check if the detected object is a plate number (class index 1)
            if int(box.cls[0]) == 1:
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                plate_crop = frame[y1:y2, x1:x2]

                try:
                    # 4. Use EasyOCR to read text
                    plate_text_results = reader.readtext(plate_crop, detail=0, paragraph=False)
                    plate_text = " ".join(plate_text_results).upper().replace(" ", "") if plate_text_results else "READING..."
                except Exception as e:
                    plate_text = "ERROR"
                
                # 5. Draw results on the frame
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                (text_width, text_height), baseline = cv2.getTextSize(plate_text, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)
                cv2.rectangle(frame, (x1, y1 - text_height - 10), (x1 + text_width, y1 - 5), (0, 255, 0), -1)
                cv2.putText(frame, plate_text, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)

    # --- WRITE FRAME TO VIDEO FILE ---
    out.write(frame)

    # Display the processed frame (optional, can be commented out for faster processing)
    # cv2.imshow('YOLO + OCR Vehicle Recognition', frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Clean up
cap.release()
out.release()
cv2.destroyAllWindows()


0: 384x640 1 Car, 2 LicensePlates, 84.4ms
Speed: 12.0ms preprocess, 84.4ms inference, 10.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Car, 1 LicensePlate, 51.5ms
Speed: 1.9ms preprocess, 51.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Car, 1 LicensePlate, 41.4ms
Speed: 1.6ms preprocess, 41.4ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Car, 1 LicensePlate, 37.7ms
Speed: 1.8ms preprocess, 37.7ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Car, 1 LicensePlate, 37.7ms
Speed: 1.7ms preprocess, 37.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Car, 1 LicensePlate, 39.0ms
Speed: 1.7ms preprocess, 39.0ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Car, 1 LicensePlate, 39.3ms
Speed: 1.8ms preprocess, 39.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Car, 1 Lic

In [9]:
print(f"✅ Processed video saved to: {output_path}")

✅ Processed video saved to: output_video.mp4
