# YOLOv8 + PaddleOCR (Inference)

## Import Dependencies

In [7]:
from ultralytics import YOLO
import torch
import paddle
from paddleocr import PaddleOCR
import cv2
import warnings
warnings.filterwarnings("ignore")

## Configuration

In [8]:
# --- Detection settings ---
# Name of the YOLO class whose detections are kept.
TARGET_CLASS_NAME = "license_plate"
# Minimum YOLOv8 confidence passed to the model; tune as needed.
YOLO_CONFIDENCE_THRESHOLD = 0.30
# Run on CUDA when PyTorch reports it as available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# --- Drawing colours (OpenCV tuples are in B, G, R order) ---
# YOLO annotations: blue box, white label text on a black strip.
BOX_COLOR_YOLO = (255, 0, 0)
TEXT_COLOR_YOLO = (255, 255, 255)
TEXT_BG_COLOR_YOLO = (0, 0, 0)
# PaddleOCR annotations: green text on a black strip.
OCR_TEXT_COLOR_PADDLE = (0, 255, 0)
OCR_TEXT_BG_COLOR_PADDLE = (0, 0, 0)

# --- Video source ---
# Path of the clip to process (e.g. data/video/58.mp4); the string '0' selects the webcam.
video_path = "../data/video/35.mp4"
video_source = 0 if video_path == '0' else video_path

## Load YOLO

In [9]:
# Load the trained YOLOv8 weights as a detection model.
yolo_model_path = "../models/best_yolov8.pt"  # Must point at your trained YOLOv8 checkpoint
yolo_model = YOLO(model=yolo_model_path, task="detect")
print(f"YOLOv8 model loaded from {yolo_model_path}")

YOLOv8 model loaded from models/best_yolov8.pt


## Load PaddleOCR

In [10]:
# A CUDA-enabled Paddle build is necessary but not sufficient for GPU inference:
# at least one CUDA device must also be visible at runtime. Check both so the
# flag really means "a GPU can be used" (the `and` short-circuits on CPU builds).
gpu_available = (
    paddle.device.is_compiled_with_cuda()
    and paddle.device.cuda.device_count() > 0
)
print(gpu_available)

True


In [11]:
# Build the PaddleOCR engine used to read the cropped plates.
# `use_gpu` follows the `gpu_available` flag computed in the previous cell
# instead of being hard-coded, so the notebook also runs on CPU-only machines.
paddleocr = PaddleOCR(
    use_angle_cls=True,     # enable the text-angle classifier
    lang='en',              # English recognition model
    use_gpu=gpu_available,
)

[2025/05/11 20:37:24] ppocr DEBUG: Namespace(alpha=1.0, alphacolor=(255, 255, 255), benchmark=False, beta=1.0, binarize=False, cls_batch_num=6, cls_image_shape='3, 48, 192', cls_model_dir='C:\\Users\\User/.paddleocr/whl\\cls\\ch_ppocr_mobile_v2.0_cls_infer', cls_thresh=0.9, cpu_threads=10, crop_res_save_dir='./output', det=True, det_algorithm='DB', det_box_type='quad', det_db_box_thresh=0.6, det_db_score_mode='fast', det_db_thresh=0.3, det_db_unclip_ratio=1.5, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_east_score_thresh=0.8, det_limit_side_len=960, det_limit_type='max', det_model_dir='C:\\Users\\User/.paddleocr/whl\\det\\en\\en_PP-OCRv3_det_infer', det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, det_pse_thresh=0, det_sast_nms_thresh=0.2, det_sast_score_thresh=0.5, draw_img_save_dir='./inference_results', drop_score=0.5, e2e_algorithm='PGNet', e2e_char_dict_path='./ppocr/utils/ic15_dict.txt', e2e_limit_side_len=768, e2e_limit_type='max', e2e_model_dir=None, e

## Draw Predictions (Object Detection & OCR)

In [12]:
# Main Processing and Drawing cell
#
# Reads frames from `video_source`, keeps YOLOv8 detections of
# TARGET_CLASS_NAME, runs PaddleOCR text recognition on every plate crop and
# shows the annotated frame in an OpenCV window until the stream ends or 'q'
# is pressed. The capture and the window are released even if a frame fails.


def _find_target_class_id(class_names, target_name):
    """Return the class id whose name equals `target_name`, ignoring case.

    Args:
        class_names: Mapping of integer class id -> class name string.
        target_name: Class name to look for.

    Returns:
        The first matching class id, or None when no name matches.
    """
    wanted = target_name.lower()
    for class_id, class_name in class_names.items():
        if class_name.lower() == wanted:
            return class_id
    return None


def _draw_yolo_detection(image, xmin, ymin, xmax, ymax, label_text):
    """Draw the detection box on `image` with `label_text` on a filled strip above it."""
    cv2.rectangle(image, (xmin, ymin), (xmax, ymax), BOX_COLOR_YOLO, 2)
    (text_w, text_h), baseline = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)
    # The text baseline normally sits just above the box. Clamp it so the label
    # stays inside the frame when the plate touches the top edge.
    text_y = max(ymin - baseline - 1, text_h + 1)
    cv2.rectangle(image, (xmin, text_y - text_h - 1), (xmin + text_w, text_y + 3),
                  TEXT_BG_COLOR_YOLO, -1)
    cv2.putText(image, label_text, (xmin, text_y),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, TEXT_COLOR_YOLO, 1, cv2.LINE_AA)


def _read_plate_text(ocr_engine, plate_roi_bgr):
    """Run recognition-only OCR on a plate crop.

    Args:
        ocr_engine: Initialized PaddleOCR object.
        plate_roi_bgr: BGR image crop containing the plate.

    Returns:
        (text, confidence) for the first recognized line, with the text reduced
        to upper-case alphanumerics; ("", 0.0) when nothing was recognized.
    """
    # det=False: the crop already is the text region, so only recognition runs.
    ocr_output = ocr_engine.ocr(plate_roi_bgr, det=False, rec=True, cls=ocr_engine.use_angle_cls)
    # The result holds one entry per input image; that entry is a list of
    # (text, confidence) lines. A plate is expected to be a single line, so
    # only the first one is used.
    if not ocr_output or not ocr_output[0]:
        return "", 0.0
    first_line = ocr_output[0][0]
    cleaned_text = ''.join(filter(str.isalnum, first_line[0])).upper()
    return cleaned_text, first_line[1]


def _draw_ocr_text(image, xmin, ymin, ymax, display_text):
    """Draw `display_text` below the plate box, or above it when there is no room below."""
    (text_w, text_h), baseline = cv2.getTextSize(display_text, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)
    text_y = ymax + 20
    if text_y + 10 > image.shape[0]:
        text_y = ymin - 10
    # Keep the text inside the frame when the fallback position is above the top edge.
    text_y = max(text_y, text_h + baseline)
    cv2.rectangle(image, (xmin, text_y - text_h - baseline),
                  (xmin + text_w, text_y + baseline),
                  OCR_TEXT_BG_COLOR_PADDLE, -1)
    cv2.putText(image, display_text, (xmin, text_y),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, OCR_TEXT_COLOR_PADDLE, 2)


# YOLO class-name map and the id of our target class, filled in on the first frame
yolo_class_names_map = None
yolo_target_cls_id_int = None  # Integer ID of TARGET_CLASS_NAME in the YOLO model

cap = cv2.VideoCapture(video_source)

if not cap.isOpened():
    print(f"Error: Could not open video source '{video_source}'.")
elif paddleocr is None:
    print("Error: PaddleOCR was not initialized. Cannot proceed.")
    cap.release()  # the capture did open, so it must be closed on this path too
else:
    print(f"Processing video: {video_source} with YOLOv8 and PaddleOCR")
    window_title = "Car Plate YOLOv8 Detection & PaddleOCR"
    cv2.namedWindow(window_title, cv2.WINDOW_NORMAL)

    frame_count = 0
    try:
        while cap.isOpened():
            ret, frame_bgr = cap.read()

            if not ret:
                if isinstance(video_source, str):
                    print("End of video file reached.")
                else:
                    print("Error reading frame from webcam.")
                break

            frame_to_draw = frame_bgr.copy()
            frame_count += 1
            frame_h, frame_w = frame_bgr.shape[:2]

            # --- YOLOv8 Inference ---
            # YOLO model takes BGR NumPy array directly
            yolo_results = yolo_model(frame_bgr, device=device, verbose=False, conf=YOLO_CONFIDENCE_THRESHOLD)
            result = yolo_results[0]  # Results for the first (and only) image

            # --- Resolve YOLO class names and target ID once, on the first usable result ---
            if yolo_class_names_map is None and result.names:
                yolo_class_names_map = result.names
                print(f"YOLO Model Classes: {yolo_class_names_map}")
                yolo_target_cls_id_int = _find_target_class_id(yolo_class_names_map, TARGET_CLASS_NAME)
                if yolo_target_cls_id_int is None:
                    print(f"Error: YOLO Target class '{TARGET_CLASS_NAME}' not found in YOLO model's classes: {yolo_class_names_map}")
                else:
                    print(f"Targeting YOLO class '{TARGET_CLASS_NAME}' with integer ID: {yolo_target_cls_id_int}")

            # --- Process Detections and Perform OCR ---
            num_detections_in_frame = 0
            # Without a resolved target class there is nothing to match against.
            boxes = result.boxes if yolo_target_cls_id_int is not None else []
            for box in boxes:
                cls_id_int_current = int(box.cls.item())
                if cls_id_int_current != yolo_target_cls_id_int:
                    continue

                confidence_current = box.conf.item()
                num_detections_in_frame += 1

                # Clamp the box to the frame so the crop below is always valid.
                xmin, ymin, xmax, ymax = map(int, box.xyxy[0].cpu().numpy())
                xmin = max(0, xmin)
                ymin = max(0, ymin)
                xmax = min(frame_w, xmax)
                ymax = min(frame_h, ymax)

                yolo_label_text = f"{yolo_class_names_map[cls_id_int_current]}: {confidence_current:.2f}"
                _draw_yolo_detection(frame_to_draw, xmin, ymin, xmax, ymax, yolo_label_text)

                # --- PaddleOCR: Crop the plate and read text ---
                if xmax <= xmin or ymax <= ymin:
                    continue  # degenerate box, nothing to crop
                plate_roi_bgr = frame_bgr[ymin:ymax, xmin:xmax]
                try:
                    recognized_text, text_confidence = _read_plate_text(paddleocr, plate_roi_bgr)
                except Exception as e:
                    # Best effort: one failed crop must not stop the video.
                    # Reporting is throttled to every 10th frame.
                    if frame_count % 10 == 0:
                        print(f"Frame {frame_count}: Error during PaddleOCR for a plate ROI: {e}")
                    continue

                if not recognized_text:
                    continue

                ocr_display_text = f"{recognized_text} ({text_confidence:.2f})"
                _draw_ocr_text(frame_to_draw, xmin, ymin, ymax, ocr_display_text)
                if frame_count % 10 == 0:
                    print(f"Frame {frame_count}: YOLO Plate (Conf: {confidence_current:.2f}), PaddleOCR: '{recognized_text}' (Conf: {text_confidence:.2f})")

            if frame_count % 30 == 0:
                print(f"Frame {frame_count}: Found {num_detections_in_frame} '{TARGET_CLASS_NAME}' instances by YOLOv8.")

            cv2.imshow(window_title, frame_to_draw)
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                print("Exiting...")
                break
    finally:
        # Runs on normal exit, on 'q', and on any exception or interrupt, so
        # the video handle and the window never leak.
        cap.release()
        cv2.destroyAllWindows()
    print("Video processing finished and resources released.")

Processing video: data/video/35.mp4 with YOLOv8 and PaddleOCR
YOLO Model Classes: {0: 'license_plate'}
Targeting YOLO class 'license_plate' with integer ID: 0
Frame 10: YOLO Plate (Conf: 0.89), PaddleOCR: 'AK3340' (Conf: 0.88)
Frame 10: YOLO Plate (Conf: 0.86), PaddleOCR: '389' (Conf: 0.33)
Frame 20: YOLO Plate (Conf: 0.89), PaddleOCR: 'RK3340' (Conf: 0.97)
Frame 20: YOLO Plate (Conf: 0.87), PaddleOCR: '3E' (Conf: 0.27)
Frame 20: YOLO Plate (Conf: 0.76), PaddleOCR: 'OAA71250' (Conf: 0.75)
Frame 30: YOLO Plate (Conf: 0.89), PaddleOCR: 'ORK3340' (Conf: 0.89)
Frame 30: YOLO Plate (Conf: 0.86), PaddleOCR: '38' (Conf: 0.43)
Frame 30: Found 2 'license_plate' instances by YOLOv8.
Frame 40: YOLO Plate (Conf: 0.88), PaddleOCR: 'RK3340' (Conf: 0.97)
Frame 40: YOLO Plate (Conf: 0.87), PaddleOCR: '35' (Conf: 0.41)
Frame 50: YOLO Plate (Conf: 0.87), PaddleOCR: '35' (Conf: 0.27)
Frame 50: YOLO Plate (Conf: 0.87), PaddleOCR: 'R3340' (Conf: 0.92)
Frame 60: YOLO Plate (Conf: 0.87), PaddleOCR: 'AK3340' 