# Mount to Google Drive

Use the code below if using Gloud Computing

In [1]:
from google.colab import drive
from datetime import datetime

drive.mount('/content/gdrive')

Mounted at /content/gdrive


# Downloading Dependencies and Libraries

In [2]:
!pip install ultralytics
!pip install torch torchvision torchaudio
!pip install easyocr
!pip install opencv-python
!pip install numpy

Collecting ultralytics
  Downloading ultralytics-8.3.146-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading n

# Importing Dependencies and Variables

In [None]:
import os
import cv2
import easyocr
import csv
from ultralytics import YOLO
import torch
from tqdm import tqdm
import sys
import time
import numpy as np

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


# Path Configuration

In [None]:
INPUT_PATH = "/content/gdrive/My Drive/cos30018-test/data/images/val"
OUTPUT_PATH = "/content/gdrive/My Drive/cos30018-test/EasyOCR/PredictedImages"
RESULT_PATH = "/content/gdrive/My Drive/cos30018-test/EasyOCR/Result"
YOLO_MODEL_PATH = "/content/gdrive/My Drive/cos30018-test/yolov11/train_110525/runs/detect/train2/weights/best.pt"
SAVE_RESULTS = True
SAVE_CSV = True

Path Checking

In [None]:
paths_to_check = {
    "INPUT_PATH": INPUT_PATH,
    "OUTPUT_PATH": OUTPUT_PATH,
    "RESULT_PATH": RESULT_PATH,
    "YOLO_MODEL_PATH": YOLO_MODEL_PATH
}

for name, path in paths_to_check.items():
    if os.path.exists(path):
        print(f"✅ {name} exists: {path}")
    else:
        print(f"❌ {name} does NOT exist: {path}")

✅ INPUT_PATH exists: /content/gdrive/My Drive/cos30018-test/data_v2/images/train
✅ OUTPUT_PATH exists: /content/gdrive/My Drive/cos30018-test/EasyOCR/PredictedImages
✅ RESULT_PATH exists: /content/gdrive/My Drive/cos30018-test/EasyOCR/Result
✅ YOLO_MODEL_PATH exists: /content/gdrive/My Drive/cos30018-test/yolov11/train_110525/runs/detect/train2/weights/best.pt


# Initialize EasyOCR with GPU if available

In [None]:
reader = easyocr.Reader(['en'],
                       gpu=torch.cuda.is_available(),
                       quantize=True,
                       model_storage_directory=RESULT_PATH)



# Initializing YOLO Model

In [None]:
if os.path.exists(YOLO_MODEL_PATH):
    print(f"✅ YOLO model path exists: {YOLO_MODEL_PATH}")
    model = YOLO(YOLO_MODEL_PATH)
else:
    print(f"❌ YOLO model path does NOT exist: {YOLO_MODEL_PATH}")
    model = None

results_csv = []

✅ YOLO model path exists: /content/gdrive/My Drive/cos30018-test/yolov11/train_110525/runs/detect/train2/weights/best.pt


# Function to Visualize Detection

In [None]:
def visualize_detections(img, detections, plate_results, filename):
    """Visualize detections on the image with confidence scores"""
    font = cv2.FONT_HERSHEY_SIMPLEX
    detected_texts = []
    detection_info = []

    for idx, box in enumerate(detections):
        x1, y1, x2, y2 = map(int, box.xyxy[0])
        detection_conf = float(box.conf[0].cpu().numpy())
        plate_text = "Unknown"
        ocr_conf = 0.0

        if plate_results and len(plate_results) > idx:
            plate_text = plate_results[idx]["text"]
            ocr_conf = plate_results[idx]["confidence"]

        # Draw bounding box
        color = (0, 255, 0)  # Green
        img = cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)

        # Create text with both detection and OCR confidence
        info_text = f"{plate_text} (Det: {detection_conf:.2f}, OCR: {ocr_conf:.2f})"

        # Calculate text position (above the bounding box)
        text_pos = (x1, max(20, y1 - 10))

        # Draw text background for better visibility
        (text_width, text_height), _ = cv2.getTextSize(info_text, font, 0.7, 2)
        cv2.rectangle(img, (x1, y1 - text_height - 10), (x1 + text_width, y1), color, -1)

        # Draw text
        img = cv2.putText(img, info_text, text_pos, font, 0.7, (0, 0, 0), 2, cv2.LINE_AA)

        detected_texts.append(plate_text)
        detection_info.append({
            "text": plate_text,
            "detection_conf": detection_conf,
            "ocr_conf": ocr_conf
        })

    # Save the image if needed
    if filename and SAVE_RESULTS:
        output_path = os.path.join(OUTPUT_PATH, filename)
        cv2.imwrite(output_path, img)

    return img, detection_info

# Function to Process Images / Frames

In [None]:
def process_frame(img, frame_id=None, filename=None):
    try:
        results = model.predict(img,
                                imgsz=640,
                                conf=0.5,
                                iou=0.45,
                                device='cuda:0' if torch.cuda.is_available() else 'cpu',
                                verbose=False)

        detections = results[0].boxes

        if len(detections) == 0:
            print(f"⚠️ No detections in {filename or frame_id}")
            return img, {
                "source": filename or f"frame_{frame_id}",
                "detected_text": "",
                "num_plates": 0,
                "timestamp": datetime.now().isoformat(),
                "detection_conf": "",
                "ocr_conf": "",
                "details": []
            }

        plate_results = []

        for box in detections:
            x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().numpy())
            detection_conf = float(box.conf[0].cpu().numpy())

            plate_crop = img[y1:y2, x1:x2]
            plate_crop_rgb = cv2.cvtColor(plate_crop, cv2.COLOR_BGR2RGB)

            ocr_result = reader.readtext(plate_crop_rgb,
                                       batch_size=1,
                                       width_ths=0.7,
                                       text_threshold=0.5)

            # Combine all OCR results for this plate
            combined_text = " ".join([r[1] for r in ocr_result])
            avg_ocr_conf = np.mean([r[2] for r in ocr_result]) if ocr_result else 0.0

            plate_results.append({
                "text": combined_text,
                "confidence": avg_ocr_conf,
                "detection_conf": detection_conf
            })

        # Visualize detections
        visualized_img, detection_details = visualize_detections(img, detections, plate_results, filename)

         # Get ground truth (if available)
        # gt_text = ground_truth.get(filename, "")

        # Calculate accuracy
        # accuracy = calculate_accuracy(combined_text) if gt_text else None

        result_info = {
            "source": filename or f"frame_{frame_id}",
            "detected_text": " | ".join([r["text"] for r in plate_results]),
            "num_plates": len(detections),
            "timestamp": datetime.now().isoformat(),
            "detection_conf": ", ".join([f"{r['detection_conf']:.2f}" for r in plate_results]),
            "ocr_conf": ", ".join([f"{r['confidence']:.2f}" for r in plate_results]),
            "details": detection_details
        }

        results_csv.append(result_info)
        return visualized_img, result_info

    except Exception as e:
        print(f"⚠️ Error processing frame {filename or frame_id}: {str(e)}")
        return img, {
            "source": filename or f"frame_{frame_id}",
            "detected_text": "",
            "num_plates": 0,
            "timestamp": datetime.now().isoformat(),
            "detection_conf": "",
            "ocr_conf": "",
            "details": []
        }

# Function to Load Ground Truth CSV

In [None]:
def load_ground_truth(csv_path):
    """Load image-to-text mapping from CSV"""
    gt = {}
    with open(csv_path) as f:
        reader = csv.DictReader(f)
        for row in reader:
            gt[row["image_name"]] = row["ground_truth_text"]
    return gt

ground_truth_df = pd.read_csv('/content/drive/My Drive/cos30018-test/EasyOCR/Result/labels_val.csv')
ground_truth_dict = dict(zip(ground_truth_df['filename'], ground_truth_df['text']))

FileNotFoundError: [Errno 2] No such file or directory: '/content/gdrive/My Drive/cos30018-test/data_v2/images/labels'

# Function to Add Accuracy Evaluation

In [None]:
import Levenshtein
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

def calculate_text_similarity(pred_text, true_text):
    """Compute text similarity using Levenshtein distance (0-1 scale)"""
    pred_text = pred_text.upper().strip()
    true_text = true_text.upper().strip()
    distance = Levenshtein.distance(pred_text, true_text)
    max_len = max(len(pred_text), len(true_text))
    return 1.0 - (distance / max_len) if max_len > 0 else 0.0

def calculate_character_accuracy(pred_text, true_text):
    """Calculate character-level accuracy"""
    pred_text = pred_text.upper().strip()
    true_text = true_text.upper().strip()
    correct_chars = sum(1 for p, t in zip(pred_text, true_text) if p == t)
    total_chars = max(len(pred_text), len(true_text))
    return correct_chars / total_chars if total_chars > 0 else 0

# Functions to Save Results

In [None]:
def save_results_to_csv():
    """Save results to CSV with detailed confidence information"""
    csv_path = os.path.join(RESULT_PATH, "ocr_results.csv")
    file_exists = os.path.exists(csv_path)

    # Enhanced field names to include more details
    fieldnames = [
        "source",
        "detected_text",
        "num_plates",
        "timestamp",
        "detection_conf",
        "ocr_conf",
        "details"
    ]

    with open(csv_path, mode='a' if file_exists else 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)

        if not file_exists:
            writer.writeheader()

        for result in results_csv:
            # Convert details list to string for CSV
            details_str = "; ".join([
                f"Text: {d['text']}, DetConf: {d['detection_conf']:.2f}, OCRConf: {d['ocr_conf']:.2f}"
                for d in result.get('details', [])
            ])

            row = {
                "source": result["source"],
                "detected_text": result["detected_text"],
                "num_plates": result["num_plates"],
                "timestamp": result["timestamp"],
                "detection_conf": result["detection_conf"],
                "ocr_conf": result["ocr_conf"],
                "details": details_str
            }
            writer.writerow(row)

    print(f"📄 Results saved to {csv_path}")

# Main Functions

Function for Images

In [None]:
def process_images():
    """Process all images in the input directory"""
    IMAGE_LIMIT = 30  # Set to None to process all images
    image_files = [f for f in os.listdir(INPUT_PATH) if f.lower().endswith((".jpg", ".jpeg", ".png"))]

    for filename in tqdm(image_files[:IMAGE_LIMIT] if IMAGE_LIMIT else image_files, desc="Processing images"):
        full_path = os.path.join(INPUT_PATH, filename)
        img = cv2.imread(full_path)

        if img is None:
            print(f"❌ Could not read image: {filename}")
            continue

        processed_img, result_info = process_frame(img, filename=filename)

        if isinstance(processed_img, np.ndarray):
            print(f"✅ Processed {filename}: {result_info['detected_text']}")
        else:
            print(f"❌ Failed to process {filename}")

Function for Videos

In [None]:
def process_video():
    """Process video file"""
    cap = cv2.VideoCapture(INPUT_PATH)
    if not cap.isOpened():
        print(f"❌ Could not open video: {INPUT_PATH}")
        return

    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    output_video_path = os.path.join(OUTPUT_PATH, "output_video.mp4")

    if SAVE_RESULTS:
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

    for frame_id in tqdm(range(total_frames), desc="Processing video"):
        ret, frame = cap.read()
        if not ret:
            break

        processed_frame, _ = process_frame(frame, frame_id=frame_id)

        if SAVE_RESULTS:
            out.write(processed_frame)

    cap.release()
    if SAVE_RESULTS:
        out.release()
        print(f"✅ Processed video saved to {output_video_path}")

In [None]:
import difflib

def calculate_accuracy(predicted, ground_truth):
    # Compute character-level accuracy using difflib
    matcher = difflib.SequenceMatcher(None, predicted, ground_truth)
    return matcher.ratio()

def process_frame(image_path, reader, ground_truth_dict):
    import cv2
    image = cv2.imread(image_path)
    filename = image_path.split("/")[-1]

    # EasyOCR prediction
    result = reader.readtext(image)

    combined_text = ''.join([r[1] for r in result])
    confidence_scores = [r[2] for r in result]
    avg_confidence = sum(confidence_scores) / len(confidence_scores) if confidence_scores else 0.0

    # Get ground truth text
    gt_text = ground_truth_dict.get(filename, "")
    accuracy = calculate_accuracy(combined_text, gt_text) if gt_text else None

    return {
        "filename": filename,
        "predicted": combined_text,
        "ground_truth": gt_text,
        "accuracy": accuracy,
        "avg_confidence": avg_confidence
    }


Main Function

In [None]:
try:
    # Create output directories if they don't exist
    os.makedirs(OUTPUT_PATH, exist_ok=True)
    os.makedirs(RESULT_PATH, exist_ok=True)

    if os.path.isdir(INPUT_PATH):  # Process directory of images
        process_images()
    else:  # Process video file
        process_video()

    if SAVE_CSV and results_csv:
        save_results_to_csv()

    print("✅ Processing complete!")

except KeyboardInterrupt:
    print("⚠️ Processing interrupted by user")
    if SAVE_CSV and results_csv:
        save_results_to_csv()
except Exception as e:
    print(f"❌ Fatal error during processing: {e}")
    if SAVE_CSV and results_csv:
        save_results_to_csv()
    sys.exit(1)

Processing images:   3%|▎         | 1/30 [00:01<00:30,  1.04s/it]

✅ Processed S_20250506200236_1500_0025_frame_0750.jpg: 07922|


Processing images:   7%|▋         | 2/30 [00:01<00:26,  1.07it/s]

✅ Processed S_20250506191235_1500_0025_frame_1250.jpg: 0nbez | 05 Cjig 1


Processing images:  10%|█         | 3/30 [00:02<00:26,  1.00it/s]

✅ Processed S_20250506193736_1500_0025_frame_0750.jpg: 0n7922 | 05 Gj19 T | QM 7155


Processing images:  13%|█▎        | 4/30 [00:03<00:24,  1.06it/s]

✅ Processed S_20250506193736_1500_0025_frame_1000.jpg: (077922 | 03 6919 [


Processing images:  17%|█▋        | 5/30 [00:04<00:23,  1.08it/s]

✅ Processed S_20250506191235_1500_0025_frame_1500.jpg: 0igez | 05 cgi0 T


Processing images:  20%|██        | 6/30 [00:05<00:21,  1.10it/s]

✅ Processed S_20250506205238_1500_0025_frame_0000.jpg: 07{0167 | 07bzz


Processing images:  23%|██▎       | 7/30 [00:06<00:21,  1.07it/s]

✅ Processed S_20250506202737_1500_0025_frame_1000.jpg: 047922| | 056319 7 | QAN 7155


Processing images:  27%|██▋       | 8/30 [00:07<00:18,  1.19it/s]

✅ Processed S_20250506200236_1500_0025_frame_1000.jpg: (017922|


Processing images:  30%|███       | 9/30 [00:08<00:20,  1.03it/s]

✅ Processed S_20250506200236_1500_0025_frame_1250.jpg: 041932|


Processing images:  33%|███▎      | 10/30 [00:10<00:24,  1.24s/it]

✅ Processed S_20250506193736_1500_0025_frame_1250.jpg: [0n1922 |


Processing images:  37%|███▋      | 11/30 [00:12<00:29,  1.55s/it]

✅ Processed S_20250506205238_1500_0025_frame_0250.jpg: 011922| | 05 0913 T


Processing images:  40%|████      | 12/30 [00:14<00:31,  1.74s/it]

✅ Processed S_20250506202737_1500_0025_frame_1250.jpg: 017922] | 096919 [


Processing images:  43%|████▎     | 13/30 [00:16<00:27,  1.63s/it]

✅ Processed S_20250506193736_1500_0025_frame_1500.jpg: 037922


Processing images:  47%|████▋     | 14/30 [00:17<00:24,  1.56s/it]

✅ Processed S_20250506211739_0325_0025_frame_0000.jpg: 047822


Processing images:  50%|█████     | 15/30 [00:19<00:23,  1.57s/it]

✅ Processed S_20250506205238_1500_0025_frame_0500.jpg: 05 6ji} /


Processing images:  53%|█████▎    | 16/30 [00:20<00:21,  1.56s/it]

✅ Processed S_20250506202737_1500_0025_frame_1500.jpg: 0179ez| | 03621) 7 | OAM 7155


Processing images:  57%|█████▋    | 17/30 [00:22<00:20,  1.54s/it]

✅ Processed S_20250506200236_1500_0025_frame_1500.jpg: 041922| | QNN 7155


Processing images:  60%|██████    | 18/30 [00:23<00:18,  1.57s/it]

✅ Processed S_20250506205238_1500_0025_frame_0750.jpg: 06ji) 7 | 0n7922


Processing images:  63%|██████▎   | 19/30 [00:25<00:19,  1.75s/it]

✅ Processed S_20250506211739_0325_0025_frame_0250.jpg: 047922| | 0s 621] [


Processing images:  67%|██████▋   | 20/30 [00:28<00:18,  1.84s/it]

✅ Processed S_20250506205238_1500_0025_frame_1000.jpg: 017922| | 03621} /


Processing images:  70%|███████   | 21/30 [00:29<00:15,  1.75s/it]

✅ Processed S_20250506205238_1500_0025_frame_1250.jpg: 047922| | 05 Gd19 [


Processing images:  73%|███████▎  | 22/30 [00:31<00:13,  1.71s/it]

✅ Processed S_20250506205238_1500_0025_frame_1500.jpg: 047822| | 05 6210 T


Processing images:  77%|███████▋  | 23/30 [00:32<00:11,  1.64s/it]

⚠️ No detections in bg_340.jpg
✅ Processed bg_340.jpg: 


Processing images:  80%|████████  | 24/30 [00:34<00:09,  1.60s/it]

✅ Processed 20250426235636_041763_frame_0600.jpg:  | 


Processing images:  83%|████████▎ | 25/30 [00:35<00:07,  1.54s/it]

⚠️ No detections in bg_358.jpg
✅ Processed bg_358.jpg: 


Processing images:  87%|████████▋ | 26/30 [00:39<00:08,  2.19s/it]

⚠️ No detections in bg_351.jpg
✅ Processed bg_351.jpg: 


Processing images:  90%|█████████ | 27/30 [00:40<00:05,  1.90s/it]

⚠️ No detections in bg_346.jpg
✅ Processed bg_346.jpg: 


Processing images:  93%|█████████▎| 28/30 [00:41<00:03,  1.77s/it]

⚠️ No detections in bg_352.jpg
✅ Processed bg_352.jpg: 


Processing images:  97%|█████████▋| 29/30 [00:43<00:01,  1.59s/it]

⚠️ No detections in bg_333.jpg
✅ Processed bg_333.jpg: 


Processing images: 100%|██████████| 30/30 [00:44<00:00,  1.47s/it]

⚠️ No detections in bg_303.jpg
✅ Processed bg_303.jpg: 





📄 Results saved to /content/gdrive/My Drive/cos30018-test/EasyOCR/Result/ocr_results.csv
✅ Processing complete!
