In [1]:
import os
import cv2
import json
import easyocr
import numpy as np
from ultralytics import YOLO
import matplotlib.pyplot as plt
from difflib import SequenceMatcher
from torchmetrics.text import CharErrorRate, WordErrorRate

In [2]:
def similar(a, b):
    return SequenceMatcher(None, a, b).ratio()

In [3]:
yolov8_model = YOLO("yolov8_runs/detect/train4/weights/best.pt")  
reader = easyocr.Reader(['en'])

In [4]:
test_dir = "dataset/test"
paths = os.listdir(test_dir)

files = sorted([fname for fname in paths if fname.endswith('.jpg')])

In [5]:
with open('trodo-v01/ground-truth/groundtruth.json') as gt:
    data = json.load(gt)['odometers']

In [6]:
k = 0
target = []

for i in range(len(data)):
    if k >= len(files):
        break

    if data[i]['image'] == files[k]:
        target.append(data[i]['mileage'])
        k += 1

In [7]:
preds = []

for i in range(len(files)):
    image_path = os.path.join(test_dir, files[i])
    image = cv2.imread(image_path)
    img = image.copy()

    v8_results = yolov8_model(image_path)

    prediction = v8_results[0]
    boxes = prediction.boxes
    xywh = boxes.xywh[0]

    image = image[int(xywh[1]): int(xywh[1] + xywh[3]), int(xywh[0]): int(xywh[0] + xywh[2])]

    cv2.rectangle(img, (int(xywh[0]), int(xywh[1])), (int(xywh[0] + xywh[2]), int(xywh[1] + xywh[3])), (255,0,0), 3)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    ocr_results = reader.readtext(image, allowlist='0123456789')

    value = []
    value.append('')

    for result in ocr_results:
        if result is None:
            continue
        x1 = min([bbox[0] for bbox in result[0]])
        y1 = min([bbox[1] for bbox in result[0]])
        x2 = max([bbox[0] for bbox in result[0]])
        y2 = max([bbox[1] for bbox in result[0]])
        cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), (0,0,255), 2)
        value.append(result[1])
    
    preds.append(value)

    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    plt.imshow(image)
    plt.axis('off')
    plt.savefig(f'ocr_results/{files[i]}', bbox_inches='tight', pad_inches=0)
    plt.close()


image 1/1 /home/umut/Documents/ai-project/dataset/test/00000020-PHOTO-2020-11-20-11-23-18.jpg: 640x384 1 digital, 67.0ms
Speed: 5.6ms preprocess, 67.0ms inference, 17.6ms postprocess per image at shape (1, 3, 640, 384)


  return F.conv2d(input, weight, bias, self.stride,



image 1/1 /home/umut/Documents/ai-project/dataset/test/00000031-PHOTO-2020-11-20-20-22-09.jpg: 640x480 1 analog, 77.8ms
Speed: 4.5ms preprocess, 77.8ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 480)

image 1/1 /home/umut/Documents/ai-project/dataset/test/00000046-PHOTO-2020-11-20-22-59-59.jpg: 640x320 1 digital, 71.1ms
Speed: 2.7ms preprocess, 71.1ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 320)

image 1/1 /home/umut/Documents/ai-project/dataset/test/00000053-PHOTO-2020-11-21-09-54-07.jpg: 640x480 1 digital, 12.9ms
Speed: 5.3ms preprocess, 12.9ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 480)

image 1/1 /home/umut/Documents/ai-project/dataset/test/00000054-PHOTO-2020-11-21-09-56-40.jpg: 320x640 1 digital, 108.4ms
Speed: 3.2ms preprocess, 108.4ms inference, 1.5ms postprocess per image at shape (1, 3, 320, 640)

image 1/1 /home/umut/Documents/ai-project/dataset/test/00000056-PHOTO-2020-11-21-10-15-31.jpg: 640x320 1 digital, 10.0ms
S

In [8]:
cer = CharErrorRate()
wer = WordErrorRate()

In [9]:
for i in range(len(preds)):
    percents = []
    for pred in preds[i]:
        percents.append(similar(target[i], pred))
    preds[i] = preds[i][np.argmax(percents)]

In [10]:
print(cer(preds, target))

tensor(0.4237)


In [11]:
print(wer(preds, target))

tensor(0.9247)
