# Text Detection Benchmarking

**Note:** this notebook requires the transformed IAM data that is created by running the `IAM_yolo_preprocess.ipynb` notebook.

### Benchmarked models
- EAST
- YOLOv5 nano
- YOLOv5 small
- YOLOv5 large

## Imports

In [1]:
import os
import sys
import cv2 as cv
from matplotlib import pyplot as plt
from tqdm import tqdm
from shapely.affinity import rotate, translate
from shapely.geometry import Polygon

# So we can import the IAM_yolo class.
module_path = os.path.abspath(os.path.join('../src'))
if module_path not in sys.path:
    sys.path.append(module_path)

from data import IAM_yolo
from yolo import YOLO
from east import EAST

## Data

If the notebook `IAM_yolo_preprocess.ipynb` has been run, the test data are available in `../data/IAM_yolo/`. If not, the data first have to be created by running that notebook.

In [None]:
# IAM_yolo comes from the `data` module made importable via ../src above.
iam_yolo = IAM_yolo()

# Load the 'test' portion of the data; later cells read its 'img_path' and
# 'bounding_boxes' columns.
data = iam_yolo.get_data('test')

# Preview the first rows as the cell output.
data.head()

In [None]:
# One row per form; each row's 'bounding_boxes' entry holds that form's boxes.
print("Number of test forms: ", len(data))
print("Number of words: ", sum(len(boxes) for boxes in data['bounding_boxes']))

## Util

In [None]:
def to_polygon(box):
    """Convert a box into an axis-aligned shapely ``Polygon``.

    The first four entries of ``box`` are read as ``x1, x2, y1, y2`` (both x
    coordinates first, then both y coordinates). Any further entries are
    ignored.
    """
    xs = (box[0], box[1])
    ys = (box[2], box[3])
    # Walk the rectangle corner by corner so the ring is not self-intersecting.
    corners = [
        (xs[0], ys[0]),
        (xs[1], ys[0]),
        (xs[1], ys[1]),
        (xs[0], ys[1]),
    ]
    return Polygon(corners)


In [None]:
def draw_boxes(img, boxes):
    """Display ``img`` with every box in ``boxes`` outlined.

    Each box is indexed as ``[x1, x2, y1, y2]``. The rectangles are drawn on a
    copy, so the caller's image is left untouched. Nothing is returned; the
    result is shown with matplotlib.
    """
    outline_colour = (255, 0, 0)
    outline_width = 2

    canvas = img.copy()
    for box in boxes:
        # cv.rectangle takes two opposite corners as (x, y) points.
        first_corner = (box[0], box[3])
        opposite_corner = (box[1], box[2])
        canvas = cv.rectangle(
            canvas, first_corner, opposite_corner, outline_colour, outline_width
        )

    plt.imshow(canvas)
    plt.show()

In [None]:
# Draw one random row from the test data (a one-row DataFrame).
sample = data.sample()
# .values[0] unwraps the single cell of each column.
img_path = sample['img_path'].values[0]
bounding_boxes = sample['bounding_boxes'].values[0]

img = cv.imread(img_path)

# Sanity check: show the ground-truth boxes on top of the form.
draw_boxes(img, bounding_boxes)


## Evaluate models

In [None]:
def evaluate(model, data, iou_threshold=0.5):
    """Benchmark a text detection model against annotated images.

    For every row in ``data`` the image is loaded, converted to grayscale and
    passed through ``model``. Each ground-truth box is then paired with the
    detection that has the highest IoU; the pair counts as a true positive
    when that IoU is strictly greater than ``iou_threshold``. Detections that
    are not the best match of any accepted ground-truth box count as false
    positives.

    Args:
        model: Callable taking a grayscale image and returning an object that
            supports ``.values.tolist()`` (e.g. a pandas DataFrame), one row
            per detected box. Each row is handed to ``to_polygon``.
        data: DataFrame with an ``"img_path"`` column and a
            ``"bounding_boxes"`` column holding the ground-truth boxes of
            each image.
        iou_threshold: Minimum IoU (exclusive) for a match to be accepted.

    Returns:
        Tuple ``(precision, recall, avg_inf_time)``, each rounded to three
        decimals. ``avg_inf_time`` is the total model time in milliseconds
        divided by the number of ground-truth boxes. A metric whose
        denominator is zero is reported as ``0.0``.
    """
    t_pos = 0
    f_pos = 0
    n_cases = 0
    tickmeter = cv.TickMeter()

    for _, row in tqdm(data.iterrows(), total=len(data)):
        # Fetch image.
        img = cv.imread(row["img_path"])
        img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

        # Time only the forward pass through the detection model.
        tickmeter.start()
        detections = model(img)
        tickmeter.stop()

        # Convert pandas dataframe to list.
        detections = detections.values.tolist()

        targets = row["bounding_boxes"]
        n_cases += len(targets)

        # Build each detection polygon once per image rather than once per
        # (target, detection) pair.
        detection_polygons = [to_polygon(box) for box in detections]

        # Indices of detections that were accepted as the best match of some
        # ground-truth box.
        matched_detections = set()

        for target in targets:
            target_polygon = to_polygon(target)
            best_iou = -1
            best_idx = -1

            for det_idx, det_polygon in enumerate(detection_polygons):
                intersection_area = target_polygon.intersection(det_polygon).area

                if intersection_area:
                    iou = intersection_area / target_polygon.union(det_polygon).area
                else:
                    iou = 0

                if iou > best_iou:
                    best_iou = iou
                    # Remember which *detection* matched; recording the
                    # target index here would make the false-positive count
                    # below wrong whenever two targets share a detection.
                    best_idx = det_idx

            if best_iou > iou_threshold:
                t_pos += 1
                matched_detections.add(best_idx)

        f_pos += len(detections) - len(matched_detections)

    f_neg = n_cases - t_pos
    inf_time = tickmeter.getTimeMilli()
    tickmeter.reset()

    # Guard the ratios so an empty dataset or a model that detects nothing
    # yields 0.0 instead of raising ZeroDivisionError.
    n_predicted = t_pos + f_pos
    n_actual = t_pos + f_neg
    precision = round(t_pos / n_predicted, 3) if n_predicted else 0.0
    recall = round(t_pos / n_actual, 3) if n_actual else 0.0
    # NOTE(review): the average is taken per ground-truth box, not per image;
    # kept as-is so results stay comparable with earlier runs - confirm intent.
    avg_inf_time = round(inf_time / n_cases, 3) if n_cases else 0.0

    print("Precision: ", precision)
    print("Recall: ", recall)
    print("Avg inf time: ", avg_inf_time)

    return precision, recall, avg_inf_time

### Models

#### EAST

In [None]:
# NOTE(review): the meaning of the two constructor arguments is defined in the
# `east` module - presumably score and NMS thresholds; confirm there.
model = EAST(0.5, 0.4)
# Keep the EAST metrics under their own names for later comparison.
prec_east, rec_east, inf_east = evaluate(model, data)

#### YOLOv5s

In [None]:
# Load the YOLOv5s weights from the models directory next to this notebook's folder.
model = YOLO("../models/TD/yolov5s_orthographer.pt")
# Keep the YOLOv5s metrics under their own names for later comparison.
prec_yolov5s, rec_yolov5s, inf_yolov5s = evaluate(model, data)