Detect the text in the image:

CRAFT (Character Region Awareness for Text Detection):
- Precision: CRAFT focuses on character-level detection, offering high precision, especially for detecting text with irregular spacing or in cluttered scenes.
- Handling Curved Text: It excels at detecting curved or distorted text, making it ideal for challenging layouts where text does not follow a straight line.
- Affinity Between Characters: CRAFT uses an affinity score to effectively group characters into words or text lines, even in cases where the text is closely packed or partially obscured.

https://towardsdatascience.com/pytorch-scene-text-detection-and-recognition-by-craft-and-a-four-stage-network-ec814d39db05


In [1]:
import torch
from torch.autograd import Variable
from torchvision import transforms

import cv2
import numpy as np

import sys
sys.path.append('CRAFT-pytorch')  


from craft import CRAFT
from craft_utils import getDetBoxes, adjustResultCoordinates
from imgproc import resize_aspect_ratio, normalizeMeanVariance
from file_utils import load_model

In [5]:
# Paths used by the notebook: the CRAFT checkpoint, the folder of input
# images and the folder the annotated images are written to.
pretrained_model_path = 'craft_mlt_25k.pth'
image_folder_path = 'test_images'
output_folder_path = 'results'

use_cuda = torch.cuda.is_available()


def copyStateDict(state_dict):
    """Return a copy of *state_dict* with any leading ``module.`` removed from keys.

    ``torch.nn.DataParallel`` registers the wrapped model under the attribute
    ``module``, so a checkpoint saved from a wrapped model has every key
    prefixed with ``module.``. A bare model cannot load such keys directly.
    Keys without the prefix are copied unchanged, so the helper is safe to
    call on either kind of checkpoint (and on an empty mapping).
    """
    prefix = 'module.'
    return {
        (key[len(prefix):] if key.startswith(prefix) else key): value
        for key, value in state_dict.items()
    }


# Loading CRAFT
net = CRAFT()
# map_location='cpu' lets a checkpoint that was saved from a GPU be loaded on
# a CPU-only machine; the weights are moved to the GPU below when available.
# NOTE: torch.load unpickles the file - only load checkpoints you trust.
net.load_state_dict(
    copyStateDict(torch.load(pretrained_model_path, map_location='cpu'))
)
if use_cuda:
    net = net.cuda()
    net = torch.nn.DataParallel(net)
    # Accessed through the torch package so no separate `cudnn` import is needed.
    torch.backends.cudnn.benchmark = False
net.eval()

In [None]:
def process_image(image_path):
    """Run text detection on a single image file.

    Args:
        image_path: Path of the image file to read with ``cv2.imread``.

    Returns:
        A ``(image, polys)`` tuple: the image exactly as loaded by OpenCV
        (BGR channel order, suitable for ``cv2.polylines``/``cv2.imwrite``)
        and the polygons returned by ``test_net``.

    Raises:
        ValueError: If OpenCV cannot decode ``image_path`` as an image.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with a clear message instead of deep inside the resize.
        raise ValueError(f'Could not read image: {image_path}')

    # NOTE(review): OpenCV loads BGR, while CRAFT's normalisation constants
    # are presumably RGB-ordered (its own loader yields RGB) - confirm.
    # Only the copy fed to the network is converted; the BGR original is
    # returned so drawing and saving with OpenCV keep the right colours.
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    _, polys = test_net(net, rgb_image, use_cuda)
    return image, polys

# Main detection function
def test_net(net, image, use_cuda, text_threshold=0.7, link_threshold=0.4,
             low_text=0.4, poly=False, canvas_size=1280, mag_ratio=1.5):
    """Detect text regions in *image* with a CRAFT network.

    Args:
        net: The CRAFT model (already in eval mode); called as ``net(x)`` and
            expected to return a pair whose first item is the score tensor.
        image: Image array passed to ``resize_aspect_ratio``.
        use_cuda: When true, the input tensor is moved to the GPU.
        text_threshold: Forwarded to ``getDetBoxes``.
        link_threshold: Forwarded to ``getDetBoxes``.
        low_text: Forwarded to ``getDetBoxes``.
        poly: Forwarded to ``getDetBoxes``; requests polygon output.
        canvas_size: Target size handed to ``resize_aspect_ratio``.
        mag_ratio: Magnification ratio handed to ``resize_aspect_ratio``.

    The threshold defaults are presumably the ones used by the CRAFT-pytorch
    reference test script - TODO confirm against that repository.

    Returns:
        A ``(boxes, polys)`` tuple in the coordinate system of the input
        image. Entries of ``polys`` that the post-processing left as ``None``
        are replaced by the corresponding box, so every entry is drawable.
    """
    # Resize the image
    img_resized, target_ratio, _ = resize_aspect_ratio(
        image, canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=mag_ratio
    )
    ratio_h = ratio_w = 1 / target_ratio

    # Preprocessing: normalise, move channels first, add a batch dimension.
    x = normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)
    x = x.unsqueeze(0)

    if use_cuda:
        x = x.cuda()

    with torch.no_grad():
        y, _ = net(x)

    # Make score and link map (channels 0 and 1 of the last axis).
    score_text = y[0, :, :, 0].detach().cpu().numpy()
    score_link = y[0, :, :, 1].detach().cpu().numpy()

    # Post-processing. The thresholds must be passed explicitly; the previous
    # call passed `use_cuda` in the position of the text threshold.
    boxes, polys = getDetBoxes(
        score_text, score_link, text_threshold, link_threshold, low_text, poly
    )
    boxes = adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = adjustResultCoordinates(polys, ratio_w, ratio_h)

    # NOTE(review): getDetBoxes appears to yield None for a polygon it did not
    # compute (e.g. when poly is False) - fall back to the bounding box.
    for k, candidate in enumerate(polys):
        if candidate is None:
            polys[k] = boxes[k]

    return boxes, polys

In [None]:
import os

image_folder_path = 'test_images'
# exist_ok=True replaces the check-then-create pair, which could race with
# another process creating the folder in between.
os.makedirs(output_folder_path, exist_ok=True)

# Only files with these extensions are sent to the detector, so stray entries
# (hidden files, sub-folders, notes) no longer abort the whole run.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff')

# sorted() makes the processing order deterministic across platforms.
for image_name in sorted(os.listdir(image_folder_path)):
    if not image_name.lower().endswith(IMAGE_EXTENSIONS):
        continue

    image_path = os.path.join(image_folder_path, image_name)
    image, polys = process_image(image_path)

    # Outline every detected region in green on the loaded image.
    for poly in polys:
        # cv2.polylines needs integer points laid out as (N, 2).
        points = np.array(poly, np.int32).reshape(-1, 2)
        cv2.polylines(image, [points], True, color=(0, 255, 0), thickness=2)

    output_path = os.path.join(output_folder_path, image_name)
    # cv2.imwrite reports failure through its return value, not an exception.
    if not cv2.imwrite(output_path, image):
        print(f'Warning: could not write {output_path}')