In [1]:
from ultralytics import YOLO
import cv2
from paddleocr import PaddleOCR,draw_ocr
import numpy as np
import json
import os
import pathlib 


In [5]:
def find_index_words(lista, words):
    """Return the index of the first entry of ``lista`` matching any of ``words``.

    Every entry of ``lista`` is upper-cased before it is compared, so the
    labels in ``words`` are expected to be given in upper case.

    Args:
        lista: Sequence of strings to scan (e.g. the texts returned by the OCR).
        words: Iterable with any number of accepted label spellings.

    Returns:
        The position of the first matching entry, or ``None`` when no entry
        matches (including when ``lista`` is empty).
    """
    # A tuple lets us test membership with plain equality and accepts any
    # number of labels, not just exactly two.
    labels = tuple(words)
    for index, item in enumerate(lista):
        if item.upper() in labels:
            return index
    return None

In [7]:
def make_json(txts):
    """Extract the badge holder's name and registration number from OCR texts.

    The function locates the "NOME" and "MATRICULA" labels (with or without a
    trailing colon) and inspects the entries around each label:

    * name: first neighbouring entry with at least 11 characters that is
      upper case;
    * registration number: first neighbouring entry made of exactly 5 digits.

    Each field is looked up independently, so a missing label only leaves its
    own field empty.

    Args:
        txts: List of strings recognised by the OCR, in reading order.

    Returns:
        dict with the keys ``"nome"`` and ``"matricula"``; a value is ``""``
        when the corresponding field could not be found.
    """
    json_object = {
        "nome": "",
        "matricula": "",
    }

    index = find_index_words(txts, ["NOME", "NOME:"])
    if index is not None:
        if index <= 1:
            # Label is at the very start: only look at the entries after it.
            candidates = txts[index + 1:index + 3]
        else:
            candidates = txts[index - 2:index + 3]
        for word in candidates:
            if len(word) >= 11 and word.isupper():
                json_object["nome"] = word
                break

    index = find_index_words(txts, ["MATRICULA", "MATRICULA:"])
    if index is not None:
        # Clamp the start at 0: a negative start would slice from the end of
        # the list and skip the real neighbours of the label.
        for word in txts[max(index - 2, 0):index + 3]:
            if len(word) == 5 and word.isdigit():
                json_object["matricula"] = word
                break

    return json_object


In [8]:
# Lazily created PaddleOCR engine shared by every call to ``recognition``.
_OCR_ENGINE = None


def _get_ocr_engine():
    """Create the PaddleOCR engine on first use and reuse it afterwards."""
    global _OCR_ENGINE
    if _OCR_ENGINE is None:
        _OCR_ENGINE = PaddleOCR(use_angle_cls=True, lang='pt', use_gpu=True, drop_score=0.8)
    return _OCR_ENGINE


def recognition(image, show_images, number_cracha):
    """Run OCR on a cropped badge image and extract its fields.

    Args:
        image: Image array of the badge crop; it is copied before use.
        show_images: When true, opens an OpenCV window with the OCR
            visualisation (the caller is responsible for ``cv2.waitKey``).
        number_cracha: Identifier of the badge, used only in the window title.

    Returns:
        The dict produced by ``make_json`` for the recognised texts.
    """
    img_rec = image.copy()
    result = _get_ocr_engine().ocr(img_rec, cls=True)

    # The result holds one entry per input image. Some PaddleOCR versions
    # give ``None`` for an image without any recognised text, so normalise
    # that case to an empty list instead of crashing while iterating.
    lines = (result[0] if result else None) or []

    txts = [line[1][0] for line in lines]
    json_object = make_json(txts)

    if show_images:
        if lines:
            boxes = [line[0] for line in lines]
            scores = [line[1][1] for line in lines]
            im_show = draw_ocr(img_rec, boxes, txts, scores, font_path="arial.ttf")
        else:
            # Nothing to annotate: show the plain crop.
            im_show = img_rec
        cv2.imshow(f'Recognition {number_cracha}', cv2.resize(im_show, (640, 640)))

    return json_object

In [10]:
def scanner(pure_img, json_archive_name, model, confi, write_results = False, show_images = False):
    """Detect badges in an image, OCR them and save the extracted fields as JSON.

    Args:
        pure_img: Path to an image file (``str`` or path-like) or an image
            already loaded as a ``numpy.ndarray``.
        json_archive_name: Name of the output file; ``".json"`` is appended
            when the name does not already have that suffix. The file is
            created relative to the current working directory.
        model: Path to the YOLO weights, or an already loaded ``YOLO`` model.
        confi: Confidence threshold passed to the detector.
        write_results: When true, the extracted fields are also printed.
        show_images: When true, the input, the detections and the OCR
            visualisations are shown in OpenCV windows until a key is pressed.

    Returns:
        None. When no badge is detected nothing is written.

    Raises:
        FileNotFoundError: ``pure_img`` is a path that OpenCV could not read.
        TypeError: ``pure_img`` is neither a path nor a ``numpy.ndarray``.
    """
    if isinstance(pure_img, (str, os.PathLike)):
        imagem = cv2.imread(os.fspath(pure_img))
        if imagem is None:
            # cv2.imread does not raise on failure; fail here with a clear message.
            raise FileNotFoundError(f"Could not read image file: {pure_img!r}")
    elif isinstance(pure_img, np.ndarray):
        imagem = pure_img
    else:
        raise TypeError(
            f"pure_img must be a file path or a numpy.ndarray, got {type(pure_img).__name__}"
        )

    # Accept either a weights path or a model that the caller already loaded.
    detector = model if isinstance(model, YOLO) else YOLO(model)
    results = detector(imagem, device = 0, imgsz = 640, conf = confi)
    img = results[0].plot()

    if len(results[0].boxes.data) == 0:
        if show_images:
            cv2.imshow('Entrada', cv2.resize(imagem,(320,320)))
            cv2.waitKey(0)
            cv2.destroyAllWindows()
        return None

    boxes = results[0].boxes.cpu().numpy()
    info = None
    # NOTE: every detection is processed, but only the fields of the last one
    # are kept and saved below.
    for detec_num, box in enumerate(boxes.xyxy):
        x1, y1, x2, y2 = (int(coord) for coord in box[:4])
        amostras = imagem[y1:y2, x1:x2]
        info = recognition(amostras, show_images, detec_num)

    # The suffix must be fixed before the path is built, otherwise the file
    # is written without the ".json" extension.
    if ".json" != pathlib.Path(json_archive_name).suffix:
        json_archive_name += ".json"

    abs_path = os.path.join(
            os.path.abspath(''),
            json_archive_name
        )

    if write_results:
        print(json.dumps(info, ensure_ascii=False, indent=2))

    # ensure_ascii=False emits non-ASCII characters as-is, so the encoding is
    # fixed to UTF-8 instead of relying on the platform default.
    with open(abs_path, 'w', encoding='utf-8') as arquivo:
        json.dump(info, arquivo, ensure_ascii=False, indent=2)

    if show_images:
        cv2.imshow('Entrada', cv2.resize(imagem,(320,320)))
        cv2.imshow('Deteccao',cv2.resize(img,(320,320)))
        cv2.waitKey(0)
        cv2.destroyAllWindows()

In [11]:
# Run the full pipeline on the sample image, printing the extracted fields
# and showing the intermediate images.
scanner(
    pure_img="verso.jpeg",
    json_archive_name="resultado",
    model="Cracha_nano.pt",
    confi=0.6,
    write_results=True,
    show_images=True,
)


0: 640x384 1 cracha, 54.9ms
Speed: 10.0ms preprocess, 54.9ms inference, 7.3ms postprocess per image at shape (1, 3, 640, 384)


[2023/09/14 12:08:46] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='C:\\Users\\LSE/.paddleocr/whl\\det\\en\\en_PP-OCRv3_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='C:\\Users\\LSE/.paddleocr/whl\\rec\\latin\\latin_PP-OCRv3_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=6, max_text_length=25, 

Testar em uma imagem

In [None]:
# Quick check of the detector on a single image: prints the number of
# detections and shows the annotated image until a key is pressed.
model = YOLO(r"Cracha_nano.pt")
imagem = cv2.imread("verso.jpeg")
if imagem is None:
    # cv2.imread does not raise on failure; stop here with a clear message
    # instead of failing later inside the detector.
    raise FileNotFoundError("Could not read image file: 'verso.jpeg'")
results = model(imagem, device = 0, imgsz = 640, conf = 0.4)
img = results[0].plot()
print(len(results[0].boxes.data))
cv2.imshow('',cv2.resize(img,(640,640)))
cv2.waitKey(0)
cv2.destroyAllWindows()

Testando os modelos em vídeo

In [None]:
# Side-by-side comparison of the two detectors on a live video source.
# Press "q" in one of the windows to stop.
model1 = YOLO(r"Cracha_nano.pt")
model2 = YOLO(r"Cracha_small.pt")

video_path = 0
cap = cv2.VideoCapture(video_path)

try:
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            break

        results1 = model1(frame, device = 0, imgsz=384, conf = 0.86)
        results2 = model2(frame, device = 0, imgsz=384, conf = 0.86)

        annotated_frame1 = results1[0].plot()
        annotated_frame2 = results2[0].plot()

        cv2.imshow("YOLOv8 Inference Nano", annotated_frame1)
        cv2.imshow("YOLOv8 Inference Small", annotated_frame2)

        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always free the capture device and close the windows, even when the
    # loop is aborted by an exception or a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()