<a href="https://colab.research.google.com/github/yeonshiri/AGS/blob/main/code/short_answer_ocr.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# --------------------------------------------------------------
# PaddleOcr

!pip install paddlepaddle
!pip install paddleocr

Collecting paddlepaddle
  Downloading paddlepaddle-3.0.0-cp311-cp311-manylinux1_x86_64.whl.metadata (8.9 kB)
Collecting astor (from paddlepaddle)
  Downloading astor-0.8.1-py2.py3-none-any.whl.metadata (4.2 kB)
Collecting opt_einsum==3.3.0 (from paddlepaddle)
  Downloading opt_einsum-3.3.0-py3-none-any.whl.metadata (6.5 kB)
Downloading paddlepaddle-3.0.0-cp311-cp311-manylinux1_x86_64.whl (192.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m192.8/192.8 MB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading opt_einsum-3.3.0-py3-none-any.whl (65 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.5/65.5 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading astor-0.8.1-py2.py3-none-any.whl (27 kB)
Installing collected packages: opt_einsum, astor, paddlepaddle
  Attempting uninstall: opt_einsum
    Found existing installation: opt_einsum 3.4.0
    Uninstalling opt_einsum-3.4.0:
      Successfully uninstalled opt_einsum-3.4.0
Successful

In [None]:
import os
import cv2
from paddleocr import PaddleOCR
import matplotlib.pyplot as plt

# Crop every YOLO-labelled region out of one image, run PaddleOCR on each
# crop, print the recognised text with its confidence, and show the crop.

# 1. Paths to the image and its YOLO-format label file
image_path = "/content/yolov5/images/valid/image_shortanswer_73_roi1_class1.jpg"
label_path = "/content/yolov5/labels/valid/image_shortanswer_73_roi1_class1.txt"

# 2. Load the image (cv2.imread returns None instead of raising on failure)
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f"이미지 로드 실패: {image_path}")
h, w = image.shape[:2]

# 3. Initialise the OCR engine
ocr = PaddleOCR(use_angle_cls=False, lang='en')

# 4. Read the label file, then crop each ROI and run OCR on it
with open(label_path, 'r') as f:
    lines = f.readlines()

for idx, line in enumerate(lines):
    parts = line.strip().split()
    if len(parts) < 5:
        print(f"라벨 오류: {line.strip()}")
        continue

    # Only fields 1..4 (x_center, y_center, width, height) are needed.
    # Slicing keeps lines with extra trailing columns (e.g. a confidence
    # value) from breaking the unpacking; non-numeric fields are reported
    # the same way as a short line instead of aborting the whole run.
    try:
        x_center, y_center, box_w, box_h = map(float, parts[1:5])
    except ValueError:
        print(f"라벨 오류: {line.strip()}")
        continue

    # Normalised YOLO coordinates -> pixel coordinates, clamped to the image.
    # Without clamping, a box that extends past the left/top edge produces a
    # negative index, which NumPy interprets as "from the end" and yields a
    # wrong (or empty) crop.
    x1 = max(0, int((x_center - box_w / 2) * w))
    y1 = max(0, int((y_center - box_h / 2) * h))
    x2 = min(w, int((x_center + box_w / 2) * w))
    y2 = min(h, int((y_center + box_h / 2) * h))

    roi = image[y1:y2, x1:x2]
    if roi.size == 0:
        print(f"빈 ROI 스킵: {x1,y1,x2,y2}")
        continue

    # Run OCR on the crop
    result = ocr.ocr(roi, cls=False)

    # Print the results. PaddleOCR yields None in place of a detection list
    # when nothing is found, so both levels are guarded with `or []`.
    print(f"\n ROI #{idx} 위치: {x1, y1, x2, y2}")
    texts = []
    for detections in result or []:
        for box in detections or []:
            text = box[1][0]
            conf = box[1][1]
            texts.append(text)
            print(f"   인식: '{text}' (정확도: {conf:.2f})")

    # Visualisation (optional). The title is built from this ROI's own
    # results, so it is never undefined or left over from a previous ROI.
    roi_text = " ".join(texts)
    plt.imshow(cv2.cvtColor(roi, cv2.COLOR_BGR2RGB))
    plt.title(f"ROI #{idx}: '{roi_text}'")
    plt.axis('off')
    plt.show()


[2025/05/18 02:39:43] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, use_gcu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='/root/.paddleocr/whl/det/en/en_PP-OCRv3_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='/root/.paddleocr/whl/rec/en/en_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=6, max_text_l