In [1]:
import os
import sys

# Suffix the kernel's working directory must end with for this notebook to run.
ENDSWITH = 'OCR'

# Every path below is derived from the directory the kernel was started in.
NOTEBOOK_DIR = os.getcwd()

# Fail fast if the notebook was launched from an unexpected location; the
# relative hops used to locate the project root would otherwise point elsewhere.
in_expected_dir = NOTEBOOK_DIR.endswith(ENDSWITH)
if not in_expected_dir:
    raise ValueError(f"Not in correct dir, expect end with {ENDSWITH}, but got {NOTEBOOK_DIR} instead")

# The project root is taken to be four directory levels above the notebook directory.
BASE_DIR = os.path.abspath(os.path.join(NOTEBOOK_DIR, *(['..'] * 4)))
print(BASE_DIR)

# Give <BASE_DIR>/code top priority on the import search path
# (presumably where the project modules imported in later cells live).
code_dir = os.path.join(BASE_DIR, 'code')
sys.path.insert(0, code_dir)

/mnt/e/B3/group_prj/group-project-b3


In [2]:
# Set before `import torch` below, so the flag is already in the environment
# when PyTorch is loaded.
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"

import torch

# Resolve the backend name first (priority: CUDA, then Apple MPS, then CPU),
# then build the torch.device object once.
if torch.cuda.is_available():
    device_name = "cuda"
elif torch.backends.mps.is_available():
    device_name = "mps"
else:
    device_name = "cpu"

device = torch.device(device_name)
print(f"Using device: {device}")


Using device: cuda


In [3]:
from MangaOCREvaluator import ParseAnnotation, MangaOCREvaluator
from pipeline.OCRModels.MangaOCRModel import MangaOCRModel

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Parse XML to JSON (one-time setup)
# Convert one title's Manga109 XML annotation file into the JSON file that the
# evaluation cell consumes.
manga_name = "AisazuNihaIrarenai"

# Both the annotation file and the page images live under the same dataset root.
manga109_root = os.path.join(BASE_DIR, 'data', 'Manga109_released_2023_12_07')
xml_path = os.path.join(manga109_root, 'annotations', f'{manga_name}.xml')
images_dir = os.path.join(manga109_root, 'images', manga_name)

# Directory handed to the parser as the destination for the processed JSON.
output_dir = os.path.join(BASE_DIR, 'data', 'MangaOCR', 'jsons_processed')

parser = ParseAnnotation(xml_path, images_dir, output_dir)
# The returned value is passed on later as `json_path` to the evaluator.
json_output_path = parser.parse_and_save()

Parsing AisazuNihaIrarenai...


  Total pages: 94
  Total text annotations: 1473
  Saved to /mnt/e/B3/group_prj/group-project-b3/data/MangaOCR/jsons_processed/AisazuNihaIrarenai.json


In [5]:
# Evaluation settings, named so they are easy to find and tune.
EVAL_BATCH_SIZE = 1
EVAL_MAX_IMAGES = 5  # presumably caps how many pages are evaluated; confirm in MangaOCREvaluator
EVAL_VERBOSE = False

# Evaluator runs on the device selected earlier; the OCR model uses its defaults.
evaluator = MangaOCREvaluator(device=device)

ocr_model = MangaOCRModel()

# The evaluator receives the annotation JSON location as a plain string.
annotation_json = str(json_output_path)

# NOTE(review): the recorded output of this cell reports identical CER, WER and
# sample count for the text-bbox and bubble-bbox runs (difference 0.0000).
# Confirm that compare_bbox_types really evaluates different regions per type.
metrics = evaluator.compare_bbox_types(
    ocr_model=ocr_model,
    json_path=annotation_json,
    images_dir=images_dir,
    batch_size=EVAL_BATCH_SIZE,
    verbose=EVAL_VERBOSE,
    max_images=EVAL_MAX_IMAGES,
)

[32m2025-12-01 09:13:19.589[0m | [1mINFO    [0m | [36mmanga_ocr.ocr[0m:[36m__init__[0m:[36m16[0m - [1mLoading OCR model from kha-white/manga-ocr-base[0m



EVALUATING WITH TEXT BBOX


[32m2025-12-01 09:13:22.616[0m | [1mINFO    [0m | [36mmanga_ocr.ocr[0m:[36m__init__[0m:[36m22[0m - [1mUsing CUDA[0m
[32m2025-12-01 09:13:23.565[0m | [1mINFO    [0m | [36mmanga_ocr.ocr[0m:[36m__init__[0m:[36m35[0m - [1mOCR ready[0m


MangaOCR model loaded
Starting OCR evaluation with text bboxes...


100%|██████████| 5/5 [00:16<00:00,  3.40s/it]
[32m2025-12-01 09:13:40.694[0m | [1mINFO    [0m | [36mmanga_ocr.ocr[0m:[36m__init__[0m:[36m16[0m - [1mLoading OCR model from kha-white/manga-ocr-base[0m


Model unloaded

OCR EVALUATION METRICS (TEXT BBOX)
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓
┃ Metric                     ┃ Value   ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━┫
┃ BBox Type                  ┃ TEXT    ┃
┃ Character Error Rate (CER) ┃ 0.1173  ┃
┃ Word Error Rate (WER)      ┃ 0.5876  ┃
┃ Number of Text Samples     ┃ 93      ┃
┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━┛

EVALUATING WITH BUBBLE BBOX


[32m2025-12-01 09:13:43.634[0m | [1mINFO    [0m | [36mmanga_ocr.ocr[0m:[36m__init__[0m:[36m22[0m - [1mUsing CUDA[0m
[32m2025-12-01 09:13:43.951[0m | [1mINFO    [0m | [36mmanga_ocr.ocr[0m:[36m__init__[0m:[36m35[0m - [1mOCR ready[0m


MangaOCR model loaded
Starting OCR evaluation with bubble bboxes...


100%|██████████| 5/5 [00:14<00:00,  2.92s/it]

Model unloaded

OCR EVALUATION METRICS (BUBBLE BBOX)
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓
┃ Metric                     ┃ Value   ┃
┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━╋━━━━━━━━━┫
┃ BBox Type                  ┃ BUBBLE  ┃
┃ Character Error Rate (CER) ┃ 0.1173  ┃
┃ Word Error Rate (WER)      ┃ 0.5876  ┃
┃ Number of Text Samples     ┃ 93      ┃
┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━┛

COMPARISON RESULTS
┏━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓
┃         ┃   Text BBox ┃   Bubble BBox ┃ Difference   ┃
┣━━━━━━━━━╋━━━━━━━━━━━━━╋━━━━━━━━━━━━━━━╋━━━━━━━━━━━━━━┫
┃ CER     ┃      0.1173 ┃        0.1173 ┃ 0.0000       ┃
┃ WER     ┃      0.5876 ┃        0.5876 ┃ 0.0000       ┃
┃ Samples ┃     93      ┃       93      ┃              ┃
┗━━━━━━━━━┻━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━┛



