In [None]:
import json
import os
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np
from PIL import Image

# Directory the notebook kernel is currently running in.
NOTEBOOK_DIR = os.getcwd()

# Compare the last path component exactly. A plain suffix test would also
# accept unrelated directories such as ".../my-ocr-hkd", which would then
# silently resolve BASE_DIR to the wrong location.
if os.path.basename(os.path.normpath(NOTEBOOK_DIR)) != "ocr-hkd":
    raise ValueError(
        "Please set the working directory to 'ocr-hkd' folder. Currently it is set to: "
        + NOTEBOOK_DIR
    )

# BASE_DIR is two levels above the notebook directory; the dataset lives
# under <BASE_DIR>/data.
BASE_DIR = os.path.join(NOTEBOOK_DIR, "..", "..")
DATASET_DIR = os.path.join(BASE_DIR, 'data', 'Manga109_released_2023_12_07')

In [None]:
# -----------------------------------------------------------------------------
# Configuration
# -----------------------------------------------------------------------------
# Annotation file (the visualizer's error message says it is produced by
# create_annotations.py) and the root folder holding one sub-folder of page
# images per book.
JSON_PATH = os.path.join(BASE_DIR, "data", "manga109_ocr_dataset.json")
IMAGE_ROOT = os.path.join(
    BASE_DIR, "data", "Manga109_released_2023_12_07", "images"
)

# Page to visualize: book folder name and page image filename.
TARGET_BOOK = "YumeiroCooking"
# The filename must match the on-disk format (usually zero-padded, 3 digits).
TARGET_PAGE = "087.jpg"


def _load_page_annotations(json_path, target_path):
    """Return the annotation records for one page.

    Args:
        json_path: Path of the JSON annotation file (a list of records).
        target_path: Value to match against each record's 'img_path'.

    Returns:
        A list of matching records (possibly empty), or None when the
        annotation file does not exist.
    """
    print(f"Loading annotations from {json_path}...")
    if not os.path.exists(json_path):
        print("Error: JSON file not found. Please run create_annotations.py first.")
        return None

    with open(json_path, 'r', encoding='utf-8') as f:
        dataset = json.load(f)

    return [item for item in dataset if item['img_path'] == target_path]


def _draw_annotation(ax, index, ann):
    """Draw one annotation's bounding box, mask polygons and ID label on ax."""
    xmin, ymin, xmax, ymax = ann['bbox'][:4]  # [xmin, ymin, xmax, ymax]

    # Color settings: Bubble = Blue, anything else (orphan text) = Red
    color = 'blue' if ann['type'] == 'bubble' else 'red'

    # A. Bounding box outline
    ax.add_patch(patches.Rectangle(
        (xmin, ymin), xmax - xmin, ymax - ymin,
        linewidth=2, edgecolor=color, facecolor='none', linestyle='-'
    ))

    # B. Mask polygons. 'mask' is optional; per the original author's note
    #    it is expected only for bubbles.
    for poly_coords in ann.get('mask') or []:
        # poly_coords is [x1, y1, x2, y2, ...]; reshape to [[x1, y1], ...]
        coords = np.asarray(poly_coords)
        if coords.size == 0 or coords.size % 2:
            # An odd or empty coordinate list cannot form (x, y) pairs;
            # skip it instead of aborting the whole page.
            print(f"[ID:{index}] Skipping malformed mask polygon "
                  f"({coords.size} coordinate values)")
            continue
        ax.add_patch(patches.Polygon(
            coords.reshape(-1, 2),
            closed=True,
            facecolor=color,
            alpha=0.3,  # Transparency so the artwork stays visible
            edgecolor=None
        ))

    # C. ID label just above the top-left corner of the box
    ax.text(
        xmin, ymin - 5,
        f"ID:{index}",
        color='white',
        fontsize=10,
        bbox=dict(facecolor=color, alpha=0.8, pad=1)
    )


def visualize_page(book_name, page_name, json_path=None, image_root=None):
    """Show one page image with its annotations overlaid.

    Bounding boxes, mask polygons and numeric ID labels are drawn on the
    image; the text of every annotation is printed to stdout, keyed by the
    same ID.

    Args:
        book_name: Book folder name; first component of 'img_path'.
        page_name: Page image filename, e.g. "087.jpg".
        json_path: Annotation JSON file. Defaults to the module-level
            JSON_PATH.
        image_root: Folder containing one sub-folder per book. Defaults to
            the module-level IMAGE_ROOT.

    Returns:
        None. If the annotation file is missing, the page has no
        annotations, or the image file is missing, a message is printed and
        nothing is drawn.
    """
    # Resolve defaults at call time so that configuration changed in a later
    # notebook cell is picked up.
    if json_path is None:
        json_path = JSON_PATH
    if image_root is None:
        image_root = IMAGE_ROOT

    # 1-2. Load JSON data and keep only the records for this page
    target_path = f"{book_name}/{page_name}"
    page_annotations = _load_page_annotations(json_path, target_path)
    if page_annotations is None:
        return

    if not page_annotations:
        print(f"No annotations found for {target_path}")
        return

    print(f"Found {len(page_annotations)} objects for {target_path}")

    # 3. Load image
    img_full_path = os.path.join(image_root, book_name, page_name)
    if not os.path.exists(img_full_path):
        print(f"Error: Image not found at {img_full_path}")
        return

    # convert() returns an independent in-memory copy, so the file handle can
    # be released as soon as the with-block ends.
    with Image.open(img_full_path) as img:
        image = img.convert("RGB")

    # 4. Visualization
    fig, ax = plt.subplots(figsize=(15, 20))
    ax.imshow(image)

    print("\n--- TEXT CONTENT ---")

    for i, ann in enumerate(page_annotations):
        _draw_annotation(ax, i, ann)

        # Print text to console (to avoid Japanese font issues in matplotlib)
        print(f"[ID:{i}] Type: {ann['type']} | Split: {ann['split']}")
        print(f"      Text: {ann['text']}")
        print("-" * 30)

    # Use the figure/axes created above rather than pyplot's implicit
    # "current axes", so other open figures cannot be affected.
    ax.set_title(f"Visualization: {target_path} (Blue=Bubble/Mask, Red=Orphan Text)", fontsize=15)
    ax.axis('off')
    fig.tight_layout()
    plt.show()

# Entry point: render the configured target page when run as a script/cell.
if __name__ == "__main__":
    visualize_page(book_name=TARGET_BOOK, page_name=TARGET_PAGE)