In [None]:
# File and data handling
import os
import os
import xml.etree.ElementTree as ET
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import japanize_matplotlib

In [None]:
# Current working directory of the running kernel/process.
NOTEBOOK_DIR = os.getcwd()

# Fail fast when the working directory is not the 'ocr-hkd' folder: the
# dataset paths below are built relative to NOTEBOOK_DIR.
if not NOTEBOOK_DIR.endswith("ocr-hkd"):
    raise ValueError("Please set the working directory to 'ocr-hkd' folder. Currently it is set to: " + NOTEBOOK_DIR)

# Two levels above the working directory. The path is not normalized, so the
# resulting string still contains the literal ".." components.
BASE_DIR = os.path.join(NOTEBOOK_DIR, "..", "..")
# Folder of the Manga109 release; the functions below read its
# "annotations" and "images" sub-folders.
DATASET_DIR = os.path.join(BASE_DIR, 'data', 'Manga109_released_2023_12_07')

In [None]:
def get_book_names():
    """Return the sorted names of all books that have an annotation XML.

    Scans ``<DATASET_DIR>/annotations`` for ``*.xml`` files and strips the
    trailing ``.xml`` extension from each file name.

    Returns:
        list[str]: Alphabetically sorted book names. An empty list is
        returned (after printing an error) when the annotations folder is
        missing or is not a directory.
    """
    annot_path = os.path.join(DATASET_DIR, "annotations")
    # isdir rather than exists: a stray *file* named "annotations" must be
    # reported here instead of making os.listdir raise below.
    if not os.path.isdir(annot_path):
        print(f"Error: annotations folder not found at {annot_path}")
        return []

    suffix = ".xml"
    # Slice off only the trailing extension; str.replace would also remove a
    # ".xml" that happens to occur earlier in the file name.
    return sorted(
        f[:-len(suffix)] for f in os.listdir(annot_path) if f.endswith(suffix)
    )

def build_image_filename(page_element):
    """
    Derive the image file name that belongs to a page element.

    The attributes "id", "index", "page", "num" and "number" are tried in
    that order; the first one that is present and parses as an integer
    wins. Manga109 names its images with a zero-padded 3-digit number
    (000.jpg, 001.jpg, ...).

    Returns the file name, or None (after printing the available attribute
    names) when no candidate attribute holds an integer.
    """
    for attr_name in ("id", "index", "page", "num", "number"):
        if attr_name not in page_element.attrib:
            continue
        try:
            page_num = int(page_element.get(attr_name))
        except ValueError:
            # Present but not numeric: fall through to the next candidate.
            continue
        # First usable attribute found: format it as NNN.jpg.
        return f"{page_num:03d}.jpg"

    # No candidate matched; show what the element does carry to help debugging.
    print(f"Debug - Available page attributes: {list(page_element.attrib.keys())}")
    return None

def _collect_text_crops(text_elements, full_img):
    """Crop every <text> element with a valid bbox out of *full_img*."""
    bubbles = []
    for text_elem in text_elements:
        try:
            text = text_elem.text.strip() if text_elem.text else ""

            # The bbox is stored directly as attributes of the element.
            xmin = int(text_elem.get("xmin", 0))
            ymin = int(text_elem.get("ymin", 0))
            xmax = int(text_elem.get("xmax", 0))
            ymax = int(text_elem.get("ymax", 0))

            # Skip empty or inverted boxes.
            if xmax <= xmin or ymax <= ymin:
                continue

            crop = full_img.crop((xmin, ymin, xmax, ymax))
        except (ValueError, TypeError, AttributeError) as e:
            print(f"Warning: Failed to process text element: {e}")
            continue

        bubbles.append({
            "crop": crop,
            "text": text,
            "bbox": (xmin, ymin, xmax, ymax),
            "type": "text",
        })
    return bubbles


def visualize_bubbles(book_name, page_idx=0, max_crops=9):
    """
    Visualize bubble crops and text from a specific page.

    Every problem (missing XML/image, bad page index, nothing to draw) is
    reported with a printed message and the function returns without
    plotting.

    Args:
        book_name: Book name (without .xml extension)
        page_idx: Position of the page among the <page> elements of the XML
            (0-based; negative values count from the end)
        max_crops: Maximum number of bubbles to display

    Returns:
        None. The crops are shown in a 3-column matplotlib grid.
    """
    # 1. Parse XML file
    xml_path = os.path.join(DATASET_DIR, "annotations", f"{book_name}.xml")
    if not os.path.exists(xml_path):
        print(f"Error: XML file not found at {xml_path}")
        return

    try:
        root = ET.parse(xml_path).getroot()
    except ET.ParseError as e:
        print(f"Error parsing XML: {e}")
        return

    # 2. Get page elements
    pages = root.findall(".//page")
    if not pages:
        print("Error: No pages found in XML")
        return

    # Also reject too-negative indices, which would otherwise raise IndexError.
    if not -len(pages) <= page_idx < len(pages):
        print(f"Error: Page index {page_idx} out of range. Total pages: {len(pages)}")
        return

    page = pages[page_idx]

    # 3. Get page index and construct filename (000.jpg format)
    page_index = page.get("index")
    if page_index is None:
        print(f"Error: No 'index' attribute in page. Available: {list(page.attrib.keys())}")
        return
    try:
        img_file = f"{int(page_index):03d}.jpg"
    except ValueError:
        print(f"Error: Page 'index' attribute is not a number: {page_index!r}")
        return

    # 4. Load image
    img_dir = os.path.join(DATASET_DIR, "images", book_name)
    if not os.path.exists(img_dir):
        print(f"Error: Image directory not found at {img_dir}")
        return

    img_path = os.path.join(img_dir, img_file)
    if not os.path.exists(img_path):
        print(f"Error: Image file not found at {img_path}")
        # List available files for debugging
        files = [f for f in os.listdir(img_dir) if f.endswith(('.jpg', '.png'))]
        if files:
            print(f"Available image files (first 5): {files[:5]}")
        return

    try:
        # The context manager releases the file handle as soon as the pixel
        # data has been converted into an independent RGB image.
        with Image.open(img_path) as raw_img:
            full_img = raw_img.convert("RGB")
        print(f"Loaded image: {img_path} (size: {full_img.size})")
    except Exception as e:  # best effort: any decoding problem is just reported
        print(f"Error loading image: {e}")
        return

    # 5. Extract text elements directly (each is a bubble with text)
    text_elements = page.findall(".//text")
    if not text_elements:
        print("Warning: No text elements found in this page")

    bubbles = _collect_text_crops(text_elements, full_img)
    shown = bubbles[:max(max_crops, 0)]
    if not shown:
        # plt.subplots cannot build a grid with zero rows, so stop here.
        print("Nothing to display: no valid text regions selected")
        return

    # 6. Display results
    n_cols = 3
    n_rows = (len(shown) + n_cols - 1) // n_cols

    # squeeze=False always yields a 2-D array of axes, even for one row.
    _, axes = plt.subplots(n_rows, n_cols, figsize=(15, 5 * n_rows), squeeze=False)
    flat_axes = axes.ravel()

    for ax, bubble in zip(flat_axes, shown):
        ax.imshow(bubble["crop"])

        # Truncate long text for display
        display_text = bubble["text"][:30] + "..." if len(bubble["text"]) > 30 else bubble["text"]

        title_text = f"Type: {bubble['type']}\nText: {display_text}"
        ax.set_title(title_text, fontsize=8)

        ax.axis("off")

    # Hide the grid cells that did not receive a crop. Counting from the
    # number of crops actually drawn (not from len(bubbles)) matters when
    # the page has more bubbles than max_crops.
    for ax in flat_axes[len(shown):]:
        ax.axis("off")

    plt.tight_layout()
    plt.show()

# Example usage
# Runs only when this code executes as the main module; it is skipped when
# the code is imported from elsewhere.
if __name__ == "__main__":
    # Show available books
    # get_book_names() returns the names sorted, so books[0] is the
    # alphabetically first book.
    books = get_book_names()
    print(f"Available books (showing first 5): {books[:5]}")

    # An empty list means no annotation XMLs were found; nothing is plotted then.
    if books:
        # Visualize first book
        test_book = books[0]
        print(f"\nTesting with book: {test_book}")
        # page_idx selects the page by its position among the <page> elements;
        # at most 20 crops are drawn.
        visualize_bubbles(test_book, page_idx=3, max_crops=20) # Change page_idx to show bubble in that page

In [None]:
import json
import random
from collections import defaultdict
from tqdm.notebook import tqdm
import matplotlib.patches as patches
import numpy as np

# 1. Configuration for Balloon Data
# We look for the JSONs that contain the segmentation data
# (every *.json file in this folder is loaded by prepare_manga_balloon_data).
JSON_DIR = os.path.join(BASE_DIR, 'data', 'MangaSegmentation', 'jsons_processed')
# We use the same image directory as before
# (each JSON 'file_name' entry is joined onto this root).
IMAGE_ROOT_DIR = os.path.join(DATASET_DIR, 'images')
# Only annotations whose 'category_id' equals this value are kept.
# NOTE(review): 5 is taken to mean 'balloon' - confirm against the
# categories listed in the JSON files.
TARGET_CATEGORY_ID = 5  # ID for 'balloon' in the JSON dataset

def prepare_manga_balloon_data(json_dir, image_root):
    """
    Load the segmentation JSON files and collect the balloon annotations per image.

    Every ``*.json`` file in *json_dir* is read; its ``images`` and
    ``annotations`` lists are linked through the image id. Only annotations
    whose ``category_id`` equals ``TARGET_CATEGORY_ID`` and that carry a
    non-empty ``segmentation`` are kept.

    Args:
        json_dir: Directory containing the JSON annotation files.
        image_root: Directory that each image's ``file_name`` is joined onto.

    Returns:
        list[dict]: One record per image that has at least one balloon, with
        the keys ``file_name``, ``image_id``, ``height``, ``width`` and
        ``annotations``. An empty list is returned (after printing an error)
        when *json_dir* is not an existing directory.
    """
    # isdir rather than exists, so a path that points at a file is reported
    # here instead of making os.listdir raise.
    if not os.path.isdir(json_dir):
        print(f"Error: JSON directory not found at {json_dir}")
        return []

    all_images = {}
    all_annotations = defaultdict(list)

    # Sorted so the order of the returned records is reproducible;
    # os.listdir gives no ordering guarantee.
    json_files = sorted(f for f in os.listdir(json_dir) if f.endswith('.json'))
    print(f"Found {len(json_files)} JSON files. Loading data...")

    for json_file in tqdm(json_files, desc="Parsing JSONs"):
        # Explicit UTF-8: the platform default encoding may not be able to
        # decode non-ASCII content.
        with open(os.path.join(json_dir, json_file), 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Ids are scoped by the file they come from, so equal ids in two
        # different JSON files cannot overwrite or mix with each other.
        for img_info in data.get('images', []):
            all_images[(json_file, img_info['id'])] = img_info
        for ann_info in data.get('annotations', []):
            all_annotations[(json_file, ann_info['image_id'])].append(ann_info)

    dataset_records = []
    for key, img_info in all_images.items():
        # Keep only the target category, and only entries with polygon data.
        balloon_annotations = [
            ann for ann in all_annotations.get(key, [])
            if ann.get('category_id') == TARGET_CATEGORY_ID and ann.get('segmentation')
        ]
        if not balloon_annotations:
            continue

        dataset_records.append({
            "file_name": os.path.join(image_root, img_info['file_name']),
            "image_id": key[1],
            "height": img_info['height'],
            "width": img_info['width'],
            "annotations": balloon_annotations,
        })

    return dataset_records

def visualize_balloon_bboxes_from_json(records, num_samples=3):
    """
    Plot randomly chosen images with a blue box around every balloon polygon.

    Up to `num_samples` records are drawn at random. For each polygon in a
    record's annotations, the axis-aligned bounding box is computed from the
    polygon's coordinates and drawn on top of the image. Records whose image
    is missing or cannot be opened are reported and skipped.
    """
    if not records:
        print("No records found to visualize.")
        return

    # Never ask for more samples than there are records.
    sample_count = min(num_samples, len(records))

    for record in random.sample(records, sample_count):
        img_path = record["file_name"]

        if not os.path.exists(img_path):
            print(f"Image not found: {img_path}")
            continue

        try:
            image = Image.open(img_path).convert("RGB")
        except Exception as e:
            print(f"Error loading image: {e}")
            continue

        # One figure per sampled image.
        plt.figure(figsize=(10, 10))
        ax = plt.gca()
        ax.imshow(image)

        # Each segmentation is a list of flat polygons [x1, y1, x2, y2, ...];
        # walk all of them and drop those with fewer than 4 numbers.
        polygons = (
            poly
            for ann in record["annotations"]
            for poly in ann.get("segmentation", [])
            if len(poly) >= 4
        )
        for poly in polygons:
            # Even positions hold x values, odd positions hold y values.
            x_coords, y_coords = poly[0::2], poly[1::2]
            left, right = min(x_coords), max(x_coords)
            top, bottom = min(y_coords), max(y_coords)

            # Blue outline of the polygon's bounding box.
            ax.add_patch(
                patches.Rectangle(
                    (left, top), right - left, bottom - top,
                    linewidth=2, edgecolor='blue', facecolor='none'
                )
            )

            # Label placed slightly above the top-left corner.
            ax.text(left, top-5, "Bubble", color='blue', fontsize=8, fontweight='bold')

        plt.axis('off')
        plt.title(f"Image ID: {record['image_id']}\nSource: {os.path.basename(img_path)}")
        plt.tight_layout()
        plt.show()

# --- EXECUTION ---
print("--- Loading Balloon Data from JSONs ---")
# 1. Load the data
# balloon_data is a list with one record per image that has at least one
# balloon annotation (empty when the JSON directory is missing).
balloon_data = prepare_manga_balloon_data(JSON_DIR, IMAGE_ROOT_DIR)

print(f"\nLoaded {len(balloon_data)} images containing balloons.")

# 2. Visualize
# Skipped when nothing was loaded; otherwise up to 5 randomly chosen images
# are shown.
if balloon_data:
    print("\n--- Visualizing Calculated Bounding Boxes (Blue) ---")
    visualize_balloon_bboxes_from_json(balloon_data, num_samples=5)

In [None]:
def visualize_id_linkage(book_name, target_page_index, json_dataset):
    """
    Visualizes a specific page with:
    1. XML Text Bounding Boxes (Red) + Text ID (displayed ABOVE box)
    2. JSON Bubble Bounding Boxes (Blue) + Annotation ID (displayed BELOW box)

    Args:
        book_name: Book name (without .xml extension).
        target_page_index: Position of the page among the <page> elements of
            the XML (0-based; negative values count from the end).
        json_dataset: Records as produced by prepare_manga_balloon_data; each
            needs a 'file_name' and may carry 'annotations'.

    Returns:
        None. Problems (missing XML/image, unparsable XML, bad page index)
        are reported with a printed message and nothing is plotted. A missing
        JSON record only produces a warning; the text boxes are still drawn.
    """

    # --- 1. XML HANDLING (Text Data) ---
    xml_path = os.path.join(DATASET_DIR, "annotations", f"{book_name}.xml")
    if not os.path.exists(xml_path):
        print(f"XML not found: {xml_path}")
        return

    try:
        root = ET.parse(xml_path).getroot()
    except ET.ParseError as e:
        print(f"Error parsing XML: {e}")
        return
    pages = root.findall(".//page")

    # Validate page index (also rejects too-negative values, which would
    # otherwise raise IndexError).
    if not -len(pages) <= target_page_index < len(pages):
        print(f"Page index {target_page_index} out of range.")
        return

    page_element = pages[target_page_index]
    try:
        # The actual number used in filename (e.g., 5 -> 005.jpg)
        page_img_index = int(page_element.get("index"))
    except (TypeError, ValueError):
        # TypeError: attribute missing (None); ValueError: not a number.
        print(f"Page has no usable 'index' attribute. Available: {list(page_element.attrib.keys())}")
        return
    img_filename = f"{page_img_index:03d}.jpg"

    # --- 2. JSON HANDLING (Bubble Data) ---
    # Find the record whose path is, or ends with, "<BookName>/<00X.jpg>".
    target_suffix = f"{book_name}/{img_filename}"

    matching_json_record = None
    for record in json_dataset:
        # Normalize paths to handle Windows/Linux slashes
        normalized = record['file_name'].replace('\\', '/')
        # Require a path separator in front of the book name so that a book
        # whose name merely ends with `book_name` is not matched by accident.
        if normalized == target_suffix or normalized.endswith("/" + target_suffix):
            matching_json_record = record
            break

    if not matching_json_record:
        print(f"Warning: No JSON segmentation data found for {target_suffix}")
        # We proceed anyway to show the Text boxes at least

    # --- 3. LOAD IMAGE ---
    img_path = os.path.join(DATASET_DIR, "images", book_name, img_filename)
    if not os.path.exists(img_path):
        print(f"Image not found: {img_path}")
        return

    # The context manager releases the file handle once the pixel data has
    # been converted into an independent RGB image.
    with Image.open(img_path) as raw_img:
        image = raw_img.convert("RGB")

    # --- 4. PLOTTING ---
    plt.figure(figsize=(12, 16))
    ax = plt.gca()
    ax.imshow(image)

    # A. Draw Text Boxes (from XML) - RED
    text_nodes = page_element.findall(".//text")
    print(f"Found {len(text_nodes)} text regions (XML)")

    for text in text_nodes:
        try:
            xmin = int(text.get("xmin"))
            ymin = int(text.get("ymin"))
            xmax = int(text.get("xmax"))
            ymax = int(text.get("ymax"))
        except (TypeError, ValueError) as e:
            # Missing or non-numeric coordinate: skip this region, but say so
            # instead of hiding the problem.
            print(f"Warning: skipping text region with invalid bbox: {e}")
            continue
        text_id = text.get("id")

        # Draw Box
        rect = patches.Rectangle((xmin, ymin), xmax-xmin, ymax-ymin,
                                 linewidth=2, edgecolor='red', facecolor='none', linestyle='-')
        ax.add_patch(rect)

        # Draw ID ABOVE the box
        ax.text(xmin, ymin - 5, f"T: {text_id}",
                color='white', fontsize=9, fontweight='bold',
                bbox=dict(facecolor='red', alpha=0.7, pad=1))

    # B. Draw Bubble Boxes (from JSON) - BLUE
    if matching_json_record:
        annotations = matching_json_record.get("annotations", [])
        print(f"Found {len(annotations)} balloon regions (JSON)")

        for ann in annotations:
            ann_id = ann.get("id")  # id of this annotation in the JSON data

            for poly in ann.get("segmentation", []):
                # Only flat coordinate lists [x1, y1, x2, y2, ...] with at
                # least two points can be boxed; anything else is skipped.
                if not isinstance(poly, (list, tuple)) or len(poly) < 4:
                    continue

                xs = poly[0::2]
                ys = poly[1::2]
                xmin, xmax = min(xs), max(xs)
                ymin, ymax = min(ys), max(ys)

                # Draw Box
                rect = patches.Rectangle((xmin, ymin), xmax-xmin, ymax-ymin,
                                         linewidth=2, edgecolor='blue', facecolor='none', linestyle='--')
                ax.add_patch(rect)

                # Draw ID BELOW the box
                ax.text(xmin, ymax + 15, f"B: {ann_id}",
                        color='white', fontsize=9, fontweight='bold',
                        bbox=dict(facecolor='blue', alpha=0.7, pad=1))

    plt.axis('off')
    plt.title(f"Linkage Check: {book_name} - Page {target_page_index}\nRed = Text (XML), Blue = Bubble (JSON)", fontsize=14)
    plt.tight_layout()
    plt.show()

# --- EXECUTION ---
# Pick a book and page index (0-based) to verify

# globals() is checked because balloon_data is a module-level name created
# by an earlier cell.
if 'balloon_data' in globals() and balloon_data:
    available_books = get_book_names()
    if available_books:
        # Example 1: Use the first book available
        test_book = available_books[0]
        test_page = 6  # Change this number to look at different pages

        print(f"Visualizing: {test_book}, Page Index: {test_page}")
        visualize_id_linkage(test_book, test_page, balloon_data)
    else:
        # Indexing [0] on an empty list would raise IndexError.
        print("No books found; nothing to visualize.")
else:
    print("Please run the previous cell to load 'balloon_data' first.")