In [None]:
import os

# Suffix the current working directory must end with for the relative paths
# used by this notebook to resolve correctly.
ENDWITHS = 'Pipelines'
NOTEBOOK_DIR = os.getcwd()

# Fail fast when the kernel was started from some other directory.
if NOTEBOOK_DIR.endswith(ENDWITHS) is False:
    raise ValueError(f"Not in correct dir, expect end with {ENDWITHS}, but got {NOTEBOOK_DIR} instead")

# The base directory lies three levels above the notebook's own directory.
BASE_DIR = os.path.join(NOTEBOOK_DIR, *([os.pardir] * 3))

In [None]:
import sys
# Add the project's 'code' directory to the Python path to import custom modules.
# The membership check keeps sys.path from growing when the cell is re-run.
CODE_DIR = os.path.join(BASE_DIR, 'code')
if CODE_DIR not in sys.path:
    sys.path.insert(0, CODE_DIR)

# Import necessary libraries and modules
from ultralytics import YOLO
# Project modules are imported relative to CODE_DIR, i.e. with 'pipeline' as
# the top-level package. Importing through 'code.pipeline' would need BASE_DIR
# itself on sys.path and collides with the standard-library module named 'code'.
from pipeline.OCRModels.MangaOCR import MangaOCRModel
from pipeline.TranslationModels.ElanMtJaEnTranslator import ElanMtJaEnTranslator
import cv2
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from ipywidgets import interact, IntSlider

In [None]:
# Define paths to the trained YOLO model and the example manga image.
# Both are built by joining onto BASE_DIR, so they follow wherever that
# directory resolves to.
YOLO_MODEL_PATH = os.path.join(BASE_DIR, 'best.pt')
EX_IMG_PATH = os.path.join(BASE_DIR, "data/Manga109_released_2023_12_07/images/MAD_STONE/006.jpg")

In [None]:
# Load the pre-trained YOLO model from the weights file at YOLO_MODEL_PATH.
yolo_model = YOLO(YOLO_MODEL_PATH)

# Run the prediction on the source image (a single file, EX_IMG_PATH).
results = yolo_model.predict(source=EX_IMG_PATH)

In [None]:
# The prediction returns a list of results;
# we are processing a single image, so we take the first element.
# Every later cell reads the detections from this `result` object.
result = results[0]

In [None]:
# Convert the original image from BGR (OpenCV's default) to RGB for display.
image_rgb = cv2.cvtColor(result.orig_img, cv2.COLOR_BGR2RGB)

# Extract the raw detection data from the YOLO result object.
boxes = result.boxes.xyxy.cpu().numpy()  # Bounding boxes

# Segmentation masks as polygon points. `result.masks` is None when the model
# detected nothing, so fall back to an empty list instead of raising
# AttributeError; the downstream cells already cope with "no masks".
masks_xy = result.masks.xy if result.masks is not None else []

print(f"Initial YOLO detection found {len(boxes)} regions.")

In [None]:
# Raise the recursion limit if it is lower than 2000 (optional headroom for the
# bubble-splitting cell). The limit is never *lowered*: CPython's default is
# 1000, and forcing it down to a small value such as 100 can make ordinary
# library code (and the notebook kernel itself) fail with RecursionError.
sys.setrecursionlimit(max(sys.getrecursionlimit(), 2000))

def _find_cut_candidates(contour, defects, min_depth, max_angle_deg):
    """Collect convexity-defect points that are deep and sharp enough to anchor a cut.

    Returns a list of ``{'point': (x, y), 'depth': float}`` dicts in the order
    the defects were reported. Points are plain Python ints so they can be
    passed straight to OpenCV drawing functions.
    """
    candidates = []
    for start_idx, end_idx, far_idx, fixed_depth in defects[:, 0]:
        # convexityDefects reports the depth as a fixed-point value (x256).
        depth = fixed_depth / 256.0
        if depth <= min_depth:
            continue

        far = contour[far_idx][0]
        to_start = contour[start_idx][0] - far
        to_end = contour[end_idx][0] - far
        len_start, len_end = np.linalg.norm(to_start), np.linalg.norm(to_end)
        if len_start == 0 or len_end == 0:
            continue

        # Opening angle of the notch at the defect point; clip guards arccos
        # against rounding slightly outside [-1, 1].
        cosine = np.dot(to_start, to_end) / (len_start * len_end)
        angle = np.degrees(np.arccos(np.clip(cosine, -1.0, 1.0)))
        if angle < max_angle_deg:
            candidates.append({'point': (int(far[0]), int(far[1])), 'depth': depth})
    return candidates


def _drop_close_candidates(candidates, min_dist):
    """Keep only the deepest candidate among points closer than ``min_dist`` to each other."""
    kept = []
    for cand in sorted(candidates, key=lambda c: c['depth'], reverse=True):
        far_from_all = all(
            np.linalg.norm(np.subtract(cand['point'], other['point'])) >= min_dist
            for other in kept
        )
        if far_from_all:
            kept.append(cand)
    return kept


def _closest_pair(points):
    """Return the pair of points with the smallest Euclidean distance (None if fewer than 2)."""
    pairs = [(a, b) for i, a in enumerate(points) for b in points[i + 1:]]
    if not pairs:
        return None
    return min(pairs, key=lambda pair: np.linalg.norm(np.subtract(pair[0], pair[1])))


def attempt_split_once(bubble_mask, min_defect_depth=13, max_angle_deg=160,
                       min_dist_between_defects=20):
    """
    Core function: attempt to split the mask once along its convexity defects.

    Args:
        bubble_mask: single-channel uint8 mask; non-zero pixels belong to the bubble.
        min_defect_depth: minimum defect depth (pixels) for a defect to be considered.
        max_angle_deg: maximum opening angle (degrees) at the defect point;
            larger values allow flatter connections.
        min_dist_between_defects: defects closer than this (pixels) are treated
            as one, keeping the deepest.

    Returns:
        A list of sub-masks (same shape as ``bubble_mask``) when the cut produced
        more than one sufficiently large piece, otherwise ``[bubble_mask]``.
    """
    MIN_AREA_TO_SPLIT = 500   # contours smaller than this are debris: never cut
    MIN_PIECE_AREA = 100      # pieces this small or smaller are dropped after the cut
    CUT_THICKNESS = 3         # width of the black line that separates the pieces
    PAIR_SEARCH_LIMIT = 4     # only the deepest N candidates compete for the cut

    # 1. Basic checks
    contours, _ = cv2.findContours(bubble_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return [bubble_mask]

    contour = max(contours, key=cv2.contourArea)
    if cv2.contourArea(contour) < MIN_AREA_TO_SPLIT:
        return [bubble_mask]

    # 2. Convex hull & convexity defects. Only OpenCV's own failures are treated
    #    as "cannot split"; anything else (e.g. KeyboardInterrupt) propagates.
    try:
        hull_indices = cv2.convexHull(contour, returnPoints=False)
        if hull_indices is None or len(hull_indices) < 3:
            return [bubble_mask]
        defects = cv2.convexityDefects(contour, hull_indices)
    except cv2.error:
        return [bubble_mask]

    if defects is None:
        return [bubble_mask]

    # 3-4. Candidate defect points, de-duplicated and ordered deepest first
    candidates = _drop_close_candidates(
        _find_cut_candidates(contour, defects, min_defect_depth, max_angle_deg),
        min_dist_between_defects,
    )

    # 5. Execute the cut on a copy so the caller's mask is left untouched
    split_mask = bubble_mask.copy()
    if len(candidates) >= 2:
        # Join the two closest of the deepest candidates; choosing the shortest
        # connection avoids long diagonal cuts across the bubble.
        cut_start, cut_end = _closest_pair(
            [cand['point'] for cand in candidates[:PAIR_SEARCH_LIMIT]]
        )
        cv2.line(split_mask, cut_start, cut_end, 0, CUT_THICKNESS)
    elif len(candidates) == 1:
        # Single defect: cut from it towards the centroid and 50% beyond.
        moments = cv2.moments(contour)
        if moments["m00"] == 0:
            return [bubble_mask]
        px, py = candidates[0]['point']
        cx = int(moments["m10"] / moments["m00"])
        cy = int(moments["m01"] / moments["m00"])
        target = (int(cx + (cx - px) * 0.5), int(cy + (cy - py) * 0.5))
        cv2.line(split_mask, (px, py), target, 0, CUT_THICKNESS)
    else:
        return [bubble_mask]

    # 6. Collect the pieces; report a split only if more than one piece survives
    piece_contours, _ = cv2.findContours(split_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    pieces = []
    for piece in piece_contours:
        if cv2.contourArea(piece) > MIN_PIECE_AREA:
            piece_mask = np.zeros_like(bubble_mask)
            cv2.drawContours(piece_mask, [piece], -1, 255, -1)
            pieces.append(piece_mask)

    return pieces if len(pieces) > 1 else [bubble_mask]

# --- MAIN FUNCTION: REPEATED SPLITTING ---
def split_connected_bubbles(bubble_mask):
    """
    Keep splitting a mask until none of its pieces can be split any further.

    Every mask is handed to attempt_split_once. A mask that comes back as a
    single piece is final; a mask that comes back as several pieces has each
    piece examined in turn (e.g. A and B-C, then B-C -> B and C). The returned
    list keeps the pieces in depth-first, left-to-right order.
    """
    finished = []
    # Explicit work stack; the mask on top is the next one to examine.
    pending = [bubble_mask]

    while pending:
        pieces = attempt_split_once(pending.pop())

        if len(pieces) == 1:
            # Could not be cut any further: this piece is final.
            finished.extend(pieces)
        else:
            # Push in reverse so the first piece is popped (and fully
            # processed) before its siblings.
            pending.extend(reversed(pieces))

    return finished

In [None]:
# --- TOOLS FOR TUNING HYPERPARAMETERS (INTERACTIVE) ---
# Run this cell to enable the interactive slider interface.
# After finding the 3 values you like, go back to Cell 7 and copy them into the official code.

import matplotlib.pyplot as plt
from ipywidgets import interact, IntSlider
import cv2
import numpy as np

# 1. Visualizer function (Updated with SHORTEST DISTANCE logic)
def visualize_bubble_split(bubble_mask, min_depth, max_angle, min_dist):
    """
    Draw a preview of where a bubble mask would be cut for the given parameters.

    Args:
        bubble_mask: single-channel uint8 mask of one bubble (usually a crop).
        min_depth: minimum defect depth (pixels) for a defect to pass.
        max_angle: maximum opening angle (degrees) at a defect for it to pass.
        min_dist: passing defects closer than this (pixels) are merged,
            keeping the deepest.

    Returns:
        A 3-channel image (created with COLOR_GRAY2BGR, so colours below are
        BGR tuples) showing the largest contour, every convexity defect
        (green = passes, red = fails, annotated "depth|angle"), and the
        proposed cut as a blue line. Convert to RGB before showing it with an
        RGB viewer such as matplotlib.

    Note:
        Unlike attempt_split_once, no minimum-area check is applied here.
    """
    CONTOUR_COLOR = (0, 255, 255)
    PASS_COLOR = (0, 255, 0)
    FAIL_COLOR = (0, 0, 255)
    CUT_COLOR = (255, 0, 0)
    LABEL_MIN_DEPTH = 10  # only annotate defects deeper than this, to limit clutter

    debug_img = cv2.cvtColor(bubble_mask, cv2.COLOR_GRAY2BGR)

    contours, _ = cv2.findContours(bubble_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return debug_img
    contour = max(contours, key=cv2.contourArea)

    cv2.drawContours(debug_img, [contour], -1, CONTOUR_COLOR, 2)

    # Only OpenCV's own failures mean "nothing to show"; other exceptions propagate.
    try:
        hull_indices = cv2.convexHull(contour, returnPoints=False)
        if hull_indices is None or len(hull_indices) < 3:
            return debug_img
        defects = cv2.convexityDefects(contour, hull_indices)
    except cv2.error:
        return debug_img

    if defects is None:
        return debug_img

    candidates = []
    for start_idx, end_idx, far_idx, fixed_depth in defects[:, 0]:
        # convexityDefects reports the depth as a fixed-point value (x256).
        depth = fixed_depth / 256.0
        far_pt = contour[far_idx][0]
        # Plain Python ints are accepted by every OpenCV drawing call.
        far = (int(far_pt[0]), int(far_pt[1]))

        to_start = contour[start_idx][0] - far_pt
        to_end = contour[end_idx][0] - far_pt
        norm1, norm2 = np.linalg.norm(to_start), np.linalg.norm(to_end)

        # A degenerate defect (zero-length side) is treated as completely flat.
        angle_deg = 180
        if norm1 > 0 and norm2 > 0:
            cosine = np.dot(to_start, to_end) / (norm1 * norm2)
            angle_deg = np.degrees(np.arccos(np.clip(cosine, -1.0, 1.0)))

        # Display logic: Green = Pass, Red = Fail
        passes = depth > min_depth and angle_deg < max_angle
        color = PASS_COLOR if passes else FAIL_COLOR
        if passes:
            candidates.append({'point': far, 'depth': depth})

        cv2.circle(debug_img, far, 5, color, -1)
        # Note the depth and angle next to that point
        if depth > LABEL_MIN_DEPTH:
            cv2.putText(debug_img, f"{int(depth)}|{int(angle_deg)}", (far[0] + 10, far[1]),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)

    # Clustering: deepest first, drop anything too close to an already kept point
    unique_candidates = []
    for cand in sorted(candidates, key=lambda c: c['depth'], reverse=True):
        if all(np.linalg.norm(np.subtract(cand['point'], kept['point'])) >= min_dist
               for kept in unique_candidates):
            unique_candidates.append(cand)

    # --- SHORTEST DISTANCE ---
    if len(unique_candidates) >= 2:
        # Consider top 4 deepest points and join the two closest of them
        top_points = [cand['point'] for cand in unique_candidates[:4]]
        pairs = [(a, b) for i, a in enumerate(top_points) for b in top_points[i + 1:]]
        cut_start, cut_end = min(
            pairs, key=lambda pair: np.linalg.norm(np.subtract(pair[0], pair[1]))
        )
        cv2.line(debug_img, cut_start, cut_end, CUT_COLOR, 3)  # Blue Line
        cv2.putText(debug_img, "CUT: SHORTEST", (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, CUT_COLOR, 2)

    elif len(unique_candidates) == 1:
        px, py = unique_candidates[0]['point']
        M = cv2.moments(contour)
        if M["m00"] != 0:
            cx, cy = int(M["m10"] / M["m00"]), int(M["m01"] / M["m00"])
            # Preview the same line attempt_split_once cuts: from the defect
            # through the centroid and 50% beyond it.
            target = (int(cx + (cx - px) * 0.5), int(cy + (cy - py) * 0.5))
            cv2.line(debug_img, (px, py), target, CUT_COLOR, 3)
            cv2.putText(debug_img, "CUT: CENTROID", (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, CUT_COLOR, 2)

    return debug_img

# 2. Main control function
def interactive_tuner(bubble_index, min_depth, max_angle, min_dist):
    """
    Slider callback: preview the split of one YOLO mask with the given parameters.

    Rasterises polygon ``masks_xy[bubble_index]`` on a canvas the size of the
    source image, crops a padded window around it, runs
    visualize_bubble_split on the crop and shows the result with matplotlib.

    Args:
        bubble_index: index into the notebook-level ``masks_xy`` list.
        min_depth, max_angle, min_dist: forwarded unchanged to
            visualize_bubble_split.
    """
    if len(masks_xy) == 0:
        print("No masks found from YOLO!")
        return

    # Get mask from YOLO result
    polygon = np.array(masks_xy[bubble_index], dtype=np.int32)
    img_h, img_w = result.orig_img.shape[:2]
    mask = np.zeros((img_h, img_w), dtype=np.uint8)
    cv2.fillPoly(mask, [polygon], 255)

    # Crop (zoom) into that bubble, keeping a margin clamped to the image bounds
    x, y, box_w, box_h = cv2.boundingRect(polygon)
    pad = 30
    y1, y2 = max(0, y - pad), min(img_h, y + box_h + pad)
    x1, x2 = max(0, x - pad), min(img_w, x + box_w + pad)
    mask_crop = mask[y1:y2, x1:x2]

    # Run visualizer
    viz_img = visualize_bubble_split(mask_crop, min_depth, max_angle, min_dist)

    # Plot. The debug image is drawn with BGR colour tuples, while imshow
    # interprets arrays as RGB; convert so red/blue appear as described.
    plt.figure(figsize=(5, 5))
    plt.imshow(cv2.cvtColor(viz_img, cv2.COLOR_BGR2RGB))
    plt.title(f"Bubble Index: {bubble_index} (Total: {len(masks_xy)})")
    plt.axis('off')
    plt.show()

# 3. Initialize interface
print("--- PARAMETERS TUNER (UPDATED LOGIC) ---")
print("1. Drag 'bubble_index' to find the stuck bubble.")
print("2. The BLUE line now represents the 'Shortest Distance' cut.")
print("3. Remember the 3 numbers and update Cell 7.")

# The index slider's upper bound is clamped at 0: with no masks at all,
# len(masks_xy) - 1 would be -1 (below the slider's minimum) and the widget
# would refuse to build. interactive_tuner itself reports the "no masks" case.
# NOTE: the Depth/Angle starting values below (20 / 145) differ from the
# values attempt_split_once uses by default (13 / 160).
interact(interactive_tuner,
         bubble_index=IntSlider(min=0, max=max(len(masks_xy) - 1, 0), step=1, value=0),
         min_depth=IntSlider(min=5, max=100, step=1, value=20, description='Depth'),
         max_angle=IntSlider(min=10, max=180, step=5, value=145, description='Angle'), # Default updated to 145
         min_dist=IntSlider(min=5, max=100, step=5, value=20, description='Dist'));

In [None]:
# This list will store the final, individual bubbles after processing.
# It is rebuilt from scratch on every run, so the cell can be re-executed safely.
refined_bubble_list = []

print("Processing raw YOLO masks...")
# Iterate over each detected region from YOLO.
for mask_points in masks_xy:
    # A polygon needs at least three vertices to enclose any area; skip
    # degenerate (empty / point / line) outlines instead of rasterising them.
    if len(mask_points) < 3:
        continue

    # Create a binary mask from the polygon points.
    initial_mask = np.zeros(image_rgb.shape[:2], dtype=np.uint8)
    cv2.fillPoly(initial_mask, [np.array(mask_points, dtype=np.int32)], 255)

    # Attempt to split the mask into individual bubbles.
    split_masks = split_connected_bubbles(initial_mask)

    # Process each resulting mask (could be one or more).
    for single_mask in split_masks:
        contours, _ = cv2.findContours(single_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if not contours:
            continue

        # The largest outline represents the bubble; smaller ones are ignored.
        contour = max(contours, key=cv2.contourArea)
        # Calculate the precise bounding box (x, y, w, h) for this individual bubble.
        bbox = cv2.boundingRect(contour)

        # Store the refined bubble information.
        refined_bubble_list.append({
            'mask': single_mask,
            'bbox': bbox,
            'contour': contour
        })

# Sort bubbles by approximate reading order: top edge first (top-to-bottom);
# the right-to-left rule for Japanese manga only breaks ties between bubbles
# whose top edges share exactly the same y coordinate.
refined_bubble_list.sort(key=lambda b: (b['bbox'][1], -b['bbox'][0]))

print(f"After splitting, we have {len(refined_bubble_list)} individual bubbles.")

In [None]:
# --- Create a Side-by-Side Visualization (Before vs After) ---

def _render_bubble_panel(ax, image, contours, label_prefix, box_color, label_color,
                         title, title_color, rng, alpha=0.4):
    """
    Draw one comparison panel: tinted masks, bounding boxes and index labels.

    Args:
        ax: matplotlib Axes to draw on.
        image: RGB image used as background (left unmodified).
        contours: sequence of int32 point arrays, one per bubble.
        label_prefix: text placed before each index, e.g. 'Raw' -> 'Raw: 0'.
        box_color: colour of the bounding box and of the label background.
        label_color: colour of the label text.
        title, title_color: panel title and its colour.
        rng: numpy random Generator used to pick a fill colour per bubble.
        alpha: weight of the coloured overlay when blended with the image.

    Returns:
        The blended image that was shown on ``ax``.
    """
    overlay = image.copy()
    for idx, contour in enumerate(contours):
        if len(contour) == 0:
            # Nothing to draw for an empty outline; its index is still consumed.
            continue

        # 1. Segmentation mask, filled with a random (but seeded) colour
        fill_color = rng.integers(50, 255, size=3).tolist()
        cv2.drawContours(overlay, [contour], -1, fill_color, thickness=cv2.FILLED)

        # 2. Bounding box derived from the contour
        x, y, w, h = cv2.boundingRect(contour)
        ax.add_patch(plt.Rectangle((x, y), w, h, linewidth=2,
                                   edgecolor=box_color, facecolor='none'))

        # 3. Index label just above the box
        ax.text(x, y - 5, f'{label_prefix}: {idx}', fontsize=12, color=label_color,
                bbox=dict(facecolor=box_color, alpha=0.8, pad=0.5, edgecolor='none'),
                verticalalignment='bottom')

    # Blend the coloured overlay with the untouched image and display it
    blended = cv2.addWeighted(overlay, alpha, image, 1 - alpha, 0)
    ax.imshow(blended)
    ax.set_title(title, fontsize=18, fontweight='bold', color=title_color)
    ax.axis('off')
    return blended


print("Generating comparison visualization...")

# 1. Setup Plot
# Two panels stacked vertically, each keeping the page's aspect ratio.
ratio = image_rgb.shape[1] / image_rgb.shape[0]
width = 20
height = width / ratio
fig, axes = plt.subplots(2, 1, figsize=(width, height * 2))

alpha = 0.4
# Seeded generator: the mask colours are identical on every run.
color_rng = np.random.default_rng(0)

# PLOT 1: BEFORE applying the convexity split (raw YOLO output)
raw_masks_points = masks_xy
raw_contours = [np.array(points, dtype=np.int32) for points in raw_masks_points]
vis_before = _render_bubble_panel(
    axes[0], image_rgb, raw_contours, 'Raw', 'red', 'white',
    f"BEFORE: Raw YOLO Output ({len(raw_masks_points)} bubbles)", 'red',
    color_rng, alpha,
)

# PLOT 2: AFTER applying the convexity split (refined bubbles).
# Each stored 'bbox' was computed from its 'contour', so the helper's own
# boundingRect call yields the same rectangle.
refined_contours = [bubble['contour'] for bubble in refined_bubble_list]
vis_after = _render_bubble_panel(
    axes[1], image_rgb, refined_contours, 'ID', 'lime', 'black',
    f"AFTER: Refined with Convexity Split ({len(refined_bubble_list)} bubbles)", 'green',
    color_rng, alpha,
)

plt.tight_layout(pad=1.0) # Add some padding between plots
plt.show()

In [None]:
# Bring up the OCR model before any bubble is processed.
manga_ocr_model = MangaOCRModel()
manga_ocr_model.load_model()

print("\nRunning OCR on each individual bubble...")
image_rgb_array = np.array(image_rgb)

# Walk the refined bubbles, recognising the text inside each bounding box.
for bubble_id, bubble in enumerate(refined_bubble_list):
    left, top, box_w, box_h = bubble['bbox']

    # Cut the bubble's bounding box out of the page (rows first, then columns).
    cropped_image = image_rgb_array[top:top + box_h, left:left + box_w]

    # Recognise the crop and keep the result on the bubble's own record.
    bubble['ocr_text'] = manga_ocr_model.predict(cropped_image)

    print(f"Bubble ID {bubble_id}: {bubble['ocr_text']}")

In [None]:
# Bring up the translation model.
model_trans = ElanMtJaEnTranslator()
model_trans.load_model()

print("\n--- Translation Results ---")
# Translate every bubble that has non-blank OCR text; blank ones get an empty
# translation so that the key is always present afterwards.
for bubble in refined_bubble_list:
    source_text = bubble.get('ocr_text', '')
    bubble['translated_text'] = model_trans.predict(source_text) if source_text.strip() else ''

# Dump OCR text and translation side by side for a quick manual check.
for bubble_id, bubble in enumerate(refined_bubble_list):
    recognised = bubble.get('ocr_text', '')
    translated = bubble.get('translated_text', '')
    print(f"Bbox ID: {bubble_id}")
    print(f"  - OCR: {recognised}")
    print(f"  - Translation: {translated}\n")

In [None]:
import matplotlib

# Resolve the typesetting font relative to the active matplotlib installation
# rather than through an absolute path into one user's home directory.
_MPL_FONT_DIR = os.path.join(matplotlib.get_data_path(), "fonts", "ttf")
FONT_PATH = os.path.join(_MPL_FONT_DIR, "ComicSansMS3.ttf")

# ComicSansMS3.ttf has to be copied into that folder by hand; when it is
# missing, fall back to DejaVu Sans, which is bundled with matplotlib.
if not os.path.isfile(FONT_PATH):
    print(f"Font not found at {FONT_PATH}; falling back to DejaVuSans.ttf")
    FONT_PATH = os.path.join(_MPL_FONT_DIR, "DejaVuSans.ttf")
    
def generate_lines(draw, text, font, max_width):
    """
    Wrap text so that every line fits within a maximum pixel width.

    Words are split on whitespace and packed greedily. A word that is wider
    than ``max_width`` on its own is broken character by character.

    Args:
        draw: object exposing ``textbbox((x, y), text, font=...)``; the third
            element of the returned box is used as the rendered width.
        text: the text to wrap.
        font: font object forwarded to ``draw.textbbox``.
        max_width: maximum allowed line width in pixels.

    Returns:
        List of lines (strings). Never contains empty strings: a single
        character wider than ``max_width`` is placed on a line of its own.
    """
    def text_width(fragment):
        return draw.textbbox((0, 0), fragment, font=font)[2]

    lines = []
    current_line = ""
    for word in text.split():
        # A single word longer than the allowed width: break it into chunks.
        if text_width(word) > max_width:
            if current_line:
                lines.append(current_line)
                current_line = ""

            chunk = ""
            for char in word:
                # Always accept the first character of a chunk, even if it is
                # too wide by itself; otherwise an empty line would be emitted.
                if not chunk or text_width(chunk + char) <= max_width:
                    chunk += char
                else:
                    lines.append(chunk)
                    chunk = char
            # The last chunk stays open so following words may join it.
            current_line = chunk
            continue

        # Standard word wrapping logic.
        if not current_line:
            current_line = word
        elif text_width(current_line + " " + word) <= max_width:
            current_line += " " + word
        else:
            lines.append(current_line)
            current_line = word

    if current_line:
        lines.append(current_line)

    return lines

def fit_text_in_bubble(draw, text, font_path, bounding_box, text_color):
    """
    Draw ``text`` centred inside ``bounding_box`` using the largest font that fits.

    The search starts at a font size equal to the box height and shrinks in
    steps of 3 while the size is above 5; at each size the text is wrapped to
    the box width and accepted as soon as the stacked line heights fit the box
    height. If no size is accepted, the size the search stopped at is used.

    Args:
        draw: PIL drawing context used for measuring and drawing.
        text: the text to typeset.
        font_path: path of the TrueType font to load; nothing is drawn if falsy.
        bounding_box: (x, y, w, h) of the target area; nothing is drawn if
            w or h is not positive.
        text_color: fill colour passed to ``draw.text``.
    """
    box_x, box_y, box_w, box_h = bounding_box
    if not font_path or box_w <= 0 or box_h <= 0:
        return

    def measure(line, font):
        return draw.textbbox((0, 0), line, font=font)

    def layout(size):
        """Load the font at ``size`` and wrap the text to the box width."""
        font = ImageFont.truetype(font_path, size)
        return font, generate_lines(draw, text, font, box_w)

    # Shrink from the box height downwards until the wrapped text fits vertically.
    size = box_h
    chosen_font, chosen_lines = None, []
    while size > 5:
        trial_font, trial_lines = layout(size)
        if sum(measure(line, trial_font)[3] for line in trial_lines) <= box_h:
            chosen_font, chosen_lines = trial_font, trial_lines
            break
        size -= 3

    # Nothing fitted: fall back to the size the search ended on.
    if not chosen_font:
        chosen_font, chosen_lines = layout(size)

    # Measure each line once, then stack the lines centred in both directions.
    line_boxes = [measure(line, chosen_font) for line in chosen_lines]
    cursor_y = box_y + (box_h - sum(box[3] for box in line_boxes)) / 2

    for line, (_, _, line_width, line_height) in zip(chosen_lines, line_boxes):
        cursor_x = box_x + (box_w - line_width) / 2
        draw.text((cursor_x, cursor_y), line, font=chosen_font, fill=text_color)
        cursor_y += line_height

In [None]:
EROSION_PIXELS = 3
TEXT_COLOR = (0, 0, 0) # Black

# Work on a copy so image_rgb stays available, unmodified, for other cells.
image_final = image_rgb.copy()
erosion_kernel = np.ones((2 * EROSION_PIXELS + 1, 2 * EROSION_PIXELS + 1), np.uint8)

# Iterate through the final, complete list of individual bubbles.
for bubble in refined_bubble_list:
    trans_text = bubble.get('translated_text', '')
    if not trans_text.strip():
        continue

    # STEP 1: Erode the mask to create a safe margin from the bubble's border.
    eroded_mask = cv2.erode(bubble['mask'], erosion_kernel, iterations=1)

    # STEP 2: Find the contours of the eroded mask and paint the area white to clear it.
    contours, _ = cv2.findContours(eroded_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        # Erosion removed the whole mask: nothing to clear, nowhere to write.
        continue
    cv2.drawContours(image_final, contours, -1, (255, 255, 255), thickness=cv2.FILLED)

    # STEP 3: The text goes into the bounding box of the LARGEST cleared region.
    # Erosion can break a mask into several pieces, and the first contour
    # returned is not necessarily the main one.
    text_area_bbox = cv2.boundingRect(max(contours, key=cv2.contourArea))

    # STEP 4: Convert to PIL Image and call the text fitting function.
    pil_image = Image.fromarray(image_final)
    draw = ImageDraw.Draw(pil_image)

    fit_text_in_bubble(draw, trans_text, FONT_PATH, text_area_bbox, TEXT_COLOR)

    # Convert back to numpy array for the next iteration.
    image_final = np.array(pil_image)

# --- Display the final, typeset image ---
ratio = image_final.shape[1] / image_final.shape[0]
width = 20
height = width / ratio

fig, ax = plt.subplots(1, 1, figsize=(width, height))
ax.imshow(image_final)
ax.axis('off')
plt.tight_layout(pad=0)
plt.show()