In [None]:
!pip install roboflow

from roboflow import Roboflow
rf = Roboflow(api_key="T6GLHVi1ZPBseih858yD")
project = rf.workspace("ngoc-tfn96").project("speech-balloons-detection-txibf")
version = project.version(3)
dataset = version.download("voc")


In [None]:
import os
import subprocess
from pathlib import Path

# Directory-name suffix that is accepted when running from a local checkout.
END_WITH_LOCAL = 'bubble-detection'

# Put the cargo bin directory in front of the existing PATH entries.
os.environ['PATH'] = "/root/.cargo/bin:" + os.environ['PATH']

# Every later path in the notebook is built relative to the working directory.
BASE_DIR = os.getcwd()
print(f"BASE_DIR: {BASE_DIR}")

# Guard against running from an unexpected location: the relative data paths
# used further down only resolve from one of these two directories.
if not BASE_DIR.endswith(('/content', END_WITH_LOCAL)):
    raise ValueError(f"Expected to be in .../{END_WITH_LOCAL} or .../content directory, but got: {BASE_DIR}")

In [None]:
import shutil
import os
from pathlib import Path

# Relocate the downloaded annotation folder into the Manga109 data tree and
# give it its final name ('Human_Annotate_300') in the same step.
source_dir = os.path.join(BASE_DIR, 'Speech-Balloons-Detection-3')
dest_parent_dir = os.path.join(BASE_DIR, '../../data/Manga109/')
final_dest_dir = os.path.join(dest_parent_dir, 'Human_Annotate_300')

if not os.path.isdir(source_dir):
    # Nothing to do, e.g. the folder was already moved by an earlier run.
    print(f"Source folder '{source_dir}' not found. Skipping move operation.")
else:
    try:
        # A destination left over from a previous run is removed first so the
        # move always produces a clean copy.
        if os.path.exists(final_dest_dir):
            print(f"Destination '{final_dest_dir}' already exists. Removing it.")
            shutil.rmtree(final_dest_dir)

        shutil.move(source_dir, final_dest_dir)
        print(f"Successfully moved and renamed '{source_dir}' to '{final_dest_dir}'")
    except Exception as err:
        # Best effort: report the problem and let the notebook continue.
        print(f"An error occurred while moving the folder: {err}")


In [None]:
import os
from pathlib import Path

# Folder that holds one sub-directory of page images per Manga109 volume.
# The path is built relative to BASE_DIR, so BASE_DIR must already be defined.
Manga109_dir = os.path.join(BASE_DIR,'../../data/Manga109/Manga109_released_2023_12_07/images')


In [None]:
# Names of the volume directories, in raw directory-listing order (not sorted).
folders = [entry.name for entry in filter(Path.is_dir, Path(Manga109_dir).iterdir())]
print(folders)

In [None]:
# Names of the volume directories, sorted (case-sensitive string order).
folders = [entry.name for entry in Path(Manga109_dir).iterdir() if entry.is_dir()]
folders.sort()
print(folders)

In [None]:
# --- Selection parameters ---------------------------------------------------
# Number of volumes (sorted by folder name, case-sensitive) and number of
# leading pages per volume that are searched for among the annotated images.
NUM_VOLUMES = 35
IMAGES_PER_VOLUME = 21

# The first NUM_VOLUMES volume folders, ordered by name.
selected_volumes = sorted(
    (d for d in Path(Manga109_dir).iterdir() if d.is_dir()),
    key=lambda d: d.name,
)[:NUM_VOLUMES]
# Historical name kept for compatibility; the list holds NUM_VOLUMES entries,
# not 30.
first_30_volumes = selected_volumes

# For each selected volume, take the first IMAGES_PER_VOLUME '.jpg' pages in
# ascending file-name order.
original_image_path = []
for volume in selected_volumes:
    images = sorted(
        (f for f in volume.iterdir() if f.is_file() and f.suffix.lower() == '.jpg'),
        key=lambda f: f.name,
    )
    for img_path in images[:IMAGES_PER_VOLUME]:
        original_image_path.append(str(img_path))

print(len(original_image_path))

human_annotate_dir = os.path.join(BASE_DIR,'../../data/Manga109/Human_Annotate_300/train')

# Collect the annotated images that sit directly in the train directory.
# Sub-directories are deliberately not scanned: the per-volume folders created
# at the end of this cell live inside the same directory and later receive the
# matched files, which must not be picked up as candidates again.
# The listing is sorted so the candidate order (and therefore which file is
# matched first) is the same on every run.
all_img_paths = []
for file in sorted(os.listdir(human_annotate_dir)):
    file_path = os.path.join(human_annotate_dir, file)
    if os.path.isfile(file_path) and file.lower().endswith(('.jpg', '.jpeg', '.png')):
        all_img_paths.append(file_path)

print(len(all_img_paths))

# One output folder per selected volume, named after the volume.
for volume in selected_volumes:
    os.makedirs(os.path.join(human_annotate_dir, volume.name), exist_ok=True)

In [None]:
import matplotlib.pyplot as plt
import cv2
import numpy as np
import time
import hashlib
from concurrent.futures import ThreadPoolExecutor
import os

# Option 1: Exact comparison of file contents (fastest)
def compare_images_hash(path1, path2):
    """Return True when both files have byte-identical content.

    The files are compared through their MD5 digests. Files of different size
    are rejected without being read, and the digests are computed in chunks so
    large files are never loaded into memory in one piece.

    Args:
        path1: Path of the first file.
        path2: Path of the second file.

    Returns:
        True if the contents are identical, False if they differ or if either
        file cannot be read (missing file, permission error, invalid path).
    """
    def _md5_of(path):
        # Hash the file in 1 MiB chunks.
        digest = hashlib.md5()
        with open(path, 'rb') as fh:
            for chunk in iter(lambda: fh.read(1 << 20), b''):
                digest.update(chunk)
        return digest.hexdigest()

    try:
        # Different sizes can never be identical content.
        if os.path.getsize(path1) != os.path.getsize(path2):
            return False
        return _md5_of(path1) == _md5_of(path2)
    except (OSError, ValueError):
        # Only I/O and invalid-path failures are treated as "no match";
        # anything else (e.g. KeyboardInterrupt) is allowed to propagate.
        return False

# Option 2: Fast perceptual hash comparison
def compare_images_phash(path1, path2, threshold=5):
    """Return True when the perceptual hashes of two images are close.

    Args:
        path1: Path of the first image.
        path2: Path of the second image.
        threshold: Largest hash difference that still counts as a match.

    Returns:
        True if the difference between the two perceptual hashes is at most
        ``threshold``; False otherwise, or when either image cannot be opened
        or hashed.

    Raises:
        ImportError: If ``imagehash`` or Pillow is not installed.
    """
    # Imported outside the try block on purpose: a missing dependency must
    # surface as an ImportError instead of silently turning every comparison
    # into "no match".
    import imagehash
    from PIL import Image

    try:
        # Context managers make sure both image files are closed again.
        with Image.open(path1) as img1, Image.open(path2) as img2:
            hash1 = imagehash.phash(img1)
            hash2 = imagehash.phash(img2)
        return bool(hash1 - hash2 <= threshold)
    except Exception:
        # Best effort: an unreadable or corrupt image is simply not a match.
        # Unlike a bare except, this lets KeyboardInterrupt/SystemExit through.
        return False

# Option 3: Correlation of downscaled grayscale copies (OpenCV)
def compare_images_fast(path1, path2, threshold=0.85, show=True):
    """Decide whether two image files look alike using a normalized correlation.

    Both images are read with OpenCV, resized to 256x256, converted to
    grayscale and compared with ``cv2.matchTemplate`` in ``TM_CCOEFF_NORMED``
    mode. Since both inputs have the same size, the result contains a single
    correlation value, which is used as the similarity score. Nothing in this
    function requests GPU execution.

    Args:
        path1: Path of the original image.
        path2: Path of the candidate (human-annotated) image.
        threshold: Minimum similarity score that counts as a match.
        show: When True (default, the historical behavior) every match is
            displayed as a matplotlib figure with both images and the score.
            Pass False for silent use, for example when the function is
            called from worker threads.

    Returns:
        True if the similarity score is at least ``threshold``; False
        otherwise, or when either image cannot be read.
    """
    img1 = cv2.imread(path1)
    img2 = cv2.imread(path2)

    # An image that could not be loaded shows up here as None.
    if img1 is None or img2 is None:
        return False

    # A small fixed size keeps the comparison cheap and makes both inputs the
    # same shape.
    target_size = (256, 256)
    gray1 = cv2.cvtColor(cv2.resize(img1, target_size), cv2.COLOR_BGR2GRAY)
    gray2 = cv2.cvtColor(cv2.resize(img2, target_size), cv2.COLOR_BGR2GRAY)

    result = cv2.matchTemplate(gray1, gray2, cv2.TM_CCOEFF_NORMED)
    similarity_score = np.max(result)

    # Written as "not >=" so that a NaN score is treated as "no match".
    if not similarity_score >= threshold:
        return False

    if show:
        fig, axes = plt.subplots(1, 3, figsize=(15, 5))
        axes[0].imshow(cv2.cvtColor(img1, cv2.COLOR_BGR2RGB))
        axes[0].set_title('Image Original to compare')
        axes[1].imshow(cv2.cvtColor(img2, cv2.COLOR_BGR2RGB))
        axes[1].set_title('Image in Human Annotate')
        axes[2].text(0.5, 0.5, f'Similarity Score: {similarity_score:.4f}',
                     horizontalalignment='center', verticalalignment='center',
                     transform=axes[2].transAxes, fontsize=14)
        axes[2].axis('off')
        plt.show()
        # Short pause kept from the original implementation; presumably it
        # gives the frontend time to render the figure - TODO confirm.
        time.sleep(0.2)
        plt.close(fig)

    return True

# Option 4: Multi-threaded batch comparison
def compare_images_batch(original_paths, human_paths, comparison_func=compare_images_fast):
    """Compare every original image with every human-annotated image in a thread pool.

    Args:
        original_paths: Paths of the original images.
        human_paths: Paths of the human-annotated images.
        comparison_func: Callable taking (original_path, human_path) and
            returning a truthy value for a match.

    Returns:
        List of (original_path, human_path) tuples for which
        ``comparison_func`` reported a match, in pair-generation order
        (all human paths for the first original, then the second, ...).
    """
    def _evaluate(pair):
        left, right = pair
        return comparison_func(left, right), left, right

    # Full cartesian product, with the originals in the outer position.
    pairs = [(orig, human) for orig in original_paths for human in human_paths]

    # One worker per CPU; map() returns the outcomes in submission order.
    with ThreadPoolExecutor(max_workers=os.cpu_count()) as pool:
        outcomes = list(pool.map(_evaluate, pairs))

    return [(orig, human) for matched, orig, human in outcomes if matched]

# Select the comparison strategy that the matching loop calls as
# compare_images(original_path, human_path):
#   - exact, byte-identical files:   compare_images = compare_images_hash
#   - perceptually similar images:   compare_images = compare_images_phash
#   - correlation-based similarity:  compare_images = compare_images_fast

compare_images = compare_images_fast  # Default choice

In [None]:
import shutil
import os
from tqdm import tqdm

count_found = 0
original_image_path_len = len(original_image_path)
print(f"Total original images to find: {original_image_path_len}")
all_img_paths_len = len(all_img_paths)
print(f"Total images in human annotate directory: {all_img_paths_len}")

# For every selected original page, look for the annotated image that shows
# the same page and file it (with its XML annotation, if any) under
# <train>/<volume name>/<original page file name>.
# NOTE: this cell is destructive - matched files are moved out of the train
# root, so it cannot simply be re-run on the same data.
for img_path in tqdm(original_image_path, desc="Processing original images"):
    copy_img_path = os.path.join(human_annotate_dir, Path(img_path).parent.name)
    copy_img_name = Path(img_path).name
    for img_human in all_img_paths:
        if not compare_images(img_path, img_human):
            continue

        # Move the matched image, renaming it to the original page name.
        shutil.move(img_human, os.path.join(copy_img_path, copy_img_name))

        # The annotation shares the image's base name; move and rename it too.
        xml_human = os.path.splitext(img_human)[0] + ".xml"
        if os.path.exists(xml_human):
            shutil.move(xml_human, os.path.join(copy_img_path, os.path.splitext(copy_img_name)[0] + ".xml"))

        # A matched candidate must not be offered to later originals. Removing
        # from the list being iterated is safe only because the loop is left
        # immediately afterwards.
        all_img_paths.remove(img_human)
        count_found += 1
        break

In [None]:
# Number of original pages for which a matching annotated image was found.
print(count_found)

In [None]:
# Remove every image that was left unmatched in the train directory, together
# with the XML annotation that shares its base name.
print(f"Remaining unmatched images to delete: {len(all_img_paths)}")

for file_path in all_img_paths:
    xml_path = os.path.splitext(file_path)[0] + ".xml"
    try:
        # Image first, then its annotation; files that are already gone are skipped.
        for stale_path in (file_path, xml_path):
            if os.path.exists(stale_path):
                os.remove(stale_path)
    except OSError as err:
        # Report the failure and carry on with the next image.
        print(f"Error deleting file {file_path}: {err}")

print("Cleanup complete. All unmatched files have been deleted from the root of the train directory.")

In [None]:
# Transform to XML format

import os
import xml.etree.ElementTree as ET
from pathlib import Path
import uuid
from xml.dom import minidom
from tqdm import tqdm

def generate_unique_id():
    """Return the first 8 hex characters of a freshly generated random UUID."""
    full_hex = uuid.uuid4().hex
    return full_hex[:8]

def parse_roboflow_xml(xml_file_path):
    """Parse one Roboflow annotation XML file and collect its speech-balloon boxes.

    An object is kept when its class name is exactly ``Speech-Balloons`` or
    contains ``no tail`` in any letter case.

    Args:
        xml_file_path: Path of the annotation file to read.

    Returns:
        A tuple ``(width, height, speech_balloons)`` where ``width`` and
        ``height`` come from the ``<size>`` element and ``speech_balloons`` is
        a list of dicts with the integer keys ``xmin``, ``ymin``, ``xmax`` and
        ``ymax``, in document order.

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        AttributeError: If a required element (``size``, ``name``, ``bndbox``
            or one of their children) is missing.
        ValueError: If a size or coordinate value is not numeric.
    """
    def _as_int(text):
        # Accept "12" as well as decimal notations such as "12.0" (truncated).
        return int(float(text))

    root = ET.parse(xml_file_path).getroot()

    size = root.find('size')
    width = _as_int(size.find('width').text)
    height = _as_int(size.find('height').text)

    speech_balloons = []
    for obj in root.findall('object'):
        # An empty <name/> element has text None; treat it as an empty name.
        name = obj.find('name').text or ''
        # The needle must be lowercase because it is searched in the
        # lowercased name; a capitalized needle could never match.
        if name == 'Speech-Balloons' or 'no tail' in name.lower():
            bndbox = obj.find('bndbox')
            speech_balloons.append({
                corner: _as_int(bndbox.find(corner).text)
                for corner in ('xmin', 'ymin', 'xmax', 'ymax')
            })

    return width, height, speech_balloons

def create_manga109_xml(book_title, pages_data, output_path):
    """Write one consolidated, pretty-printed XML file for a book.

    Args:
        book_title: Value of the ``title`` attribute of the root ``<book>``.
        pages_data: Mapping of page index to either ``None`` (written as a
            page with width and height "0") or a tuple
            ``(width, height, speech_balloons)`` whose balloons are dicts with
            ``xmin``/``ymin``/``xmax``/``ymax``.
        output_path: File the XML is written to (UTF-8, no XML declaration).
    """
    book = ET.Element('book', title=book_title)
    # An empty <characters> element precedes <pages>; it gets no children here.
    ET.SubElement(book, 'characters')
    pages = ET.SubElement(book, 'pages')

    # Pages are emitted in ascending index order.
    for page_index in sorted(pages_data):
        page_data = pages_data[page_index]
        if page_data is not None:
            width, height, speech_balloons = page_data
            page_elem = ET.SubElement(pages, 'page', index=str(page_index), width=str(width), height=str(height))

            # Every collected box becomes a <bubble> with a generated id.
            for balloon in speech_balloons:
                corners = {key: str(balloon[key]) for key in ('xmin', 'ymin', 'xmax', 'ymax')}
                ET.SubElement(page_elem, 'bubble', id=generate_unique_id(), **corners)
        else:
            # Placeholder entry for a page without usable annotation data.
            ET.SubElement(pages, 'page', index=str(page_index), width="0", height="0")

    pretty_xml = minidom.parseString(ET.tostring(book, encoding='unicode')).toprettyxml(indent="  ")

    # Everything after the first line break, i.e. without the XML declaration line.
    body_without_declaration = pretty_xml.partition('\n')[2]
    with open(output_path, 'w', encoding='utf-8') as out_file:
        out_file.write(body_without_declaration)

def process_book_folders(input_root, output_root):
    """Build one consolidated XML file per book folder.

    Every sub-directory of ``input_root`` is treated as a book. XML files in
    it whose base name is an integer are taken as that page's annotation;
    other XML files are ignored. Pages from 0 up to the highest page number
    found are written, with pages lacking a usable annotation emitted as
    empty entries. The result is saved as ``<output_root>/<book folder>.xml``.

    Args:
        input_root: Directory containing one sub-directory per book.
        output_root: Directory receiving the consolidated files; created
            (including missing parents) when it does not exist.
    """
    input_path = Path(input_root)
    output_path = Path(output_root)
    # parents=True so a missing intermediate directory does not abort the run.
    output_path.mkdir(parents=True, exist_ok=True)

    # Sorted so books are processed in the same order on every run.
    book_folders = sorted((f for f in input_path.iterdir() if f.is_dir()), key=lambda f: f.name)

    for book_folder in tqdm(book_folders, desc="Processing books"):
        book_title = book_folder.name

        # Page number -> annotation file; non-numeric base names are skipped.
        xml_files = {}
        for xml_file in book_folder.glob('*.xml'):
            try:
                xml_files[int(xml_file.stem)] = xml_file
            except ValueError:
                continue
        # -1 means "no pages", which yields an empty page range below.
        max_page = max(xml_files, default=-1)

        pages_data = {}
        for page_index in range(max_page + 1):
            xml_file = xml_files.get(page_index)
            if xml_file is None:
                pages_data[page_index] = None  # No annotation file for this page
                continue
            try:
                width, height, speech_balloons = parse_roboflow_xml(xml_file)
                pages_data[page_index] = (width, height, speech_balloons)
            except Exception as exc:
                # Keep going, but make the dropped page visible instead of
                # silently writing an empty entry.
                tqdm.write(f"Warning: could not parse '{xml_file}' ({exc}); page {page_index} is written as empty.")
                pages_data[page_index] = None

        output_xml_path = output_path / f"{book_title}.xml"
        create_manga109_xml(book_title, pages_data, output_xml_path)

# --- Main Execution ---
# Annotations are read from the per-book folders inside the train directory;
# the consolidated files go to 'annotations_xml' next to the train directory.
input_directory = human_annotate_dir
output_directory = os.path.join(os.path.dirname(human_annotate_dir), 'annotations_xml')

print(
    "Starting XML transformation...",
    f"Input annotations folder: {input_directory}",
    f"Output XML folder: {output_directory}",
    "-" * 60,
    sep="\n",
)

process_book_folders(input_directory, output_directory)

print(
    "-" * 60,
    "Transformation complete.",
    f"Consolidated XML files have been saved to: {output_directory}",
    sep="\n",
)