# 🖌️ Advanced Smart Manhwa Cleaner

Welcome! This notebook allows you to automatically clean text from comic pages (manhwa/manga) and enhance their quality. 

**Instructions:**
1. Run the first cell (Setup) to install all the necessary libraries.
2. Run the second cell (Main Application Code) to define all the functions.
3. Run the third cell (Launch the App!) to start the web interface. A public URL will be generated – click on it to open the application in a new tab.

In [None]:
# =========================
# 1. Setup
# =========================
# Installs the third-party packages that the main application cell imports
# (torch, OpenCV, Pillow, lama-cleaner, realesrgan, paddleocr, gradio, ...).
# The leading `!` on the pip line is a notebook shell escape, so this cell is
# meant to be run inside a notebook rather than as a plain Python script.
print('‚è≥ Installing dependencies... This may take a few minutes.')
# NOTE(review): `paddlepaddle-gpu` is requested unconditionally, while the main
# cell falls back to CPU when CUDA is unavailable -- confirm this install works
# on CPU-only runtimes.
!pip install torch torchvision numpy opencv-python scikit-image Pillow huggingface-hub loguru realesrgan lama-cleaner paddlepaddle-gpu paddleocr gradio
print('‚úÖ Dependencies installed!')

In [None]:
# =========================
# 2. Main Application Code
# =========================
import os
import cv2
import numpy as np
from PIL import Image
import torch
import zipfile
import tempfile
from lama_cleaner.model import LaMaInpainting
from realesrgan import RealESRGAN
from paddleocr import PaddleOCR
import gradio as gr

# Pick the GPU when PyTorch reports one, otherwise run everything on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f'üñ•Ô∏è Using device: {device}')

# The inpainting model is created once, when this cell runs, and is then
# reused as a module-level global by process_all_chapters().
lama_model = LaMaInpainting(model_type='lama', device=device)
print('‚úÖ Text removal model loaded')

# Lazily filled cache shared by get_ocr_model() and get_realesrgan_model().
model_cache = dict()

def get_ocr_model(lang):
    """Return a cached PaddleOCR instance for ``lang``, creating it on first use.

    Some commonly used codes (``'ja'``, ``'ko'``, ``'ch_sim'``) are not
    PaddleOCR language identifiers, so they are translated to PaddleOCR's own
    spelling before the model is built. Any other value is passed through
    unchanged, which keeps previously working codes working.

    Args:
        lang: Language code requested by the caller (e.g. ``'en'``, ``'ja'``).

    Returns:
        The ``PaddleOCR`` instance stored in ``model_cache`` for that language.
    """
    # Codes PaddleOCR spells differently; everything else is used as given.
    paddle_aliases = {'ja': 'japan', 'ko': 'korean', 'ch_sim': 'ch'}
    paddle_lang = paddle_aliases.get(lang, lang)

    # Cache under the normalised code so 'ja' and 'japan' share one model.
    if paddle_lang not in model_cache:
        print(f'üîÑ Loading OCR model for language: {lang}')
        model_cache[paddle_lang] = PaddleOCR(use_angle_cls=True, lang=paddle_lang)
    return model_cache[paddle_lang]

def get_realesrgan_model(scale):
    """Return the cached super-resolution model for ``scale``, loading it once.

    Models are stored in ``model_cache`` under the key ``realesrgan_x<scale>``.
    On a cache miss a ``RealESRGAN`` instance is created on the module-level
    ``device`` and its weights are loaded with ``download=True``.

    Args:
        scale: Upscaling factor; it is used both for the constructor and to
            build the weight file name.

    Returns:
        The cached model object.
    """
    # NOTE(review): the constructor signature and the weight file name below
    # should be confirmed against the installed `realesrgan` package.
    cache_key = f'realesrgan_x{scale}'
    if cache_key in model_cache:
        return model_cache[cache_key]

    print(f'üîÑ Loading super-resolution model with scale: {scale}x')
    upscaler = RealESRGAN(device, scale=scale)
    weights_name = f'RealESRGAN_x{scale}plus.pth'
    upscaler.load_weights(weights_name, download=True)
    model_cache[cache_key] = upscaler
    return model_cache[cache_key]

def clean_page(image, ocr_model, lama_model, realesrgan_model):
    """Erase detected text from a single page and upscale the result.

    Args:
        image: PIL image of the page; it is converted to RGB before use.
        ocr_model: Object whose ``ocr(array)`` result carries, in its first
            element, a sequence of detections whose first item is a polygon
            (list of points).
        lama_model: Callable taking ``(image_array, mask)`` and returning the
            inpainted image.
        realesrgan_model: Object whose ``predict(...)`` output is accepted by
            ``Image.fromarray``.

    Returns:
        The cleaned and upscaled page as a PIL image.
    """
    page_rgb = np.array(image.convert('RGB'))
    height, width = page_rgb.shape[:2]
    detections = ocr_model.ocr(page_rgb)

    # White (255) marks pixels to inpaint; the mask stays black without text.
    text_mask = np.zeros((height, width), dtype=np.uint8)
    text_regions = (detections and detections[0]) or []
    for region in text_regions:
        polygon = np.array(region[0], dtype=np.int32)
        cv2.fillPoly(text_mask, [polygon], 255)

    inpainted = lama_model(page_rgb, text_mask)
    return Image.fromarray(realesrgan_model.predict(inpainted))

def clean_chapter_folder(chapter_folder, ocr_model, lama_model, realesrgan_model, output_format):
    """Clean every supported image in ``chapter_folder`` and save the results.

    Cleaned pages are written to a ``chapter_cleaned`` sub-folder, keeping each
    source file's base name and using ``output_format`` (lower-cased) as the
    extension. A page that fails is reported and skipped so the remaining
    pages are still processed.

    Args:
        chapter_folder: Directory containing the page images.
        ocr_model: OCR model forwarded to ``clean_page``.
        lama_model: Inpainting model forwarded to ``clean_page``.
        realesrgan_model: Super-resolution model forwarded to ``clean_page``.
        output_format: Target image format, e.g. ``'PNG'`` or ``'JPG'``
            (matched case-insensitively).

    Returns:
        Path of the output folder, or ``None`` when the folder holds no
        supported images or no page could be saved.
    """
    supported_formats = ('.png', '.jpg', '.jpeg', '.webp', '.tiff', '.bmp')
    files = sorted(f for f in os.listdir(chapter_folder) if f.lower().endswith(supported_formats))
    if not files:
        print(f'‚ö†Ô∏è No images found in folder: {chapter_folder}')
        return None

    # Normalise once so 'jpg', 'JPG' and 'JPEG' all take the JPEG branch
    # (the encoder is addressed as 'jpeg', and JPEG needs an RGB image).
    format_name = output_format.upper()
    is_jpeg = format_name in ('JPG', 'JPEG')

    # Only create the output folder once we know there is something to write.
    output_folder = os.path.join(chapter_folder, 'chapter_cleaned')
    os.makedirs(output_folder, exist_ok=True)

    saved_pages = 0
    for idx, file in enumerate(files, 1):
        input_path = os.path.join(chapter_folder, file)
        print(f'üìÑ Processing page {idx}/{len(files)}: {file}')
        try:
            # The context manager releases the file handle as soon as the page
            # has been converted inside clean_page().
            with Image.open(input_path) as image:
                result_image = clean_page(image, ocr_model, lama_model, realesrgan_model)
            file_name, _ = os.path.splitext(file)
            output_filename = f'{file_name}.{output_format.lower()}'
            output_path = os.path.join(output_folder, output_filename)
            if is_jpeg:
                result_image.convert('RGB').save(output_path, 'jpeg', quality=95)
            else:
                result_image.save(output_path, format_name)
        except Exception as e:
            # Best effort: one broken page must not abort the whole chapter.
            print(f'‚ùå Error processing file {file}: {e}')
        else:
            saved_pages += 1

    # Report "nothing processed" instead of handing back an empty folder.
    return output_folder if saved_pages else None

def process_all_chapters(uploaded_files, lang, scale, output_format, progress=gr.Progress(track_tqdm=True)):
    """Gradio callback: clean all uploaded pages and bundle them into a zip.

    Every directory that directly contains an uploaded file is treated as one
    chapter and handed to ``clean_chapter_folder``. Only those directories are
    touched; unrelated sibling directories are never scanned.

    Args:
        uploaded_files: Uploaded files, either as path strings / path-like
            objects or as file wrappers exposing the path in ``.name``.
        lang: OCR language code forwarded to ``get_ocr_model``.
        scale: Upscaling factor forwarded to ``get_realesrgan_model``.
        output_format: Image format forwarded to ``clean_chapter_folder``.
        progress: Gradio progress tracker.

    Returns:
        ``(zip_path, status_message)``. ``zip_path`` is ``None`` when no files
        were given or nothing could be processed.
    """
    if not uploaded_files:
        return None, 'Please select image files first.'
    progress(0, desc='Loading models...')
    ocr_model = get_ocr_model(lang)
    realesrgan_model = get_realesrgan_model(scale)

    # Accept both plain paths and file wrappers that carry the path in `.name`.
    all_paths = [
        os.fspath(f) if isinstance(f, (str, os.PathLike)) else f.name
        for f in uploaded_files
    ]

    # One chapter per directory that actually holds an uploaded file.
    chapters_to_process = sorted({os.path.dirname(os.path.abspath(p)) for p in all_paths})
    if len(chapters_to_process) > 1:
        root_path = os.path.commonpath(chapters_to_process)
        print(f'üìÇ Found {len(chapters_to_process)} chapters in: {root_path}')
    else:
        root_path = chapters_to_process[0]
        print(f'üìÇ No chapters found, processing current folder: {root_path}')

    processed_folders = []
    for chapter_path in progress.tqdm(chapters_to_process, desc='Processing chapters'):
        output_folder = clean_chapter_folder(chapter_path, ocr_model, lama_model, realesrgan_model, output_format)
        if output_folder:
            processed_folders.append(output_folder)
    if not processed_folders:
        return None, 'No images were processed.'

    # A fresh directory per call keeps concurrent runs from overwriting each
    # other's archive while the download keeps its familiar file name.
    zip_dir = tempfile.mkdtemp(prefix='cleaned_chapters_')
    zip_path = os.path.join(zip_dir, 'cleaned_chapters.zip')
    with zipfile.ZipFile(zip_path, 'w') as zipf:
        for folder in processed_folders:
            for root, _, files in os.walk(folder):
                for file in files:
                    file_path = os.path.join(root, file)
                    # Names are relative to the common root, so a single
                    # chapter yields 'chapter_cleaned/<page>' and several
                    # chapters get distinct '<chapter>/chapter_cleaned/<page>'.
                    arcname = os.path.relpath(file_path, root_path)
                    zipf.write(file_path, arcname=arcname)
    return zip_path, '‚úÖ Processing complete. Your download is ready.'

# Web UI definition. The four input components are passed positionally to
# process_all_chapters(uploaded_files, lang, scale, output_format), and the two
# output components receive its (zip_path, status_message) return value.
interface = gr.Interface(
    fn=process_all_chapters,
    inputs=[
        gr.Files(label='Select chapter images or drag the entire folder'),
        # NOTE(review): the selected value is forwarded to get_ocr_model();
        # confirm every code listed here is one the OCR backend accepts.
        gr.Dropdown(['en', 'ar', 'ja', 'ko', 'ch_sim', 'fr', 'de'], label='Text language in images', value='en', info='Choose the language for accurate removal'),
        # The selected value is forwarded to get_realesrgan_model().
        gr.Dropdown([2, 4], label='Super-resolution scale', value=2, info='2x is faster, 4x gives higher quality'),
        # The selected value is forwarded to clean_chapter_folder() as output_format.
        gr.Radio(['PNG', 'JPG'], label='Save cleaned images format', value='PNG')
    ],
    outputs=[
        gr.File(label='Download cleaned chapters (.zip)'),
        gr.Textbox(label='Status', lines=5)
    ],
    title='üñåÔ∏è Advanced Smart Manhwa Cleaner',
    description='Upload the images you want to clean. You can upload a single chapter or drag a folder containing multiple chapter folders.',
    allow_flagging='never',
    theme=gr.themes.Soft()
)

In [None]:
# =========================
# 3. Launch the App!
# =========================
# Starts the Gradio app defined in the previous cell. `share=True` requests the
# public URL mentioned in the notebook instructions.
# NOTE(review): `debug=True` presumably keeps the cell attached and surfaces
# errors in the notebook output -- confirm against the installed Gradio version.
interface.launch(share=True, debug=True)