In [None]:
# Show the contents of the directory the notebook starts in.
!ls
# Move two levels up from the current working directory. The following cell
# reads the resulting location with os.getcwd() and validates its suffix.
# NOTE(review): the move is relative to the *current* directory, so re-running
# this cell without restarting the kernel climbs two more levels each time.
%cd ../..
# Show the contents of the new working directory.
!ls

In [None]:
import os

# Suffix the working directory must end with when it is not a '/content'
# directory. Replace the placeholder with the tail of your project folder.
END_WITH_LOCAL = 'put-your-folder-ending-here'

# Put the Cargo bin directory at the front of PATH. The membership check keeps
# this cell idempotent: running it again does not add a second copy of the
# entry, and a missing PATH variable is treated as empty instead of raising.
CARGO_BIN_DIR = '/root/.cargo/bin'
current_path = os.environ.get('PATH', '')
if CARGO_BIN_DIR not in current_path.split(os.pathsep):
    os.environ['PATH'] = (
        f"{CARGO_BIN_DIR}{os.pathsep}{current_path}" if current_path else CARGO_BIN_DIR
    )

# Directory the notebook is running from, as left by the preceding `%cd`.
BASE_DIR = os.getcwd()
print(f"BASE_DIR: {BASE_DIR}")

# Simple validation: stop early when the notebook is started from an unexpected
# location instead of failing later with confusing missing-path errors.
if not BASE_DIR.endswith(('/content', END_WITH_LOCAL)):
    raise ValueError(f"Expected to be in .../{END_WITH_LOCAL} or .../content directory, but got: {BASE_DIR}")


In [None]:
# --- Import Required Libraries ---

# File and data handling
import os
import json
import random
import shutil
from collections import defaultdict
from pathlib import Path

# Data processing and visualization
from sklearn.model_selection import train_test_split
import yaml

# Deep learning
import torch
from ultralytics import YOLO
from tqdm.notebook import tqdm  

In [None]:
# --- 1. Configuration Section ---

# Input locations (annotation JSONs, source images) and the output location of
# the generated YOLO dataset, all relative to BASE_DIR.
JSON_DIR = os.path.join(BASE_DIR,'data','MangaSegmentation/jsons_processed')
IMAGE_ROOT_DIR = os.path.join(BASE_DIR,'data','Manga109_released_2023_12_07/images')
DATASET_DIR = os.path.join(BASE_DIR,'data','YOLOv8_data')

# Validate the input directories before doing any work.
print("\nValidating directories...")
for path in [JSON_DIR, IMAGE_ROOT_DIR]:
    # isdir rather than exists: a regular file at this location would pass an
    # exists() check and then fail in os.listdir() with an unrelated error.
    if not os.path.isdir(path):
        raise FileNotFoundError(f"Directory not found: {path}")
    print(f"Found directory: {path}")
    # Show a few entries as a sanity check; sorted so the sample is stable
    # between runs (os.listdir returns entries in arbitrary order).
    contents = sorted(os.listdir(path))[:5]
    print(f"Sample contents: {contents}")

# Create the images/{train,val} and labels/{train,val} output folders.
# exist_ok=True makes the cell safe to re-run; note the message below is
# printed whether or not the folder already existed.
print("\nCreating dataset directories...")
for split in ['train', 'val']:
    for subdir in ['images', 'labels']:
        dir_path = os.path.join(DATASET_DIR, f'{subdir}/{split}')
        os.makedirs(dir_path, exist_ok=True)
        print(f"Created: {dir_path}")

# Category to extract from the source annotations, and the name it is given in
# the generated single-class dataset.
TARGET_CATEGORY_ID = 5  # Fixed category ID for balloon
TARGET_CATEGORY_NAME = "balloon"  # Fixed category name

print(f"\nTarget Category Configuration:")
print(f"Category ID: {TARGET_CATEGORY_ID}")
print(f"Category Name: {TARGET_CATEGORY_NAME}")

In [None]:
# --- 2. Data Preparation ---

print("\n--- 2. Preparing Data from Processed JSONs ---")

def prepare_manga_balloon_data(json_dir, image_root):
    """
    Build the list of image records that contain the target category.

    Every ``*.json`` file in ``json_dir`` is read and its ``images`` and
    ``annotations`` lists are merged into lookups keyed by image id. An image
    is kept only if it has at least one annotation whose ``category_id`` equals
    ``TARGET_CATEGORY_ID`` and whose ``segmentation`` is non-empty.
    (The original notes say this logic was adapted from 'train_v3 copy.ipynb'.)

    Args:
        json_dir: Directory holding the pre-processed annotation JSON files.
        image_root: Directory that each image's ``file_name`` is joined onto.

    Returns:
        A list of dicts with keys ``file_name`` (joined path), ``image_id``,
        ``height``, ``width`` and ``annotations``; each annotation is a dict
        with the source ``segmentation`` and ``category_id`` remapped to 0.

    Raises:
        KeyError: If a JSON entry lacks a field this function indexes directly
            (``id``, ``image_id``, ``file_name``, ``height``, ``width``).
    """
    all_images = {}
    all_annotations = defaultdict(list)
    duplicate_ids = 0

    print("Loading and parsing PRE-PROCESSED JSON files...")
    # Sorted because os.listdir returns names in arbitrary order. The order of
    # the records produced here determines the order of the manga titles passed
    # to the seeded train/val split, so it must be stable to be reproducible.
    json_files = sorted(f for f in os.listdir(json_dir) if f.endswith('.json'))

    for json_file in tqdm(json_files, desc="Processing JSONs"):
        # Explicit UTF-8: the platform default encoding is not always UTF-8.
        with open(os.path.join(json_dir, json_file), 'r', encoding='utf-8') as f:
            data = json.load(f)

        # NOTE(review): images and annotations are keyed by id alone, which
        # assumes ids are unique across all JSON files - TODO confirm.
        for img_info in data.get('images', []):
            if img_info['id'] in all_images:
                duplicate_ids += 1
            all_images[img_info['id']] = img_info
        for ann_info in data.get('annotations', []):
            all_annotations[ann_info['image_id']].append(ann_info)

    print(f"Loaded data for {len(all_images)} total images.")
    if duplicate_ids:
        # Surface the overwrite instead of letting it pass silently.
        print(f"Warning: {duplicate_ids} image id(s) appeared more than once; "
              f"later entries replaced earlier ones.")

    dataset_records = []
    for img_id, img_info in all_images.items():
        # Keep only target-category annotations that actually carry polygons.
        balloon_annotations = [
            {
                "segmentation": ann['segmentation'],
                "category_id": 0,  # All balloons will be class 0
            }
            for ann in all_annotations.get(img_id, [])
            if ann.get('category_id') == TARGET_CATEGORY_ID and ann.get('segmentation')
        ]

        # Only add images that contain at least one balloon
        if not balloon_annotations:
            continue

        dataset_records.append({
            "file_name": os.path.join(image_root, img_info['file_name']),
            "image_id": img_id,
            "height": img_info['height'],
            "width": img_info['width'],
            "annotations": balloon_annotations,
        })

    print(f"Data preparation complete. Found {len(dataset_records)} images containing '{TARGET_CATEGORY_NAME}'.")
    return dataset_records

# Run the data preparation
all_data = prepare_manga_balloon_data(JSON_DIR, IMAGE_ROOT_DIR)

In [None]:
# --- 3. Split and Prepare YOLO Dataset ---

# Split settings, named so the seed and the validation share are easy to find.
SPLIT_SEED = 42
VAL_FRACTION = 0.2

# NOTE: `all_data` comes from the data-preparation cell (section 2). It is
# deliberately not rebuilt here: calling prepare_manga_balloon_data() again
# would re-parse every JSON file only to produce the same set of records.

# --- Group data by manga title ---
print("\nGrouping data by manga series for a robust train/val split...")
grouped_data = defaultdict(list)
for record in all_data:
    # The directory that directly contains the image file is used as the
    # series name.
    manga_name = Path(record['file_name']).parts[-2]
    grouped_data[manga_name].append(record)
print(f"Found {len(grouped_data)} unique manga series.")

# Split by title rather than by page, so that pages from one series never end
# up on both sides of the split (prevents data leakage).
manga_titles = list(grouped_data.keys())
train_titles, val_titles = train_test_split(
    manga_titles, test_size=VAL_FRACTION, random_state=SPLIT_SEED
)
print(f"Splitting into {len(train_titles)} training series and {len(val_titles)} validation series.")

# Reconstruct train/val lists based on the title split
train_data = [record for title in train_titles for record in grouped_data[title]]
val_data = [record for title in val_titles for record in grouped_data[title]]
# Each list is shuffled with its own freshly seeded generator, so the result
# does not depend on the state of the global `random` module.
random.Random(SPLIT_SEED).shuffle(train_data)
random.Random(SPLIT_SEED).shuffle(val_data)
print(f"Final training set size: {len(train_data)} images")
print(f"Final validation set size: {len(val_data)} images")

In [None]:
def process_dataset_split_segmentation(data_split, split_type):
    """
    Write one dataset split in YOLOv8 instance-segmentation format.

    For every record whose source image exists, the image is copied to
    ``DATASET_DIR/images/<split_type>/`` and a label file with the same stem is
    written to ``DATASET_DIR/labels/<split_type>/``. Each label line has the
    form ``0 x1 y1 x2 y2 ...`` where the polygon coordinates are divided by the
    image width/height and clamped to the range [0, 1].

    Args:
        data_split: Iterable of records with keys ``file_name``, ``height``,
            ``width`` and (optionally) ``annotations``; each annotation holds a
            ``segmentation`` list of flat ``[x1, y1, x2, y2, ...]`` polygons.
        split_type: Sub-folder name of the split, e.g. ``'train'`` or ``'val'``.

    Returns:
        A tuple ``(images_written, polygons_written)``. Records skipped because
        their image file is missing are not counted.
    """
    images_written = 0
    total_annotations = 0

    for record in tqdm(data_split, desc=f"Processing {split_type} split"):
        original_img_path = record['file_name']
        img_height = record['height']
        img_width = record['width']

        # Skip records whose image is missing on disk.
        if not os.path.exists(original_img_path):
            print(f"Warning: Image not found at {original_img_path}. Skipping.")
            continue

        # Prefix the file stem with the parent folder name so that pages with
        # the same file name in different folders do not overwrite each other.
        manga_title = Path(original_img_path).parts[-2]
        img_stem = Path(original_img_path).stem
        img_identifier = f"{manga_title}_{img_stem}"

        # 1. Copy the image into the train/val image folder.
        dest_img_path = os.path.join(DATASET_DIR, f'images/{split_type}', f"{img_identifier}.jpg")
        shutil.copy2(original_img_path, dest_img_path)
        images_written += 1

        # 2. Build the label lines; one line per polygon.
        label_lines = []
        for ann in record.get('annotations', []):
            # Each annotation corresponds to one balloon (one object).
            segmentation = ann.get('segmentation')
            if not segmentation:
                continue

            # One object may consist of several polygons.
            for poly in segmentation:
                # A usable polygon is a flat sequence of at least three (x, y)
                # pairs. Anything else (non-list data, an odd number of values,
                # fewer than three points) cannot form a valid label line.
                if not isinstance(poly, (list, tuple)) or len(poly) < 6 or len(poly) % 2:
                    continue

                normalized_poly = []
                for x, y in zip(poly[0::2], poly[1::2]):
                    # Clamp to [0, 1]: points slightly outside the image would
                    # otherwise produce out-of-bounds label coordinates.
                    normalized_poly.append(min(max(x / img_width, 0.0), 1.0))
                    normalized_poly.append(min(max(y / img_height, 0.0), 1.0))

                # Format: class_id x1 y1 x2 y2 ...  (class is always 0: "balloon")
                label_lines.append(f"0 {' '.join(map(str, normalized_poly))}\n")

        # 3. Write the label file next to the copied image (same identifier).
        label_path = os.path.join(DATASET_DIR, f'labels/{split_type}', f"{img_identifier}.txt")
        with open(label_path, 'w') as f:
            f.writelines(label_lines)
        total_annotations += len(label_lines)

    return images_written, total_annotations

In [None]:
# --- Run the data conversion with the corrected function ---
print("\nProcessing training split...")
train_images_count, train_annotations_count = process_dataset_split_segmentation(train_data, 'train')
print("Processing validation split...")
val_images_count, val_annotations_count = process_dataset_split_segmentation(val_data, 'val')

# --- Check and verify ---
# Counts as reported by the conversion function.
print(f"\nDataset created successfully:")
print(
    f"Training images: {train_images_count}\n"
    f"Training annotations (polygons): {train_annotations_count}\n"
    f"Validation images: {val_images_count}\n"
    f"Validation annotations (polygons): {val_annotations_count}"
)

# Counts as found on disk in the generated image folders.
train_image_dir = os.path.join(DATASET_DIR, 'images/train')
val_image_dir = os.path.join(DATASET_DIR, 'images/val')
final_train_images = len(os.listdir(train_image_dir))
final_val_images = len(os.listdir(val_image_dir))

print(f"\nFinal verification from disk:")
print(f"Total training images in folder: {final_train_images}")
print(f"Total validation images in folder: {final_val_images}")

# The folders should hold exactly as many files as there are records per split.
counts_match = final_train_images == len(train_data) and final_val_images == len(val_data)
print(
    "\nVerification successful: All images were copied correctly."
    if counts_match
    else "\nVerification WARNING: Mismatch in image counts."
)

In [None]:
# Create YAML Configuration
print("\n--- 4. Creating dataset.yaml Configuration File ---")

# Dataset description: root path, image folders relative to that root, and the
# class-index-to-name mapping (a single class, index 0).
dataset_config = {
    'path': os.path.abspath(DATASET_DIR),
    'train': 'images/train',
    'val': 'images/val',
    'names': {
        0: TARGET_CATEGORY_NAME
    }
}

# Serialise once and reuse the text, so the preview printed below is exactly
# what is written to disk. Dumping the preview separately with default options
# would sort the keys and show a different layout than the saved file.
yaml_text = yaml.dump(dataset_config, default_flow_style=False, sort_keys=False)

yaml_path = Path(DATASET_DIR) / 'dataset.yaml'
with open(yaml_path, 'w', encoding='utf-8') as f:
    f.write(yaml_text)

print(f"YAML configuration saved to: {yaml_path}")
print("\nYAML Content:")
print(yaml_text)

In [None]:
# ===================================================================
#  Model Training
# ===================================================================
print("\n--- 5. Initializing and Training YOLOv8 Model ---")

# Report which device is available; on a GPU, also empty the CUDA cache so the
# run starts from a clean state.
gpu_available = torch.cuda.is_available()
if not gpu_available:
    print("WARNING: No GPU found. Training will be very slow on a CPU.")
else:
    device_name = torch.cuda.get_device_name(0)
    print(f"GPU is available. Using: {device_name}")
    torch.cuda.empty_cache()

# 1. Start from the pretrained YOLOv8-small segmentation weights.
model = YOLO('yolov8s-seg.pt')

# 2. Training settings, collected in one place so they are easy to review.
train_settings = {
    'data': str(yaml_path),
    'epochs': 5,
    'imgsz': 1280,
    'batch': 4,
    'project': 'YOLOv8_Training_Results',
    'name': 'balloon_segmentation_run1',
    'exist_ok': True,
}

print("\nStarting model training...")
results = model.train(**train_settings)

print("\n--- Training Finished ---")
print(f"All results, logs, and plots have been saved to: {model.trainer.save_dir}")
print(f"The best performing model is saved as: {model.trainer.best}")

In [None]:
# ===================================================================
# Automatic and Comprehensive Evaluation
# ===================================================================
print("\n--- 6. Evaluating Final Model Performance ---")

# 1. Locate the best checkpoint recorded by the trainer; fail early if the
#    file is not there.
path_to_best_model = model.trainer.best
if not os.path.exists(path_to_best_model):
    raise FileNotFoundError(f"Could not find the best model at: {path_to_best_model}")

print(f"Loading best model from: {path_to_best_model}")
best_model = YOLO(path_to_best_model)

# 2. Evaluate on the 'val' split to obtain the metrics object. The message
#    names the validation split because that is what split='val' evaluates;
#    this notebook creates no separate test split.
# NOTE(review): project/name are the same as in the training call, so with
# exist_ok=True the validation artifacts go into the training run folder.
print("\nRunning final evaluation on the validation split...")
metrics = best_model.val(
    split='val',
    project='YOLOv8_Training_Results',
    name='balloon_segmentation_run1',
    exist_ok=True,
)

# 3. Automatically discover, group, and print all available metrics
print("\n" + "#"*60)
print("--- Final Comprehensive Evaluation Report (All Metrics) ---")
print("#"*60)
print(f"\nValidation results saved to: {metrics.save_dir}\n")

# Metrics are grouped by the marker embedded in their key:
# '(B)' -> box metrics, '(M)' -> mask metrics, anything else -> other.
box_metrics = {}
mask_metrics = {}
other_metrics = {}

for key, value in metrics.results_dict.items():
    # Drop the 'metrics/' prefix so only the metric name (and marker) remains.
    clean_key = key.replace('metrics/', '').strip()

    if '(B)' in clean_key:
        box_metrics[clean_key.replace('(B)', '').strip()] = value
    elif '(M)' in clean_key:
        mask_metrics[clean_key.replace('(M)', '').strip()] = value
    else:
        other_metrics[clean_key] = value

# --- Function to print a dictionary of metrics neatly ---
def print_metric_group(title, metric_dict):
    """Print a titled listing of metric values, ordered by metric name.

    Args:
        title: Heading text, printed between ``---`` markers after a blank line.
        metric_dict: Mapping of metric name to a numeric value. When it is
            empty (or otherwise falsy) a placeholder line is printed instead.
    """
    print(f"\n--- {title} ---")
    if metric_dict:
        # Names are left-aligned in a 15-character column so the values line up;
        # values are shown with four decimal places.
        for name in sorted(metric_dict):
            print(f"     - {name:<15}: {metric_dict[name]:.4f}")
    else:
        print("     (No metrics found for this group)")

# --- Print each group of metrics ---
# Report each metric group in turn: box metrics, mask metrics, then the rest.
for group_title, group_metrics in (
    ("Bounding Box Detection Performance", box_metrics),
    ("Instance Segmentation Performance", mask_metrics),
    ("Other Metrics (e.g., Losses)", other_metrics),
):
    print_metric_group(group_title, group_metrics)

# Closing rule of the report.
print("\n" + "#"*60)