In [91]:
!kill -9 $(nvidia-smi | grep python | awk '{print $5}') || true


kill: usage: kill [-s sigspec | -n signum | -sigspec] pid | jobspec ... or kill -l [sigspec]


In [92]:
!nvidia-smi

Mon Oct  6 05:37:08 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.35.03              Driver Version: 560.35.03      CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   66C    P0             29W /   70W |   15093MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  Tesla T4                       Off |   00

In [93]:
!pip install --quiet ultralytics opencv-python matplotlib torch torchvision tqdm ninja

In [94]:
# --- Import Required Libraries ---

# File and data handling
import os
import json
import random
import shutil
from collections import defaultdict
from pathlib import Path
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
from functools import partial

# Data processing and visualization
import pandas as pd
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
import yaml

# Deep learning
import torch
import torch.multiprocessing as mp
from ultralytics import YOLO
from tqdm.notebook import tqdm  # For progress bars

# Fix multiprocessing issues
mp.set_start_method('spawn', force=True)  # Set start method to spawn
torch.multiprocessing.set_sharing_strategy('file_system')  # Use file_system sharing strategy

# Set environment variable for memory allocation
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:256"

# Print system info
print("=== System Information ===")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

if torch.cuda.is_available():
    # Force using only the first GPU
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"  # Use both GPUs
    # Verify GPU setup
    n_gpus = torch.cuda.device_count()
    print(f"\nNumber of GPUs available: {n_gpus}")
    for i in range(n_gpus):
        props = torch.cuda.get_device_properties(i)
        print(f"\nGPU {i}: {props.name}")
        print(f"- Memory: {props.total_memory / 1024**3:.1f} GB")
        print(f"- CUDA Capability: {props.major}.{props.minor}")
    
    # Set memory allocation strategy
    torch.cuda.empty_cache()
    for i in range(n_gpus):
        torch.cuda.set_per_process_memory_fraction(0.7, i)  # Use 70% of available memory per GPU
else:
    print("WARNING: No GPU found. Training on CPU will be very slow!")

# Get CPU cores for parallel processing
NUM_CORES = min(os.cpu_count(), 4)  # Limit number of CPU cores
print(f"\nCPU cores to be used: {NUM_CORES}")

# Configure PyTorch for memory efficiency
torch.backends.cudnn.benchmark = True  # Enable cuDNN auto-tuner
torch.backends.cuda.matmul.allow_tf32 = True  # Allow TF32 on matmul
torch.backends.cudnn.allow_tf32 = True  # Allow TF32 on cudnn

# Worker initialization function for DataLoader
def worker_init_fn(worker_id):
    """Initialize worker process properly"""
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

=== System Information ===
PyTorch version: 2.6.0+cu124
CUDA available: True

Using single GPU: Tesla T4 (14.7 GB)
CUDA version: 12.4

CPU cores available: 4


In [95]:
# --- 1. Configuration Section ---

# Paths to your data and model files
JSON_DIR = '/kaggle/input/manga109-jsons/jsons'  # Thư mục chứa các file JSON
IMAGE_ROOT_DIR = '/kaggle/input/manga109-images/images'  # Thư mục gốc chứa ảnh
DATASET_DIR = 'balloon_dataset'  # Directory to store YOLO format dataset
PRE_TRAINED_MODEL_DIR = 'pre-trained_model'

# Validate paths
print("\nValidating directories...")
for path in [JSON_DIR, IMAGE_ROOT_DIR]:
    if not os.path.exists(path):
        raise FileNotFoundError(f"Directory not found: {path}")
    else:
        print(f"Found directory: {path}")
        # List some contents
        contents = os.listdir(path)[:5]
        print(f"Sample contents: {contents}")

# Create dataset directories
print("\nCreating dataset directories...")
for split in ['train', 'val']:
    for subdir in ['images', 'labels', 'masks']:  # Added masks directory
        dir_path = os.path.join(DATASET_DIR, f'{subdir}/{split}')
        os.makedirs(dir_path, exist_ok=True)
        print(f"Created: {dir_path}")

# Set category information directly
TARGET_CATEGORY_ID = 5  # Fixed category ID for balloon
TARGET_CATEGORY_NAME = "balloon"  # Fixed category name

print(f"\nTarget Category Configuration:")
print(f"Category ID: {TARGET_CATEGORY_ID}")
print(f"Category Name: {TARGET_CATEGORY_NAME}")

# Optional: Verify category in a sample file
print("\nChecking JSON structure and categories...")
sample_files = [f for f in os.listdir(JSON_DIR) if f.endswith('.json')][:1]
if sample_files:
    sample_path = os.path.join(JSON_DIR, sample_files[0])
    with open(sample_path, 'r') as f:
        sample_data = json.load(f)
        print("\nJSON structure:")
        print("Keys:", list(sample_data.keys()))
        if 'images' in sample_data:
            print("Sample image entry:", sample_data['images'][0])
        if 'annotations' in sample_data:
            print("Sample annotation entry:", sample_data['annotations'][0])
        
        # Check if our target category is present in annotations
        if 'annotations' in sample_data:
            balloon_annotations = [ann for ann in sample_data['annotations'] 
                                if ann['category_id'] == TARGET_CATEGORY_ID]
            if balloon_annotations:
                print(f"\nFound {len(balloon_annotations)} balloon annotations in sample file")
                print("Sample balloon annotation:", balloon_annotations[0])
            else:
                print("\nWarning: No balloon annotations found in sample file")

Creating dataset directories...
Created: balloon_dataset/images/train
Created: balloon_dataset/labels/train
Created: balloon_dataset/images/val
Created: balloon_dataset/labels/val


In [96]:
# --- 2. Data Preparation ---

def decode_rle(rle):
    """Decode RLE format to binary mask"""
    if isinstance(rle, dict):
        # COCO RLE format
        h, w = rle['size']
        if 'counts' in rle:
            counts = rle['counts']
            if isinstance(counts, str):
                # If counts is a string, it's compressed RLE
                from pycocotools import mask as mask_utils
                import numpy as np
                try:
                    # Convert RLE to binary mask
                    mask = mask_utils.decode(rle)
                    return mask.astype(np.float32)
                except Exception as e:
                    print(f"Error decoding RLE: {str(e)}")
                    return None
    return None

def polygon_to_mask(segmentation, img_height, img_width):
    """Convert segmentation to binary mask, handling both polygon and RLE formats"""
    import numpy as np
    from PIL import Image, ImageDraw
    
    # Check if segmentation is RLE format
    if isinstance(segmentation, dict) and 'counts' in segmentation and 'size' in segmentation:
        return decode_rle(segmentation)
    
    # If not RLE, treat as polygon
    mask = Image.new('L', (img_width, img_height), 0)
    
    # Handle case where segmentation is a list of polygons
    if isinstance(segmentation, list):
        # If first element is also a list, we have multiple polygons
        if segmentation and isinstance(segmentation[0], list):
            polygons = segmentation
        else:
            # Single polygon as flat list of coordinates
            polygons = [segmentation]
    else:
        return None
    
    try:
        for polygon in polygons:
            if len(polygon) >= 6:  # At least 3 points (6 coordinates)
                # Convert flat list to list of points
                points = []
                for i in range(0, len(polygon), 2):
                    x, y = polygon[i], polygon[i + 1]
                    if not (isinstance(x, (int, float)) and isinstance(y, (int, float))):
                        continue
                    if x < 0 or y < 0 or x > img_width or y > img_height:
                        continue
                    points.append((x, y))
                
                if len(points) >= 3:  # Need at least 3 points for a polygon
                    # Draw the polygon
                    ImageDraw.Draw(mask).polygon(points, outline=1, fill=1)
    except Exception as e:
        print(f"Error creating mask: {str(e)}")
        return None
    
    # Convert to numpy array
    mask = np.array(mask, dtype=np.float32)
    return mask

def xywh_to_yolo(bbox, img_width, img_height):
    """Convert XYWH bbox to YOLO format (x_center, y_center, width, height) normalized"""
    try:
        # Handle different bbox formats
        if isinstance(bbox, (list, tuple)):
            if len(bbox) == 4:
                x, y, w, h = bbox
            else:
                raise ValueError("Bbox must have 4 coordinates")
        elif isinstance(bbox, dict):
            x = bbox.get('x')
            y = bbox.get('y')
            w = bbox.get('width')
            h = bbox.get('height')
        else:
            raise ValueError("Unsupported bbox format")
        
        # Validate values
        if any(not isinstance(v, (int, float)) for v in [x, y, w, h]):
            raise ValueError("Bbox coordinates must be numeric")
        if img_width <= 0 or img_height <= 0:
            raise ValueError("Image dimensions must be positive")
        if w <= 0 or h <= 0:
            raise ValueError("Bbox width and height must be positive")
        if x < 0 or y < 0 or x + w > img_width or y + h > img_height:
            raise ValueError("Bbox coordinates out of image bounds")
        
        return [
            (x + w/2) / img_width,  # x_center
            (y + h/2) / img_height, # y_center
            w / img_width,          # width
            h / img_height          # height
        ]
    except Exception as e:
        print(f"Error converting bbox: {str(e)}")
        return None

def find_image_file(image_root, file_name):
    """Find image file with case-insensitive matching"""
    try:
        # Get manga directory and image name
        parts = file_name.split('/')
        if len(parts) == 2:
            manga_dir, image_name = parts
        else:
            manga_dir = os.path.dirname(file_name)
            image_name = os.path.basename(file_name)
        
        # First try exact path
        direct_path = os.path.join(image_root, manga_dir, image_name)
        if os.path.exists(direct_path):
            return os.path.join(manga_dir, image_name)
        
        # Try case-insensitive search
        manga_path = None
        for root_dir in os.listdir(image_root):
            if root_dir.lower() == manga_dir.lower():
                manga_path = os.path.join(image_root, root_dir)
                break
        
        if manga_path and os.path.exists(manga_path):
            for img_file in os.listdir(manga_path):
                if img_file.lower() == image_name.lower():
                    return os.path.join(os.path.basename(manga_path), img_file)
    except Exception as e:
        print(f"Error finding image file: {str(e)}")
    return None

def prepare_manga_balloon_data(json_dir, image_root):
    """
    Loads and processes JSON files in COCO format, filtering for balloon annotations.
    Uses sequential processing to avoid multiprocessing issues.
    """
    print("\n=== Starting Data Preparation ===")
    print(f"JSON directory: {json_dir}")
    print(f"Image root directory: {image_root}")
    print(f"Target category ID: {TARGET_CATEGORY_ID}")
    print(f"Target category name: {TARGET_CATEGORY_NAME}")
    
    # First, install pycocotools if not already installed
    try:
        import pycocotools
    except ImportError:
        print("Installing pycocotools...")
        os.system('pip install pycocotools')
        import pycocotools
    
    dataset_dicts = {}
    json_files = [f for f in os.listdir(json_dir) if f.endswith('.json')]
    print(f"\nFound {len(json_files)} JSON files")
    
    total_images = 0
    total_annotations = 0
    skipped_annotations = 0
    
    for json_file in tqdm(json_files, desc="Processing JSON files"):
        try:
            with open(os.path.join(json_dir, json_file), 'r', encoding='utf-8') as f:
                manga_data = json.load(f)
                
                # Validate JSON structure
                if not isinstance(manga_data, dict):
                    print(f"Warning: {json_file} has invalid format")
                    continue
                
                if not all(k in manga_data for k in ['images', 'annotations']):
                    print(f"Warning: {json_file} missing required keys")
                    continue
                
                # Create lookup for images
                image_lookup = {}
                for img in manga_data['images']:
                    if 'id' not in img or 'file_name' not in img:
                        continue
                    image_lookup[img['id']] = img
                
                # Process annotations
                file_annotations = 0
                for ann in manga_data['annotations']:
                    try:
                        # Check if this is a balloon annotation
                        if ann.get('category_id') != TARGET_CATEGORY_ID:
                            continue
                        
                        # Get image info
                        img_id = ann.get('image_id')
                        if img_id not in image_lookup:
                            continue
                        
                        img_info = image_lookup[img_id]
                        relative_path = find_image_file(image_root, img_info['file_name'])
                        
                        if not relative_path:
                            skipped_annotations += 1
                            continue
                        
                        img_path = os.path.join(image_root, relative_path)
                        if not os.path.exists(img_path):
                            skipped_annotations += 1
                            continue
                        
                        # Convert bbox to YOLO format
                        bbox = ann.get('bbox')
                        if not bbox:
                            continue
                            
                        yolo_bbox = xywh_to_yolo(
                            bbox,
                            img_info['width'],
                            img_info['height']
                        )
                        
                        if not yolo_bbox:
                            skipped_annotations += 1
                            continue
                        
                        # Convert segmentation to mask
                        segmentation = ann.get('segmentation')
                        if not segmentation:
                            continue
                            
                        mask = polygon_to_mask(
                            segmentation,
                            img_info['height'],
                            img_info['width']
                        )
                        
                        if mask is None:
                            skipped_annotations += 1
                            continue
                        
                        # Add to dataset
                        if img_path not in dataset_dicts:
                            dataset_dicts[img_path] = {
                                'image_id': img_id,
                                'width': img_info['width'],
                                'height': img_info['height'],
                                'bboxes': [],
                                'masks': []
                            }
                            total_images += 1
                        
                        dataset_dicts[img_path]['bboxes'].append(yolo_bbox)
                        dataset_dicts[img_path]['masks'].append(mask)
                        total_annotations += 1
                        file_annotations += 1
                        
                    except Exception as e:
                        print(f"Error processing annotation: {str(e)}")
                        continue
                        
                print(f"Processed {json_file}: Found {file_annotations} valid balloon annotations")
                
        except json.JSONDecodeError as e:
            print(f"Error decoding {json_file}: {str(e)}")
            continue
        except Exception as e:
            print(f"Error processing {json_file}: {str(e)}")
            continue
    
    print("\n=== Data Processing Summary ===")
    print(f"Total images with valid annotations: {total_images}")
    print(f"Total valid annotations: {total_annotations}")
    print(f"Skipped annotations: {skipped_annotations}")
    
    if not dataset_dicts:
        print("\nERROR: No valid data found after processing!")
        print("Please check the following:")
        print(f"1. Target category ID ({TARGET_CATEGORY_ID}) exists in the dataset")
        print("2. Image files exist in the correct structure")
        print("3. Annotations have valid segmentation data")
        raise ValueError("No valid data found after processing")
    
    print(f"\nFinished data preparation. Found {len(dataset_dicts)} images containing '{TARGET_CATEGORY_NAME}'.")
    return dataset_dicts

In [97]:
# --- 3. Split and Prepare YOLO Dataset ---

# Prepare the data
all_data = prepare_manga_balloon_data(JSON_DIR, IMAGE_ROOT_DIR)

# --- Group data by manga title ---
print("\nGrouping data by manga series for a robust train/val split...")
grouped_data = defaultdict(dict)
for img_path, data in all_data.items():
    # Extract manga name from the file path
    manga_name = Path(img_path).parent.name
    grouped_data[manga_name][img_path] = data

print(f"Found {len(grouped_data)} unique manga series.")

# --- Split manga titles, not individual pages ---
manga_titles = list(grouped_data.keys())
train_titles, val_titles = train_test_split(manga_titles, test_size=0.2, random_state=42)

print(f"Splitting into {len(train_titles)} series for training and {len(val_titles)} for validation.")

# Function to copy images and create label/mask files
def process_dataset_split(titles, split_type):
    total_masks = 0
    for manga_title in titles:
        for img_path, data in grouped_data[manga_title].items():
            # Generate unique identifier for the image
            img_identifier = f"{manga_title}_{Path(img_path).stem}"
            
            # Copy image
            dest_img_path = os.path.join(DATASET_DIR, f'images/{split_type}', 
                                       f"{img_identifier}.jpg")
            shutil.copy2(img_path, dest_img_path)
            
            # Create label file (for bounding boxes)
            label_path = os.path.join(DATASET_DIR, f'labels/{split_type}', f"{img_identifier}.txt")
            
            # Create masks directory for this image
            masks_dir = os.path.join(DATASET_DIR, f'masks/{split_type}', img_identifier)
            os.makedirs(masks_dir, exist_ok=True)
            
            # Write YOLO format labels and save masks
            with open(label_path, 'w') as f:
                for idx, (bbox, mask) in enumerate(zip(data['bboxes'], data['masks'])):
                    # Write bbox in YOLO format
                    f.write(f"0 {' '.join(map(str, bbox))}\n")
                    
                    # Save mask as PNG file (better for binary masks)
                    mask_path = os.path.join(masks_dir, f'{idx}.png')
                    cv2.imwrite(mask_path, (mask * 255).astype(np.uint8))
                    total_masks += 1
    
    return total_masks

# Process train and validation splits
print("\nProcessing training split...")
train_masks = process_dataset_split(train_titles, 'train')
print("Processing validation split...")
val_masks = process_dataset_split(val_titles, 'val')

# Verify split
train_images = len(list(Path(DATASET_DIR).glob('images/train/*.jpg')))
val_images = len(list(Path(DATASET_DIR).glob('images/val/*.jpg')))

# Count masks (using proper path pattern)
train_mask_files = sum(len(list(Path(mask_dir).glob('*.png'))) 
                      for mask_dir in Path(DATASET_DIR).glob('masks/train/*'))
val_mask_files = sum(len(list(Path(mask_dir).glob('*.png'))) 
                    for mask_dir in Path(DATASET_DIR).glob('masks/val/*'))

print(f"\nDataset created successfully:")
print(f"Training images: {train_images}")
print(f"Training masks: {train_mask_files}")
print(f"Validation images: {val_images}")
print(f"Validation masks: {val_mask_files}")

# Additional verification
print("\nVerifying mask creation:")
train_mask_dirs = list(Path(DATASET_DIR).glob('masks/train/*'))
val_mask_dirs = list(Path(DATASET_DIR).glob('masks/val/*'))
print(f"Number of training image directories with masks: {len(train_mask_dirs)}")
print(f"Number of validation image directories with masks: {len(val_mask_dirs)}")

if train_mask_files == 0 or val_mask_files == 0:
    print("\nWARNING: No masks were created! Checking first few images for debugging:")
    sample_images = list(Path(DATASET_DIR).glob('images/train/*.jpg'))[:5]
    for img_path in sample_images:
        img_id = img_path.stem
        mask_dir = Path(DATASET_DIR) / 'masks/train' / img_id
        print(f"\nChecking {img_id}:")
        print(f"- Image exists: {img_path.exists()}")
        print(f"- Mask directory exists: {mask_dir.exists()}")
        if mask_dir.exists():
            print(f"- Number of masks: {len(list(mask_dir.glob('*.png')))}")
            print(f"- Mask directory contents: {list(mask_dir.glob('*'))}")

# Save split information
split_info = pd.DataFrame([
    {'manga_title': title, 'dataset_split': 'train'} for title in train_titles
] + [
    {'manga_title': title, 'dataset_split': 'validation'} for title in val_titles
])
split_info.sort_values('manga_title').to_csv('manga_split_summary.csv', index=False)

Processing 109 JSON files using 4 cores...


Processing JSON files:   0%|          | 0/109 [00:00<?, ?it/s]

Found 10619 images and 130180 annotations

Converting annotations to YOLO format...


Converting annotations:   0%|          | 0/10619 [00:00<?, ?it/s]

Finished data preparation. Found 9916 images containing 'balloon'.

Grouping data by manga series for a robust train/val split...
Found 109 unique manga series.
Splitting into 87 series for training and 22 for validation.

Processing training split...
Processing validation split...

Dataset created successfully:
Training images: 8690
Validation images: 2687


In [98]:
# ===================================================================
# --- Create, Display, and Save Split Summary ---
# ===================================================================
print("\n--- Generating Train/Validation Split Summary ---")

# Create a list of dictionaries for the DataFrame
train_split_info = [{'manga_title': title, 'dataset_split': 'train'} for title in train_titles]
val_split_info = [{'manga_title': title, 'dataset_split': 'validation'} for title in val_titles]

# Combine and create the DataFrame
split_summary_df = pd.DataFrame(train_split_info + val_split_info)
split_summary_df = split_summary_df.sort_values(by='manga_title').reset_index(drop=True)

# Display the DataFrame in the notebook output
print("Manga Series Split Distribution:")
print(split_summary_df.to_string()) # .to_string() ensures all rows are printed

# Save the DataFrame to a CSV file
csv_filename = "manga_split_summary.csv"
split_summary_df.to_csv(csv_filename, index=False)
print(f"\nSplit summary has been saved to '{csv_filename}'")
# ===================================================================


--- Generating Train/Validation Split Summary ---
Manga Series Split Distribution:
                          manga_title dataset_split
0                                ARMS    validation
1                  AisazuNihaIrarenai         train
2                    AkkeraKanjinchou         train
3                             Akuhamu         train
4                        AosugiruHaru    validation
5                       AppareKappore         train
6                               Arisa         train
7                           BEMADER_P         train
8                 BakuretsuKungFuGirl         train
9                            Belmondo         train
10                  BokuHaSitatakaKun    validation
11            BurariTessenTorimonocho    validation
12                        ByebyeC-BOY    validation
13                Count3DeKimeteAgeru         train
14                            DollGun         train
15                       Donburakokko         train
16                        DualJu

In [99]:
# --- 4. Create YAML Configuration ---

# Tạo thư mục output nếu chưa tồn tại
output_dir = 'output'
os.makedirs(output_dir, exist_ok=True)

# Define dataset configuration
dataset_config = {
    # Đường dẫn dataset
    'path': os.path.abspath(DATASET_DIR),  # Đường dẫn tuyệt đối đến thư mục dataset
    'train': os.path.join('images', 'train'),  # Thư mục ảnh training
    'val': os.path.join('images', 'val'),      # Thư mục ảnh validation
    'test': os.path.join('images', 'val'),     # Sử dụng validation set làm test set
    
    # Thông tin classes
    'names': {
        0: TARGET_CATEGORY_NAME  # Class 0: balloon
    },
    
    # Thông số task
    'task': 'segment',          # Task: instance segmentation
    'nc': 1,                    # Số lượng class: 1 (balloon)
    
    # Thông tin thêm
    'width': 512,               # Giảm kích thước ảnh để tiết kiệm bộ nhớ
    'height': 512,              # Giữ tỷ lệ 1:1
    'batch': 1,                 # Batch size nhỏ nhất cho segmentation
    'epochs': 100,              # Số epochs khuyến nghị
    'device': [0, 1],           # Sử dụng cả 2 GPU
}

# Save configuration vào thư mục output
yaml_path = os.path.join(output_dir, 'dataset.yaml')
with open(yaml_path, 'w') as f:
    yaml.dump(dataset_config, f, default_flow_style=False, sort_keys=False)

# Save một bản copy trong thư mục dataset
dataset_yaml_path = os.path.join(DATASET_DIR, 'dataset.yaml')
with open(dataset_yaml_path, 'w') as f:
    yaml.dump(dataset_config, f, default_flow_style=False, sort_keys=False)

print("=== Dataset Configuration ===")
print(f"\nYAML files saved to:")
print(f"1. Output directory: {yaml_path}")
print(f"2. Dataset directory: {dataset_yaml_path}")
print("\nDataset configuration:")
print(yaml.dump(dataset_config, default_flow_style=False, sort_keys=False))

# Verify the files exist
for path in [yaml_path, dataset_yaml_path]:
    if os.path.exists(path):
        print(f"\nVerified: {path} exists")
        print(f"File size: {os.path.getsize(path)} bytes")
    else:
        print(f"\nWarning: {path} was not created!")

=== Dataset Configuration ===

YAML files saved to:
1. Output directory: output/dataset.yaml
2. Dataset directory: balloon_dataset/dataset.yaml

Dataset configuration:
path: /kaggle/working/balloon_dataset
train: images/train
val: images/val
test: images/val
names:
  0: balloon
task: detect
nc: 1
width: 1280
height: 1280
batch: 4
epochs: 100
device: 0


Verified: output/dataset.yaml exists
File size: 186 bytes

Verified: balloon_dataset/dataset.yaml exists
File size: 186 bytes


In [100]:
# --- 5. Initialize and Train YOLOv8 Model ---

# Kiểm tra GPU và clear cache
print("=== GPU Configuration ===")
if torch.cuda.is_available():
    torch.cuda.empty_cache()  # Clear GPU cache
    props = torch.cuda.get_device_properties(0)
    print(f"Using GPU: {props.name}")
    print(f"- Memory: {props.total_memory / 1024**3:.1f} GB")
    print(f"- CUDA Capability: {props.major}.{props.minor}")
else:
    print("WARNING: No GPU found. Training on CPU will be very slow!")

# Initialize YOLOv8 model
print("\n=== Model Initialization ===")
print("Loading YOLOv8-seg model...")
model = YOLO('yolov8s-seg.pt')  # Load segmentation model

# Calculate optimal batch size for single GPU
BATCH_SIZE = 1  # Reduced batch size for segmentation
ACCUMULATE = max(1, round(64 / BATCH_SIZE))  # Accumulate gradients

# Verify YAML file exists
if not os.path.exists(yaml_path):
    raise FileNotFoundError(f"YAML configuration file not found at {yaml_path}")
print(f"\nUsing configuration from: {yaml_path}")

# Cấu hình training
train_args = {
    # [1] Cấu hình dataset
    'data': yaml_path,         # File cấu hình dataset
    
    # [2] Các tham số training chính
    'epochs': 100,            # Số epochs training
    'imgsz': 512,            # Giảm kích thước ảnh
    'batch': BATCH_SIZE,     # Batch size nhỏ hơn
    'patience': 15,          # Early stopping nếu không cải thiện
    
    # [3] Tối ưu hóa
    'lr0': 0.0001,          # Giảm learning rate
    'lrf': 0.01,            # Learning rate cuối
    'weight_decay': 0.0005, # Chống overfitting
    'warmup_epochs': 3.0,   # Epochs warmup
    'warmup_momentum': 0.8, # Momentum during warmup
    'warmup_bias_lr': 0.1,  # Warmup initial bias lr
    'box': 7.5,            # Box loss gain
    'cls': 0.5,            # Cls loss gain
    'mask_ratio': 4,       # Mask loss gain
    'overlap': True,       # Overlap masks for segmentation
    
    # [4] Data augmentation (giảm bớt để tiết kiệm bộ nhớ)
    'fliplr': 0.5,          # Lật ngang
    'flipud': 0.0,          # Không lật dọc
    'mosaic': 0.0,          # Tắt mosaic do cần nhiều bộ nhớ
    'mixup': 0.0,           # Tắt mixup
    'copy_paste': 0.0,      # Tắt copy-paste
    'degrees': 0.0,         # Image rotation
    'translate': 0.1,       # Giảm translation
    'scale': 0.2,          # Giảm scale
    'shear': 0.0,          # Image shear
    
    # [5] DataLoader configuration
    'workers': 0,           # Disable multiprocessing for DataLoader
    'pin_memory': False,    # Disable pin_memory
    'persistent_workers': False,  # Disable persistent workers
    'rect': False,         # Tắt rectangular training cho segmentation
    'cos_lr': True,        # Cosine learning rate
    'amp': True,           # Auto mixed precision
    'multi_scale': False,  # Tắt multi-scale training
    
    # [6] Thư mục và checkpoint
    'project': os.path.join(output_dir, 'runs/segment'),
    'name': 'balloon_yolov8seg',
    'exist_ok': True,
    'pretrained': True,
    
    # [7] Memory optimization
    'cache': False,        # Không cache images
    'image_weights': False,# Không dùng image weights
    'save_period': -1,    # Chỉ lưu best model
    'val': True,          # Validate mỗi epoch
    'save': True,         # Lưu checkpoints
    
    # [8] Segmentation specific
    'mask': True,         # Enable mask training
    'retina_masks': True, # Use high-quality masks
    'overlap_mask': True, # Allow mask overlap
    'mask_ratio': 4,      # Mask loss ratio
    'dropout': 0.2,       # Dropout rate
}

print("\n=== Training Configuration ===")
sections = {
    'Dataset': ['data'],
    'Training': ['epochs', 'imgsz', 'batch', 'patience'],
    'Optimization': ['lr0', 'lrf', 'weight_decay', 'warmup_epochs', 'cos_lr'],
    'Augmentation': ['fliplr', 'mosaic', 'mixup', 'copy_paste', 'scale'],
    'GPU Config': ['device', 'workers', 'amp'],
    'Segmentation': ['mask', 'retina_masks', 'overlap_mask', 'mask_ratio'],
    'Output': ['project', 'name']
}

for section, params in sections.items():
    print(f"\n{section}:")
    for p in params:
        if p in train_args:
            print(f"  {p}: {train_args[p]}")

print("\nNOTES:")
print(f"- Using single GPU with minimal batch size {BATCH_SIZE}")
print(f"- Gradient accumulation steps: {ACCUMULATE}")
print("- Auto mixed precision (AMP) enabled for memory efficiency")
print("- Disabled multiprocessing in DataLoader")
print("- Disabled heavy augmentations to save memory")
print("- High-quality segmentation masks enabled")
print(f"- All outputs will be saved to: {train_args['project']}")

=== GPU Configuration ===
Using GPU: Tesla T4
- Memory: 14.7 GB
- CUDA Capability: 7.5

=== Model Initialization ===
Loading YOLOv8 model...

Using configuration from: output/dataset.yaml

=== Training Configuration ===

Dataset:
  data: output/dataset.yaml

Training:
  epochs: 100
  imgsz: 1280
  batch: 4
  patience: 15

Optimization:
  lr0: 0.001
  lrf: 0.01
  weight_decay: 0.0005
  warmup_epochs: 3.0
  cos_lr: True

Augmentation:
  fliplr: 0.5
  mosaic: 1.0
  mixup: 0.2
  copy_paste: 0.1
  scale: 0.3

GPU Config:
  device: 1
  workers: 8
  amp: True

Output:
  project: output/runs/detect
  name: balloon_yolov8

NOTES:
- Using single GPU with batch size 4
- Gradient accumulation steps: 16
- Auto mixed precision (AMP) enabled for faster training
- Early stopping will pause training if no improvement in 15 epochs
- Cosine learning rate scheduler enabled
- Using 8 worker processes for data loading
- All outputs will be saved to: output/runs/detect


In [None]:
!nvidia-smi

In [101]:
# --- Start Training ---

print("=== Training Setup ===")
if torch.cuda.is_available():
    print(f"Training on GPU: {torch.cuda.get_device_name(0)}")
    print(f"Batch size: {train_args['batch']}")
else:
    print("Training on CPU (not recommended)")
    print(f"Batch size: {train_args['batch']}")

print(f"\nTraining will run for {train_args['epochs']} epochs")
print(f"Early stopping patience: {train_args['patience']} epochs")
print(f"Results will be saved to: {os.path.join(train_args['project'], train_args['name'])}")

# Training with progress bar
from tqdm.notebook import tqdm
with tqdm(total=train_args['epochs'], desc='Training Progress') as pbar:
    def on_train_epoch_end(trainer):
        pbar.update(1)
        # Log GPU memory usage
        if torch.cuda.is_available():
            mem_used = torch.cuda.memory_reserved(0) / 1024**2
            mem_total = torch.cuda.get_device_properties(0).total_memory / 1024**3
            print(f"\nGPU Memory Used: {mem_used:.0f}MB / {mem_total:.1f}GB")
    
    results = model.train(
        **train_args
    )

# Print detailed results summary
print("\n=== Training Results ===")
metrics = results.results_dict
print("\nAccuracy Metrics:")
print(f"mAP50-95: {metrics.get('metrics/mAP50-95(B)', 'N/A')} (primary metric)")
print(f"mAP50: {metrics.get('metrics/mAP50(B)', 'N/A')}")
print(f"Precision: {metrics.get('metrics/precision(B)', 'N/A')}")
print(f"Recall: {metrics.get('metrics/recall(B)', 'N/A')}")

print("\nTraining Statistics:")
print(f"Best epoch: {results.best_epoch}")
print(f"Final epoch: {results.epoch}")
if results.epoch < train_args['epochs']:
    print("Note: Training stopped early due to patience criterion")

# Save final model
final_model_path = os.path.join(train_args['project'], train_args['name'], 'weights/best.pt')
print(f"\nBest model saved to: {final_model_path}")

# Plot results
print("\nGenerating training plots...")
fig = results.plot_results()
print("Training plots have been saved to the project directory")

=== Training Setup ===
Training on GPU: Tesla T4
Batch size: 4

Training will run for 100 epochs
Early stopping patience: 15 epochs
Results will be saved to: output/runs/detect/balloon_yolov8


Training Progress:   0%|          | 0/100 [00:00<?, ?it/s]

Ultralytics 8.3.205 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:1 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=4, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.1, copy_paste_mode=flip, cos_lr=True, cutmix=0.0, data=output/dataset.yaml, degrees=0.0, deterministic=True, device=1, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=1280, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.001, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.2, mode=train, model=yolov8s-seg.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=balloon_yolov8, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=15, perspective=0.0, plots

OutOfMemoryError: CUDA out of memory. Tried to allocate 2.00 MiB. GPU 0 has a total capacity of 14.74 GiB of which 2.12 MiB is free. Process 3196 has 14.73 GiB memory in use. Of the allocated memory 14.50 GiB is allocated by PyTorch, and 80.47 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# --- 6. Model Evaluation ---
print("\n=== Starting Model Evaluation ===")

# Validate on validation set
print("\nRunning validation...")
val_results = model.val(
    data=yaml_path,
    split='val',
    conf=0.25,  # Confidence threshold
    iou=0.45,   # NMS IOU threshold
    verbose=True
)

# Print detailed metrics
metrics = val_results.results_dict
print("\nDetailed Evaluation Metrics:")
print("1. Average Precision (AP):")
print(f"  - mAP50-95: {metrics['metrics/mAP50-95']:.4f}")
print(f"  - mAP50: {metrics['metrics/mAP50']:.4f}")
print(f"  - mAP75: {metrics['metrics/mAP75']:.4f}")

print("\n2. Precision/Recall:")
print(f"  - Precision: {metrics['metrics/precision']:.4f}")
print(f"  - Recall: {metrics['metrics/recall']:.4f}")

print("\n3. Performance:")
print(f"  - Speed: {metrics.get('speed/inference', 'N/A')} ms per image")
print(f"  - Total images: {val_results.need}")
print(f"  - Processed images: {val_results.done}")

# Plot confusion matrix
val_results.plot_confusion_matrix()
print("\nConfusion matrix has been saved to the project directory")

In [None]:
# --- 7. Visualize Predictions ---
print("\n=== Generating Visualizations ===")

# Create directories
vis_output_dir = "./balloon_test_visualizations"
os.makedirs(vis_output_dir, exist_ok=True)

# Get validation images
val_images_dir = os.path.join(DATASET_DIR, 'images/val')
val_images = list(Path(val_images_dir).glob('*.jpg'))
print(f"\nFound {len(val_images)} validation images")

# Select random samples
num_samples = min(20, len(val_images))
sample_images = random.sample(val_images, num_samples)

print(f"\nProcessing {num_samples} random samples...")
for i, img_path in enumerate(sample_images, 1):
    print(f"\nImage {i}/{num_samples}: {img_path.name}")
    
    # Run prediction
    results = model.predict(
        source=str(img_path),
        save=True,          # Save annotated images
        save_txt=True,      # Save predictions as txt
        conf=0.25,          # Confidence threshold
        iou=0.45,          # NMS IOU threshold
        line_width=2,       # Box thickness
        boxes=True,         # Show boxes
        labels=True,        # Show labels
        hide_conf=False     # Show confidences
    )
    
    # Print detection summary
    r = results[0]  # Get first (only) result
    print(f"Detections: {len(r)} balloons")
    if len(r) > 0:
        print("Confidence scores:", [f"{conf:.2f}" for conf in r.boxes.conf])
    
    # Copy visualization
    pred_img = Path(r.save_dir) / img_path.name
    if pred_img.exists():
        dest_path = os.path.join(vis_output_dir, img_path.name)
        shutil.copy2(pred_img, dest_path)
        print(f"Saved visualization to: {dest_path}")

print(f"\nAll visualizations saved to '{vis_output_dir}'")
print("Note: Green boxes show predicted balloons with confidence scores")

In [None]:
# --- 8. Export and Save Model ---
print("\n=== Exporting Model ===")

# 1. Save PyTorch model
best_model_path = 'balloon_detector_best.pt'
shutil.copy2(str(model.best), best_model_path)
print(f"\n1. PyTorch model saved as '{best_model_path}'")

# 2. Export to ONNX
print("\n2. Exporting to ONNX format...")
model.export(format='onnx', dynamic=True)
print("ONNX model exported successfully")

# 3. Export to TensorRT (if supported)
if torch.cuda.is_available():
    try:
        print("\n3. Exporting to TensorRT format...")
        model.export(format='engine', dynamic=True)
        print("TensorRT model exported successfully")
    except Exception as e:
        print(f"TensorRT export failed: {e}")

# 4. Export to CoreML (if on MacOS)
if os.uname().sysname == 'Darwin':
    try:
        print("\n4. Exporting to CoreML format...")
        model.export(format='coreml', dynamic=True)
        print("CoreML model exported successfully")
    except Exception as e:
        print(f"CoreML export failed: {e}")

# Save training plots
print("\nGenerating training plots...")
results.plot_results()
print("Training plots saved in the project directory")

# Print final summary
print("\n=== Export Summary ===")
print("The following files have been created:")
print(f"1. PyTorch model: {best_model_path}")
print(f"2. ONNX model: {best_model_path.replace('.pt', '.onnx')}")
print("3. Training plots: In the project directory")
print("\nModel is ready for deployment!")