In [None]:
###true 
import os
import json
import glob
import shutil
from tqdm import tqdm
import cv2

# ==========================================
# 1. PATH CONFIGURATION
# ==========================================
RAW_ROOT = "/kaggle/input/car-damage-labeled-yolo"    # Original Kaggle dataset path
SPLIT_ROOT = "/kaggle/working/split_data"             # Output path for YOLO

# Make sure every <split>/<kind> output folder exists; exist_ok keeps this
# cell safe to re-run.
for split_name in ("train", "val"):
    for kind in ("images", "labels"):
        os.makedirs(os.path.join(SPLIT_ROOT, f"{split_name}/{kind}"), exist_ok=True)

# Show the top-level entries of the input folder as a quick sanity check.
print("Content of RAW_ROOT:")
for item in os.listdir(RAW_ROOT):
    print(" -", item)

# ==========================================
# 2. CLASS MAPPING
# ==========================================
# JSON class names in YOLO ID order: the position of a name in this tuple
# is the class ID written to the label files.
_JSON_CLASS_NAMES = (
    "rach",
    "tray_son",
    "vo_kinh",
    "mat_bo_phan",
    "thung",
    "mop_lom",
    "be_den",
)

# Map JSON class names to YOLO IDs
CLASS_MAP = {name: class_id for class_id, name in enumerate(_JSON_CLASS_NAMES)}

# ==========================================
# 3. HELPER FUNCTIONS
# ==========================================

def find_image_path(filename, root=None):
    """Find an image by file name anywhere below a root directory.

    Args:
        filename: File name to look for. It is matched literally; glob
            metacharacters such as ``[``, ``]``, ``*`` and ``?`` in the name
            are escaped rather than interpreted as a pattern.
        root: Directory that is searched recursively. Defaults to RAW_ROOT.

    Returns:
        The path of the first match in sorted order (so the choice is
        reproducible when the same name exists in several folders), or
        None when nothing matches.
    """
    if root is None:
        root = RAW_ROOT

    # Escape both parts so that only the "**" component acts as a wildcard.
    pattern = os.path.join(glob.escape(root), "**", glob.escape(filename))
    matches = sorted(glob.glob(pattern, recursive=True))
    return matches[0] if matches else None

def _polygon_to_yolo_box(all_x, all_y, w_img, h_img):
    """Return the normalized (cx, cy, w, h) box enclosing a polygon, or None if it has no area."""
    # Clamp the corners to the image first. Clamping the centre and the size
    # independently can still leave a box that extends past the image edge
    # when polygon points lie outside the image.
    x_min = max(0, min(w_img, min(all_x)))
    x_max = max(0, min(w_img, max(all_x)))
    y_min = max(0, min(h_img, min(all_y)))
    y_max = max(0, min(h_img, max(all_y)))

    width = x_max - x_min
    height = y_max - y_min
    if width <= 0 or height <= 0:
        # Single point, straight line, or polygon entirely outside the image.
        return None

    cx = x_min + width / 2
    cy = y_min + height / 2
    return cx / w_img, cy / h_img, width / w_img, height / h_img


def process_json(json_path, split):
    """
    Convert one annotation JSON (Train/Val) into a YOLO split under SPLIT_ROOT.

    For every entry whose image can be located and decoded, the image is
    copied to ``SPLIT_ROOT/<split>/images``. If the entry yields at least one
    valid box, a label file with the same stem is written to
    ``SPLIT_ROOT/<split>/labels``; otherwise no label file is written.

    Args:
        json_path: Path to the annotation JSON. It may hold a list of entries
            or a dict whose values are the entries. Each entry is read through
            its "name" and "regions" keys, and each region through "all_x",
            "all_y" and "class".
        split: Sub-folder of SPLIT_ROOT to fill (called with "train" and "val").

    Returns:
        None. A summary line with the number of labelled images and boxes is
        printed at the end.
    """
    print(f"\n Processing JSON: {json_path} -> split='{split}'")

    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Convert to list if data is a dictionary (VIA format)
    if isinstance(data, dict):
        entries = list(data.values())
    else:
        entries = data

    # Define output directories
    img_out_dir = os.path.join(SPLIT_ROOT, split, "images")
    lbl_out_dir = os.path.join(SPLIT_ROOT, split, "labels")

    image_count = 0  # images that received a label file
    box_count = 0    # boxes written across all label files

    for entry in tqdm(entries, desc=f"{split} JSON entries"):
        filename = entry.get("name", "")
        if not filename:
            continue

        # Find source image; entries without a matching file are skipped.
        img_src = find_image_path(filename)
        if not img_src:
            continue

        # Read image to get dimensions; unreadable files are skipped.
        img = cv2.imread(img_src)
        if img is None:
            continue
        # Only height and width are needed; slicing avoids depending on the
        # number of dimensions of the decoded array.
        h_img, w_img = img.shape[:2]

        # Copy image to destination
        base_name = os.path.basename(img_src)
        shutil.copy2(img_src, os.path.join(img_out_dir, base_name))

        # Label file shares the image's stem, with a .txt extension.
        txt_dest = os.path.splitext(os.path.join(lbl_out_dir, base_name))[0] + ".txt"
        yolo_lines = []

        for region in entry.get("regions", []):
            all_x = region.get("all_x", [])
            all_y = region.get("all_y", [])
            class_name = region.get("class", "rach")  # default class

            if not all_x or not all_y:
                continue
            if class_name not in CLASS_MAP:
                # Skip unknown classes
                continue

            box = _polygon_to_yolo_box(all_x, all_y, w_img, h_img)
            if box is None:
                # Skip boxes with zero width or height.
                continue

            x_c, y_c, w_n, h_n = box
            yolo_lines.append(
                f"{CLASS_MAP[class_name]} {x_c:.6f} {y_c:.6f} {w_n:.6f} {h_n:.6f}"
            )
            box_count += 1

        # Write label file if we have valid boxes
        if yolo_lines:
            with open(txt_dest, "w") as f_lbl:
                f_lbl.write("\n".join(yolo_lines))
            image_count += 1

    print(f"âœ… {split}: Created {image_count} images and {box_count} boxes.")

# ==========================================
# 4. EXECUTION
# ==========================================

# Annotation files are expected directly inside RAW_ROOT.
train_json, val_json = (
    os.path.join(RAW_ROOT, json_name)
    for json_name in ("0Train_via_annos.json", "0Val_via_annos.json")
)

# Stop early unless both annotation files are present.
if not (os.path.exists(train_json) and os.path.exists(val_json)):
    raise FileNotFoundError("JSON files not found (0Train_via_annos.json / 0Val_via_annos.json). Check dataset structure.")

# Convert the training split first, then the validation split.
for annotation_path, split_name in ((train_json, "train"), (val_json, "val")):
    process_json(annotation_path, split_name)

# Report how many files ended up in each output folder.
print("\n--- Final Status ---")
for caption, folder in (
    ("Train images:", "train/images"),
    ("Train labels:", "train/labels"),
    ("Val images:", "val/images"),
    ("Val labels:", "val/labels"),
):
    print(caption, len(os.listdir(os.path.join(SPLIT_ROOT, folder))))

In [None]:
import yaml
import os

# ==========================================
# CONFIGURATION SETUP
# ==========================================
# Path where the YAML file will be created
DATA_CFG = "/kaggle/working/damage_config.yaml"

# English class names; the list position is the class ID (0-6).
class_names = [
    "tear_crack",       # 0 - (Original: rach)
    "scratch",          # 1 - (Original: tray_son)
    "shattered_glass",  # 2 - (Original: vo_kinh)
    "missing_part",     # 3 - (Original: mat_bo_phan)
    "puncture",         # 4 - (Original: thung)
    "dent",             # 5 - (Original: mop_lom)
    "broken_light",     # 6 - (Original: be_den)
]

# Dataset description that gets written to DATA_CFG for YOLO.
config = dict(
    path="/kaggle/working/split_data",  # Root folder for the dataset
    train="train/images",               # Training images, relative to `path`
    val="val/images",                   # Validation images, relative to `path`
    nc=len(class_names),                # Number of classes (7)
    names=class_names,
)

# ==========================================
# SAVE YAML FILE
# ==========================================
# Serialize the dataset description to DATA_CFG.
with open(DATA_CFG, "w") as f:
    yaml.dump(config, f)

print(f"Config saved successfully: {DATA_CFG}")
print("\n--- YAML File Content ---")
# Read the file back inside a context manager so the handle is closed
# instead of being left open for the rest of the kernel session.
with open(DATA_CFG) as f:
    print(f.read())

In [None]:
# ==========================================
# 1. INSTALL LIBRARY
# ==========================================
# Install YOLOv8 library (Runs in Kaggle environment)
!pip install ultralytics

from ultralytics import YOLO
import torch

# ==========================================
# 2. SYSTEM CHECK (GPU)
# ==========================================
print("CUDA Available :", torch.cuda.is_available())

gpu_total = torch.cuda.device_count()
print("GPU Count      :", gpu_total)

# Print the name of every visible GPU, one per line.
for gpu_index in range(gpu_total):
    print(f" - GPU {gpu_index}:", torch.cuda.get_device_name(gpu_index))

# ==========================================
# 3. INITIALIZE MODEL
# ==========================================
# Load a pretrained small model (yolov8s.pt)
model = YOLO("yolov8s.pt")

# ==========================================
# 4. START TRAINING
# ==========================================
# Train on GPU 0 when CUDA is available; otherwise fall back to the CPU so
# the cell still runs on a session without an accelerator.
train_device = 0 if torch.cuda.is_available() else "cpu"

results = model.train(
    data="/kaggle/working/damage_config.yaml", # Path to config file from Step 2
    epochs=50,                  # Number of training cycles
    imgsz=640,                  # Image resolution
    batch=16,                   # Batch size (images per step)
    name="car_damage_7cls_final", # Folder name for saving results
    patience=10,                # Stop early if no improvement
    device=train_device,        # GPU 0 if present, else CPU
    workers=2,                  # Low worker count for Kaggle stability
)

In [None]:
from ultralytics import YOLO
import matplotlib.pyplot as plt
import os

# ==========================================
# 1. LOAD BEST MODEL
# ==========================================
# Load the best weights saved during training
# Note: The path depends on the 'name' parameter in Step 3
model_path = "/kaggle/working/runs/detect/car_damage_7cls_final/weights/best.pt"

# Fail fast when the weights are missing. Continuing would either raise a
# NameError on `model` in a fresh kernel or silently run predictions with
# whatever `model` object an earlier cell left behind.
if not os.path.exists(model_path):
    raise FileNotFoundError(
        f"Model file not found at {model_path}. "
        "Run the training cell first and check the run folder name."
    )

model = YOLO(model_path)
print("Model loaded successfully.")

# ==========================================
# 2. RUN PREDICTIONS
# ==========================================
print("Starting prediction on validation data...")

# All prediction settings in one place, passed to predict() as keywords.
predict_options = dict(
    source="/kaggle/working/split_data/val/images",  # Test images folder
    save=True,                                       # Save the images with bounding boxes
    conf=0.25,                                       # Confidence threshold
    project="/kaggle/working/runs/detect",           # Save location root
    name="predictions",                              # Save folder name under `project`
    exist_ok=True,                                   # Reuse the folder if it already exists
)
results = model.predict(**predict_options)

print("\n Results saved to: /kaggle/working/runs/detect/predictions")