In [None]:
import os
import cv2
import json
import shutil
import random
import numpy as np
from pathlib import Path
from tqdm import tqdm

# --- PIPELINE SETTINGS ---
IMG_SIZE = 1280    # side length (pixels) of the square letterboxed output images
CLAHE_CLIP = 3.0   # clip limit handed to cv2.createCLAHE

# Input locations: raw DENTEX training data (X-ray images + annotation JSON).
BASE_DIR = Path("../data")
RAW_DIR = (
    BASE_DIR
    / "raw"
    / "DENTEX"
    / "train"
    / "training_data"
    / "quadrant-enumeration-disease"
)
RAW_IMG_DIR = RAW_DIR.joinpath("xrays")
RAW_JSON = RAW_DIR.joinpath("train_quadrant_enumeration_disease.json")

# Output location for the processed dataset.
OUTPUT_DIR = BASE_DIR.joinpath("processed_ultimate")

# Class map: annotation `category_id_3` value -> YOLO class index.
CLASS_MAPPING = {
    0: 0,  # Impacted
    1: 1,  # Caries
    2: 2,  # Periapical Lesion
    3: 3,  # Deep Caries
}

# Echo the active configuration so it is visible in the notebook output.
print(
    f"Hedef Ã‡Ã¶zÃ¼nÃ¼rlÃ¼k: {IMG_SIZE}x{IMG_SIZE}",
    f"Kontrast ArtÄ±rma (CLAHE): Aktif (Clip Limit: {CLAHE_CLIP})",
    f"Ham Veri Yolu: {RAW_DIR}",
    sep="\n",
)

In [None]:
def apply_clahe(image):
    """
    Enhance an X-ray with CLAHE (Contrast Limited Adaptive Histogram
    Equalization) so that caries and lesions become more prominent.

    Args:
        image: Image array. A three-dimensional array is treated as BGR and
            converted to grayscale first; any other array is fed to CLAHE as is.

    Returns:
        The equalized image converted back to a 3-channel BGR array.
    """
    # CLAHE operates on one channel, so collapse colour input to grayscale.
    has_channel_axis = image.ndim == 3
    single_channel = (
        cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if has_channel_axis else image
    )

    # Clip limit comes from the notebook-level CLAHE_CLIP setting; 8x8 tiles.
    equalizer = cv2.createCLAHE(clipLimit=CLAHE_CLIP, tileGridSize=(8, 8))
    equalized = equalizer.apply(single_channel)

    # Expand back to three channels (BGR) for YOLO.
    return cv2.cvtColor(equalized, cv2.COLOR_GRAY2BGR)

def resize_letterbox(image, target_size):
    """
    Fit an image into a square ``target_size`` x ``target_size`` canvas without
    distorting it: scale uniformly, centre the result, pad the rest with gray.

    Args:
        image: Image array whose first two axes are (height, width). It is
            pasted into a 3-channel canvas, so it is expected to have three
            channels.
        target_size: Side length of the square output canvas, in pixels.

    Returns:
        Tuple ``(image_padded, scale, dx, dy)`` where ``image_padded`` is the
        uint8 canvas of shape (target_size, target_size, 3), ``scale`` is the
        uniform resize factor, and ``dx`` / ``dy`` are the left / top padding
        offsets (pixels) at which the resized image was placed.
    """
    h, w = image.shape[:2]
    scale = min(target_size / w, target_size / h)

    # Round rather than truncate: (target_size / w) * w can come out a hair
    # below target_size in floating point, and int() would then drop a pixel
    # on the limiting side. Clamp to [1, target_size] so extremely elongated
    # images never ask for a zero-sized resize.
    nw = min(target_size, max(1, round(w * scale)))
    nh = min(target_size, max(1, round(h * scale)))

    # Shrink / enlarge the image to the fitted size.
    image_resized = cv2.resize(image, (nw, nh))

    # Gray (114) canvas that receives the resized image.
    image_padded = np.full((target_size, target_size, 3), 114, dtype=np.uint8)

    # Centre the resized image on the canvas.
    dx = (target_size - nw) // 2
    dy = (target_size - nh) // 2
    image_padded[dy:dy + nh, dx:dx + nw] = image_resized

    return image_padded, scale, dx, dy

In [None]:
# 1. Start from an empty output tree so files from an earlier run cannot
#    linger, then create the images/labels folders for each split.
if OUTPUT_DIR.exists():
    shutil.rmtree(OUTPUT_DIR)

for split in ['train', 'val']:
    for kind in ('images', 'labels'):
        (OUTPUT_DIR / split / kind).mkdir(parents=True, exist_ok=True)

# 2. Load the annotation JSON and split the images into train / val.
#    JSON is read as UTF-8 explicitly instead of relying on the platform's
#    default text encoding.
with open(RAW_JSON, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Shuffle a copy so the loaded `data` keeps its original image order;
# the fixed seed makes the split reproducible across runs.
all_images = list(data['images'])
random.seed(42)
random.shuffle(all_images)

# First 10% of the shuffled images become validation, the rest training.
split_idx = int(len(all_images) * 0.1)
val_images = all_images[:split_idx]
train_images = all_images[split_idx:]

splits = {'train': train_images, 'val': val_images}

# Index annotations by image id for O(1) lookup in the processing loop.
ann_dict = {}
for ann in data['annotations']:
    ann_dict.setdefault(ann['image_id'], []).append(ann)

print("ðŸš€ Ä°ÅŸlem BaÅŸlÄ±yor...")

# 3. Main loop: enhance, letterbox and save every image, then convert its
#    annotations to YOLO label lines.
def _coco_bbox_to_yolo(bbox, scale, dx, dy, img_size):
    """
    Map an [x, y, w, h] box from source-image pixels onto the letterboxed
    canvas and return it as normalized YOLO ``(xc, yc, w, h)``.

    The box corners are clipped to the canvas before the centre and size are
    derived; clamping centre and size independently would distort boxes that
    overflow the canvas. Returns ``None`` when nothing of the box is left.
    """
    x0 = bbox[0] * scale + dx
    y0 = bbox[1] * scale + dy
    x1 = x0 + bbox[2] * scale
    y1 = y0 + bbox[3] * scale

    x0 = max(0.0, min(img_size, x0))
    y0 = max(0.0, min(img_size, y0))
    x1 = max(0.0, min(img_size, x1))
    y1 = max(0.0, min(img_size, y1))

    if x1 <= x0 or y1 <= y0:
        return None

    return (
        (x0 + x1) / 2 / img_size,
        (y0 + y1) / 2 / img_size,
        (x1 - x0) / img_size,
        (y1 - y0) / img_size,
    )


# Images that could not be processed, by reason; reported after the loop.
skipped = {'missing': 0, 'unreadable': 0, 'write_failed': 0}

for split_name, images in splits.items():
    print(f"\n--- {split_name.upper()} Seti Ä°ÅŸleniyor ({len(images)} resim) ---")

    for img_info in tqdm(images):
        file_name = img_info['file_name']
        img_id = img_info['id']
        src_path = RAW_IMG_DIR / file_name

        if not src_path.exists():
            skipped['missing'] += 1
            continue

        # A. Read the image. cv2.imread signals failure by returning None
        #    rather than raising, so guard before using the result.
        img = cv2.imread(str(src_path))
        if img is None:
            skipped['unreadable'] += 1
            continue

        # B. Contrast enhancement (CLAHE).
        img_enhanced = apply_clahe(img)

        # C. Letterbox-resize to IMG_SIZE x IMG_SIZE.
        processed_img, scale, dx, dy = resize_letterbox(img_enhanced, IMG_SIZE)

        # D. Save the processed image; skip the labels if the write failed so
        #    no label file is left without its image.
        save_img_path = OUTPUT_DIR / split_name / 'images' / file_name
        if not cv2.imwrite(str(save_img_path), processed_img):
            skipped['write_failed'] += 1
            continue

        # E. Convert this image's annotations to YOLO lines.
        yolo_labels = []
        for ann in ann_dict.get(img_id, []):
            cat_id = ann.get('category_id_3')
            if cat_id not in CLASS_MAPPING:
                continue

            yolo_box = _coco_bbox_to_yolo(ann['bbox'], scale, dx, dy, IMG_SIZE)
            if yolo_box is None:
                continue

            xc, yc, wn, hn = yolo_box
            yolo_labels.append(f"{CLASS_MAPPING[cat_id]} {xc:.6f} {yc:.6f} {wn:.6f} {hn:.6f}")

        # Write the label file only when the image has at least one label.
        if yolo_labels:
            label_name = Path(file_name).stem + ".txt"
            save_label_path = OUTPUT_DIR / split_name / 'labels' / label_name
            with open(save_label_path, 'w') as f:
                f.write("\n".join(yolo_labels))

print(f"\nâœ… Ä°ÅŸlem TamamlandÄ±! Veriler burada: {OUTPUT_DIR}")
if any(skipped.values()):
    print(f"Skipped images: {skipped}")

In [None]:
# Dataset description file: image folders for each split, class count and
# class names. The content starts with an empty line and ends with a newline.
yaml_lines = [
    "",
    "train: /content/dataset/processed_ultimate/train/images",
    "val: /content/dataset/processed_ultimate/val/images",
    "",
    "nc: 4",
    "names:",
    "  0: Impacted",
    "  1: Caries",
    "  2: Periapical Lesion",
    "  3: Deep Caries",
    "",
]
yaml_content = "\n".join(yaml_lines)

# Store it next to the processed splits.
yaml_path = OUTPUT_DIR / "data.yaml"
with open(yaml_path, 'w') as f:
    f.write(yaml_content)

print("âœ… data.yaml oluÅŸturuldu.")

In [None]:
# Package the processed dataset as BASE_DIR/dental_ultimate.zip, with every
# entry stored under the output folder's name (processed_ultimate/...).
# shutil.make_archive rewrites the archive from scratch on every run, whereas
# `zip -r` updates an existing archive in place and would keep entries for
# files that no longer exist. It also removes the dependency on an external
# `zip` binary and on the hard-coded ../data path.
archive_path = shutil.make_archive(
    str(BASE_DIR / "dental_ultimate"),
    "zip",
    root_dir=str(BASE_DIR),
    base_dir=str(OUTPUT_DIR.relative_to(BASE_DIR)),
)
print(f"Archive written: {archive_path}")