In [None]:
!pip install kagglehub torch torchvision numpy pillow matplotlib tqdm

In [None]:
import kagglehub

# Download latest version
# `path` is the location reported by kagglehub; it is used further down as SOURCE_DIR.
path = kagglehub.dataset_download("orvile/brain-cancer-mri-dataset")
print("Path to dataset files:", path)

In [None]:
import kagglehub

# NOTE(review): this cell repeats the download cell directly above verbatim;
# presumably one of the two can be dropped — confirm before removing.
# Download latest version
path = kagglehub.dataset_download("orvile/brain-cancer-mri-dataset")
print("Path to dataset files:", path)

In [None]:
import os
import shutil
import random

# Maps each class folder name looked up under SOURCE_DIR to the folder name
# written under DEST_DIR/<split>/.
LABEL_MAP = {
    "brain_glioma": "0_glioma",
    "brain_menin": "1_menin",
    "brain_tumor": "2_tumor"
}
# Root that is joined with the LABEL_MAP keys to find the source images.
SOURCE_DIR = path
# Root of the train/val/test tree produced by split_and_copy.
DEST_DIR = "dataset"
# Seed the global RNG used by random.shuffle in split_and_copy.
random.seed(42)

def ensure_dir(p):
    """Create directory ``p`` (and any missing parents) if it does not exist yet.

    Uses ``exist_ok=True`` instead of a separate existence check, so the call
    is idempotent and cannot fail when the directory appears between the
    check and the creation.

    Args:
        p: Path of the directory to create.
    """
    os.makedirs(p, exist_ok=True)

# Lay out DEST_DIR/<split>/<class folder> for every split before any file is copied.
for split in ("train", "val", "test"):
    for label in LABEL_MAP.values():
        target_dir = os.path.join(DEST_DIR, split, label)
        ensure_dir(target_dir)

def split_and_copy(class_name, train_ratio=0.7, val_ratio=0.15):
    """Shuffle the ``.jpg`` files of one class and copy them into train/val/test.

    Files are read from ``SOURCE_DIR/<class_name>`` and copied to
    ``DEST_DIR/<split>/<LABEL_MAP[class_name]>``. The first
    ``int(train_ratio * n)`` shuffled files go to ``train``, the next
    ``int(val_ratio * n)`` to ``val`` and the remainder to ``test``.

    Args:
        class_name: Key of ``LABEL_MAP``; also the source sub-folder name.
        train_ratio: Fraction of the files assigned to the training split.
        val_ratio: Fraction of the files assigned to the validation split.

    Raises:
        ValueError: If the ratios are negative or sum to more than 1.
    """
    if train_ratio < 0 or val_ratio < 0 or train_ratio + val_ratio > 1:
        raise ValueError("train_ratio and val_ratio must be >= 0 and sum to at most 1")

    src_folder = os.path.join(SOURCE_DIR, class_name)
    label_folder = LABEL_MAP[class_name]

    # os.listdir returns entries in arbitrary order; sorting first makes the
    # seeded shuffle (and therefore the split) reproducible across machines.
    files = sorted(f for f in os.listdir(src_folder) if f.lower().endswith(".jpg"))
    random.shuffle(files)

    total = len(files)
    train_split = int(train_ratio * total)
    val_split = int(val_ratio * total)
    train_files = files[:train_split]
    val_files = files[train_split:train_split + val_split]
    test_files = files[train_split + val_split:]

    for split_name, split_files in (("train", train_files), ("val", val_files), ("test", test_files)):
        dest_folder = os.path.join(DEST_DIR, split_name, label_folder)
        # Do not rely on another cell having created the target folder.
        os.makedirs(dest_folder, exist_ok=True)
        for fname in split_files:
            shutil.copy(os.path.join(src_folder, fname), os.path.join(dest_folder, fname))

    print(f"{class_name}: {len(train_files)} train, {len(val_files)} val, {len(test_files)} test")

# Split every class listed in LABEL_MAP with the default 70/15/15 ratios.
for cls in LABEL_MAP:
    split_and_copy(cls)

In [None]:
import numpy as np
from PIL import Image
from datetime import datetime

# Root of the train/val/test image tree that is read by process_folder.
SOURCE_ROOT = "dataset"
# Root under which the converted .npy arrays are written (same sub-folder layout).
DEST_ROOT = "preprocessed_data"
# Size passed to PIL's Image.resize for every image.
TARGET_SIZE = (256, 256)
# File that log() appends to.
LOG_PATH = "logs/resize_and_normalize.log"

def ensure_dir(path):
    """Create directory ``path`` (and any missing parents) if it does not exist yet.

    Uses ``exist_ok=True`` instead of a separate existence check, so the call
    is idempotent and cannot fail when the directory appears between the
    check and the creation.

    Args:
        path: Path of the directory to create.
    """
    os.makedirs(path, exist_ok=True)

def log(message):
    """Print ``message`` with a ``[YYYY-mm-dd HH:MM:SS]`` prefix and append the same line to LOG_PATH."""
    stamp = datetime.now().strftime("[%Y-%m-%d %H:%M:%S]")
    line = f"{stamp} {message}"
    print(line)
    # Append mode: the log file is re-opened for every message.
    with open(LOG_PATH, "a", encoding="utf-8") as handle:
        handle.write(f"{line}\n")

def process_and_save_image(src_path, dest_path):
    """Convert one image to a normalised float32 array and save it with ``np.save``.

    The image is converted to RGB, resized to ``TARGET_SIZE``, scaled to the
    range [0, 1] and written to ``dest_path``. Any failure is logged as an
    ``[ERROR]`` line instead of being raised, so one bad file does not stop
    the whole conversion run.

    Args:
        src_path: Path of the image file to read.
        dest_path: Path of the array file to write.
    """
    try:
        # The context manager releases the file handle as soon as the pixel
        # data has been copied into the converted image.
        with Image.open(src_path) as raw:
            image = raw.convert("RGB").resize(TARGET_SIZE)
        image_array = np.asarray(image).astype(np.float32) / 255.0
        np.save(dest_path, image_array)
        log(f"[OK] {src_path} → {dest_path}")
    except Exception as e:
        log(f"[ERROR] {src_path} → {e}")

def process_folder(split):
    """Convert every ``.jpg`` under ``SOURCE_ROOT/<split>`` into a ``.npy`` under ``DEST_ROOT/<split>``.

    The class sub-folder layout is mirrored. Entries are visited in sorted
    order so the log output is stable between runs.

    Args:
        split: Name of the split sub-folder, e.g. ``"train"``.
    """
    source_dir = os.path.join(SOURCE_ROOT, split)
    dest_dir = os.path.join(DEST_ROOT, split)
    for class_name in sorted(os.listdir(source_dir)):
        src_class_dir = os.path.join(source_dir, class_name)
        if not os.path.isdir(src_class_dir):
            # Stray files next to the class folders are not image folders.
            continue
        dst_class_dir = os.path.join(dest_dir, class_name)
        ensure_dir(dst_class_dir)
        for filename in sorted(os.listdir(src_class_dir)):
            # splitext swaps only the real extension: it also handles ".JPG"
            # and leaves a ".jpg" in the middle of a name untouched.
            stem, ext = os.path.splitext(filename)
            if ext.lower() == ".jpg":
                src_file = os.path.join(src_class_dir, filename)
                dst_file = os.path.join(dst_class_dir, stem + ".npy")
                process_and_save_image(src_file, dst_file)

# The log directory must exist before the log file is opened. Opening in "w"
# mode starts a fresh log with a header line; log() appends to it afterwards.
ensure_dir("logs")
with open(LOG_PATH, "w", encoding="utf-8") as f:
    f.write("=== Resize & Normalize Log Başladı ===\n")
# Convert each split in turn; process_and_save_image logs one line per image.
for split in ["train", "val", "test"]:
    process_folder(split)
log("=== Tüm işlemler tamamlandı ===")

In [None]:
import os
from torch.utils.data import Dataset
import numpy as np
import torch
from PIL import Image
from torchvision import transforms

class CustomTumorDataset(Dataset):
    """Map-style dataset over ``root_dir/<class folder>/*.npy`` image arrays.

    The class folders are fixed to ``0_glioma``, ``1_menin`` and ``2_tumor``;
    the integer before the underscore is used as the label. Class folders
    that do not exist are skipped.

    Args:
        root_dir: Directory containing the class folders.
        transform: Callable applied to the PIL image of each sample. When
            ``None`` a default torchvision pipeline is built (with
            augmentation if ``is_training`` is true).
        is_training: Selects the augmenting default pipeline; only used when
            ``transform`` is ``None``.
    """

    def __init__(self, root_dir, transform=None, is_training=True):
        self.root_dir = root_dir
        self.is_training = is_training
        self.samples = []

        self.class_names = ['0_glioma', '1_menin', '2_tumor']
        self.class_to_idx = {name: int(name.split('_')[0]) for name in self.class_names}
        self.classes = self.class_names

        if transform is None:
            transform = self._default_transform(is_training)
        self.transform = transform

        for class_name in self.class_names:
            class_path = os.path.join(root_dir, class_name)
            if not os.path.isdir(class_path):
                continue

            label = self.class_to_idx[class_name]

            # Sorted so that the sample order does not depend on the
            # filesystem's directory listing order.
            for file_name in sorted(os.listdir(class_path)):
                if file_name.lower().endswith('.npy'):
                    file_path = os.path.join(class_path, file_name)
                    self.samples.append((file_path, label))

    @staticmethod
    def _default_transform(is_training):
        """Build the default pipeline: augmenting for training, resize + normalise otherwise."""
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        if is_training:
            return transforms.Compose([
                transforms.Resize((256, 256)),
                transforms.RandomHorizontalFlip(p=0.7),
                transforms.RandomVerticalFlip(p=0.5),
                transforms.RandomRotation(degrees=20),
                transforms.RandomAffine(degrees=0, translate=(0.15, 0.15), scale=(0.85, 1.15), shear=10),
                transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.2, hue=0.1),
                transforms.GaussianBlur(kernel_size=5, sigma=(0.1, 2.0)),
                transforms.ToTensor(),
                normalize,
            ])
        return transforms.Compose([
            transforms.Resize((256, 256)),
            transforms.ToTensor(),
            normalize,
        ])

    @staticmethod
    def _to_uint8(array):
        """Convert a [0, 1] float array back to uint8 pixel values.

        Values are rounded rather than truncated: ``(k / 255) * 255`` in
        float32 may land just below ``k``, which truncation would turn into
        ``k - 1``. Arrays that are already uint8 are returned unchanged.
        """
        if array.dtype == np.uint8:
            return array
        return np.clip(np.rint(array * 255.0), 0, 255).astype(np.uint8)

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx`` with the transform applied."""
        file_path, label = self.samples[idx]
        if file_path.lower().endswith('.npy'):
            image = Image.fromarray(self._to_uint8(np.load(file_path)))
        else:
            image = Image.open(file_path).convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, label

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class SEBlock(nn.Module):
    """Squeeze-and-excitation gate: rescales each channel by a learned factor in (0, 1).

    Args:
        channels: Number of channels of the input feature map.
        reduction: Divisor for the width of the bottleneck layer.
    """

    def __init__(self, channels, reduction=16):
        super().__init__()
        # Keep at least one hidden unit; channels // reduction is 0 whenever
        # channels < reduction, which would leave an empty bottleneck.
        hidden = max(1, channels // reduction)
        self.squeeze = nn.AdaptiveAvgPool2d(1)
        self.excitation = nn.Sequential(
            nn.Linear(channels, hidden, bias=False),
            nn.GELU(),
            nn.Linear(hidden, channels, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Return ``x`` with every channel multiplied by its gate value."""
        b, c, _, _ = x.size()
        y = self.squeeze(x).view(b, c)
        y = self.excitation(y).view(b, c, 1, 1)
        # (b, c, 1, 1) broadcasts over the spatial dimensions of x.
        return x * y

class ConvBlock(nn.Module):
    """3x3 convolution, batch norm and GELU, optionally SE-gated, added to a skip path.

    The skip path is the identity when the channel count is unchanged and a
    1x1 convolution otherwise.
    """

    def __init__(self, in_channels, out_channels, use_se=True):
        super().__init__()
        feature_layers = [
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.GELU(),
        ]
        self.conv = nn.Sequential(*feature_layers)

        if use_se:
            self.se = SEBlock(out_channels)
        else:
            self.se = nn.Identity()

        if in_channels == out_channels:
            self.residual = nn.Identity()
        else:
            self.residual = nn.Conv2d(in_channels, out_channels, 1)

    def forward(self, x):
        """Return the gated convolution output plus the skip connection."""
        gated = self.se(self.conv(x))
        shortcut = self.residual(x)
        return gated + shortcut

class VGGCustom(nn.Module):
    """Three convolutional stages followed by an MLP classifier.

    Each stage is three ``ConvBlock`` layers, ``Dropout2d(0.05)`` and a 2x2
    max-pool. The stage output is adaptively average-pooled to 4x4 and fed to
    a three-hidden-layer classifier. After construction only the first stage
    is trainable; ``freeze_blocks_until`` opens further stages.

    Args:
        num_classes: Size of the final linear layer.
    """

    def __init__(self, num_classes: int = 3):
        super().__init__()

        # Channel widths along each stage: consecutive pairs become one ConvBlock.
        self.block1 = self._make_stage((3, 29, 58, 115))
        self.block2 = self._make_stage((115, 58, 29, 29))
        self.block3 = self._make_stage((29, 58, 115, 230))

        self.blocks = nn.ModuleList([self.block1, self.block2, self.block3])

        self.avgpool = nn.AdaptiveAvgPool2d((4, 4))

        head = [nn.Flatten()]
        hidden_spec = ((230 * 4 * 4, 1024, 0.1), (1024, 512, 0.1), (512, 256, 0.05))
        for n_in, n_out, drop_p in hidden_spec:
            head.extend([nn.Linear(n_in, n_out), nn.BatchNorm1d(n_out), nn.GELU(), nn.Dropout(drop_p)])
        head.append(nn.Linear(256, num_classes))
        self.classifier = nn.Sequential(*head)

        self._initialize_weights()
        self.freeze_blocks_until(0)

    @staticmethod
    def _make_stage(widths):
        """Build one stage: a ConvBlock per consecutive width pair, then dropout and pooling."""
        layers = [ConvBlock(c_in, c_out) for c_in, c_out in zip(widths[:-1], widths[1:])]
        layers.append(nn.Dropout2d(0.05))
        layers.append(nn.MaxPool2d(2))
        return nn.Sequential(*layers)

    def _initialize_weights(self):
        """Re-initialise Conv2d, BatchNorm2d and Linear weights and zero their biases."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
            elif isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
            else:
                continue
            # Shared by the three layer types handled above.
            if module.bias is not None:
                nn.init.constant_(module.bias, 0)

    def freeze_blocks_until(self, last_open_idx: int):
        """Make stages ``0..last_open_idx`` trainable and freeze the remaining stages."""
        for position, stage in enumerate(self.blocks):
            stage.requires_grad_(position <= last_open_idx)

    def forward(self, x):
        """Run all stages, pool to 4x4 and return the classifier output."""
        features = x
        for stage in self.blocks:
            features = stage(features)
        pooled = self.avgpool(features)
        return self.classifier(pooled)

In [None]:
import sys
import os
# ``__file__`` is not defined when this code runs inside a notebook kernel, so
# fall back to the current working directory instead of raising NameError.
_this_dir = os.path.dirname(os.path.abspath(__file__)) if "__file__" in globals() else os.getcwd()
sys.path.append(os.path.abspath(os.path.join(_this_dir, '..')))
from collections import Counter
import os, time, torch, matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from tqdm import tqdm
import torchvision.transforms.v2 as T
try:
    from dataset.custom_dataset import CustomTumorDataset
    from models.vgg_custom import VGGCustom
except ModuleNotFoundError:
    # When run as a notebook these packages are not importable; the two
    # classes are then expected to come from the cells above. Re-raise if
    # they are not defined there either.
    if "CustomTumorDataset" not in globals() or "VGGCustom" not in globals():
        raise
import copy
from torch.amp import autocast, GradScaler
import torch.multiprocessing as mp
from torch.utils.data.distributed import DistributedSampler
import torch.distributed as dist
import torch.nn.functional as F

class FocalLoss(torch.nn.Module):
    """Focal loss: cross entropy scaled by ``(1 - p_t) ** gamma``.

    ``p_t`` is the predicted probability of the target class. It is computed
    from the *unweighted* cross entropy; the per-class weight ``alpha`` is
    applied afterwards, so the class weights do not distort the focusing term.

    Args:
        alpha: Optional per-class weights (tensor or sequence of length C).
        gamma: Focusing exponent; ``0`` reduces the loss to (weighted) cross entropy.
        reduction: ``'mean'``, ``'sum'``, or anything else for the unreduced
            per-sample loss.
    """

    def __init__(self, alpha=None, gamma=2, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, inputs, targets):
        """Return the focal loss of logits ``inputs`` against class indices ``targets``."""
        ce_loss = F.cross_entropy(inputs, targets, reduction='none')
        # exp(-CE) equals the target-class probability only for unweighted CE.
        pt = torch.exp(-ce_loss)
        focal_loss = ((1 - pt) ** self.gamma) * ce_loss
        if self.alpha is not None:
            alpha = torch.as_tensor(self.alpha, dtype=focal_loss.dtype, device=focal_loss.device)
            focal_loss = alpha[targets] * focal_loss
        if self.reduction == 'mean':
            return focal_loss.mean()
        elif self.reduction == 'sum':
            return focal_loss.sum()
        else:
            return focal_loss

# Number of epochs, per-step batch size and base learning rate.
EPOCHS, BATCH_SIZE, LR = 50, 16, 3e-4  # Batch size reverted to its previous value
# Train on the GPU when one is available, otherwise on the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
ACCUMULATION_STEPS = 2  # Gradient accumulation steps

def build_dataloaders():
    """Build the training and validation ``DataLoader`` objects.

    Both datasets read ``preprocessed_data/<split>`` and only convert the
    image to a float32 tensor scaled to [0, 1]. The class list and label
    distribution of each split are printed.

    Returns:
        Tuple ``(train_loader, val_loader)``.
    """
    cpu_pre = T.Compose([T.ToImage(),
                         T.ToDtype(torch.float32, scale=True)])
    train_ds = CustomTumorDataset("preprocessed_data/train", transform=cpu_pre)
    val_ds   = CustomTumorDataset("preprocessed_data/val",   transform=cpu_pre)

    print("[LOG] Eğitim veri kümesinin sinif listesi:", train_ds.classes)
    print("[LOG] valid veri kümesinin sinif listesi:", val_ds.classes)
    train_labels = [label for _, label in train_ds.samples]
    val_labels   = [label for _, label in val_ds.samples]

    print("[LOG] train label dağılımı:", Counter(train_labels))
    print("[LOG] valid label dağılımı:", Counter(val_labels))

    # Settings shared by both loaders.
    loader_kwargs = dict(num_workers=4,
                         pin_memory=True,
                         persistent_workers=True,
                         prefetch_factor=2)
    # The samples are stored class by class, so without shuffling every batch
    # would contain a single class. drop_last avoids a trailing batch of size
    # 1, which BatchNorm1d rejects in training mode.
    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,
                              drop_last=True, **loader_kwargs)
    val_loader   = DataLoader(val_ds, batch_size=BATCH_SIZE*2, shuffle=False,
                              **loader_kwargs)
    return train_loader, val_loader

def run_epoch(model, loader, criterion, optim=None, gpu_aug=None, scaler=None):
    """Run one pass over ``loader`` and return ``(mean loss, accuracy)``.

    The pass is a training epoch when ``optim`` is given and an evaluation
    pass otherwise. Training uses gradient accumulation over
    ``ACCUMULATION_STEPS`` batches, gradient clipping to norm 1.0 and the
    given ``GradScaler``.

    Args:
        model: Network to train or evaluate.
        loader: Iterable of ``(inputs, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optim: Optimizer; ``None`` selects evaluation mode.
        gpu_aug: Optional callable applied to the inputs on ``DEVICE`` during training.
        scaler: ``GradScaler`` used for the backward pass; required when training.

    Returns:
        Tuple of the sample-weighted mean loss and the fraction of correct predictions.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    train = optim is not None
    model.train() if train else model.eval()
    tot_loss = correct = total = 0
    # Mixed precision only makes sense on the GPU.
    use_amp = DEVICE.type == "cuda"

    iterator = tqdm(loader, leave=True, ascii=True, dynamic_ncols=True) if train else loader

    def _optimizer_step():
        # Unscale before clipping so the threshold applies to the true gradients.
        scaler.unscale_(optim)
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        scaler.step(optim)
        scaler.update()
        optim.zero_grad(set_to_none=True)

    if train:
        optim.zero_grad(set_to_none=True)
    pending_backward = False

    for i, (X, y) in enumerate(iterator):
        X, y = X.to(DEVICE, non_blocking=True), y.to(DEVICE, non_blocking=True)
        if train and gpu_aug is not None:
            X = gpu_aug(X)

        with torch.set_grad_enabled(train):
            with autocast(DEVICE.type, enabled=use_amp):
                out  = model(X)
                loss = criterion(out, y)

            if train:
                # Scale down so the accumulated gradient is an average over the group.
                scaler.scale(loss / ACCUMULATION_STEPS).backward()
                pending_backward = True
                if (i + 1) % ACCUMULATION_STEPS == 0:
                    _optimizer_step()
                    pending_backward = False

        tot_loss += loss.item() * y.size(0)
        correct  += (out.argmax(1) == y).sum().item()
        total    += y.size(0)

    # Apply the gradients of a trailing, incomplete accumulation group; without
    # this they would be carried over into the next epoch.
    if train and pending_backward:
        _optimizer_step()

    if total == 0:
        raise ValueError("run_epoch received a loader without any samples")

    return tot_loss / total, correct / total

def _window_trend(history, span=3):
    """Return mean(last ``span`` values) - mean(up to ``span`` values before them).

    Returns ``None`` while fewer than ``span + 1`` values are available, i.e.
    while there is nothing to compare the recent window against.
    """
    if len(history) < span + 1:
        return None
    recent = history[-span:]
    previous = history[-2 * span:-span]
    return sum(recent) / len(recent) - sum(previous) / len(previous)


def _save_history_plot(ep, train_acc, val_acc, train_loss, val_loss):
    """Save the accuracy and loss curves of all epochs so far to ``logs/epoch_XX_logs.png``."""
    os.makedirs("logs", exist_ok=True)
    epochs = range(1, len(train_acc) + 1)
    fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 4))

    ax_acc.plot(epochs, train_acc, label='Train Acc')
    ax_acc.plot(epochs, val_acc, label='Val Acc')
    ax_acc.set_xlabel('Epoch')
    ax_acc.set_ylabel('Accuracy')
    ax_acc.legend()
    ax_acc.set_title('Accuracy over epochs')

    ax_loss.plot(epochs, train_loss, label='Train Loss')
    ax_loss.plot(epochs, val_loss, label='Val Loss')
    ax_loss.set_xlabel('Epoch')
    ax_loss.set_ylabel('Loss')
    ax_loss.legend()
    ax_loss.set_title('Loss over epochs')

    fig.tight_layout()
    fig.savefig(f'logs/epoch_{ep:02d}_logs.png')
    plt.close(fig)


def train():
    """Train ``VGGCustom`` with focal loss, progressive unfreezing and rollback.

    Per epoch the function:
      * runs one training and one validation pass,
      * keeps the last few model/optimizer states for rollback,
      * halves / raises the LR multiplier when over- / under-fitting is
        detected and rolls back two checkpoints after two such epochs in a row,
      * saves the best model (by validation accuracy) to
        ``models/best_vgg_custom.pt``,
      * opens the next frozen block when validation accuracy stalls, and
        stops early once all blocks are open and it still stalls,
      * writes the accuracy/loss curves to ``logs/``.
    """
    import math  # only needed by the LR schedule below

    # CUDA optimisations
    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.deterministic = False
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True

    # torch.save and savefig do not create missing parent directories.
    os.makedirs("models", exist_ok=True)
    os.makedirs("logs", exist_ok=True)

    train_loader, val_loader = build_dataloaders()

    # Inverse-frequency class weights, normalised to sum to the number of
    # classes present. bincount keeps one slot per class even if a class has
    # no training samples (its weight is then 0).
    num_classes = 3
    train_labels = torch.tensor([label for _, label in train_loader.dataset.samples], dtype=torch.long)
    if train_labels.numel() == 0:
        raise RuntimeError("No training samples found - check the preprocessed_data/train folder")
    class_sample_count = torch.bincount(train_labels, minlength=num_classes).float()
    present = class_sample_count > 0
    class_weights = torch.zeros_like(class_sample_count)
    class_weights[present] = 1. / class_sample_count[present]
    class_weights = class_weights / class_weights.sum() * present.sum()
    class_weights = class_weights.to(DEVICE)

    model = VGGCustom(num_classes=num_classes).to(DEVICE)
    # Register every parameter up front. Frozen parameters have no gradient
    # and are skipped by the optimizer, so unfreezing a block later needs no
    # new optimizer and the scheduler stays attached to this one.
    optim = torch.optim.AdamW(model.parameters(), lr=LR)
    criterion = FocalLoss(alpha=class_weights, gamma=2)
    scaler = GradScaler(DEVICE.type, enabled=DEVICE.type == "cuda")

    # LR schedule in units of epochs (the scheduler is stepped once per
    # epoch): linear warm-up over the first epochs, then cosine decay.
    warmup_epochs = 2
    # Multiplier changed by the over-/under-fitting logic. It is part of the
    # lambda so the scheduler does not overwrite those adjustments.
    lr_state = {"scale": 1.0}

    def lr_lambda(epoch_idx):
        if epoch_idx < warmup_epochs:
            # Starts above zero so the first epoch actually trains.
            base = float(epoch_idx + 1) / float(warmup_epochs)
        else:
            progress = float(epoch_idx - warmup_epochs) / float(max(1, EPOCHS - warmup_epochs))
            base = 0.5 * (1.0 + math.cos(math.pi * min(1.0, progress)))
        return base * lr_state["scale"]

    scheduler = torch.optim.lr_scheduler.LambdaLR(optim, lr_lambda)

    gpu_aug = T.Identity()

    # Limit the number of cached checkpoints
    max_cache_size = 5
    cache = []
    stuck_count = 0
    val_loss_hist = []
    val_acc_hist = []
    train_loss_hist = []
    train_acc_hist = []
    best_val_acc = 0
    patience = 5
    no_improve_epochs = 0

    for ep in range(1, EPOCHS + 1):
        t0 = time.time()

        tr_l, tr_a = run_epoch(model, train_loader, criterion,
                               optim=optim, gpu_aug=gpu_aug, scaler=scaler)
        vl_l, vl_a = run_epoch(model, val_loader, criterion)

        # Update the checkpoint cache (bounded size)
        cache.append({
            "model": copy.deepcopy(model.state_dict()),
            "optim": copy.deepcopy(optim.state_dict()),
            "train_loss": tr_l,
            "val_loss": vl_l,
            "train_acc": tr_a,
            "val_acc": vl_a
        })
        if len(cache) > max_cache_size:
            cache.pop(0)

        # Free cached GPU memory
        torch.cuda.empty_cache()

        # Full histories are kept for plotting; the trend helper looks only
        # at the most recent entries.
        train_loss_hist.append(tr_l)
        val_loss_hist.append(vl_l)
        train_acc_hist.append(tr_a)
        val_acc_hist.append(vl_a)

        # --- Over-/under-fitting criteria ---
        overfit = False
        underfit = False

        # 1. Accuracy gap and trend
        acc_trend = _window_trend(val_acc_hist)
        if acc_trend is not None and acc_trend < 0:
            if tr_a - vl_a > 0.15:
                overfit = True
            if tr_a < 0.6 and vl_a < 0.6:
                underfit = True

        # 2. Loss gap and trend
        loss_trend = _window_trend(val_loss_hist)
        if loss_trend is not None and loss_trend > 0:
            if vl_l - tr_l > 0.2:
                overfit = True
            if tr_l > 1.2 and vl_l > 1.2:
                underfit = True

        # --- Dynamic LR (applied by scheduler.step() below) and rollback ---
        if overfit:
            lr_state["scale"] *= 0.5
            stuck_count += 1
            print(f"[CACHE] Overfitting tespit edildi! LR yarıya indirildi. (Ep{ep})")
        elif underfit:
            lr_state["scale"] *= 1.2
            stuck_count += 1
            print(f"[CACHE] Underfitting tespit edildi! LR artırıldı. (Ep{ep})")
        else:
            stuck_count = 0

        rolled_back = False
        if stuck_count >= 2 and len(cache) >= 3:  # go back two checkpoints
            model.load_state_dict(cache[-3]["model"])
            optim.load_state_dict(cache[-3]["optim"])
            print(f"[CACHE] 2 kez fitting problemi! 2 önceki ağırlıklara dönüldü. (Ep{ep})")
            stuck_count = 0
            rolled_back = True

        stop_message = None
        if not rolled_back:
            # Track the best model
            if vl_a > best_val_acc:
                best_val_acc = vl_a
                no_improve_epochs = 0
                torch.save(model.state_dict(), "models/best_vgg_custom.pt")
                print(f"[✓] Yeni en iyi model kaydedildi! Doğruluk: {vl_a:.4f}")
            else:
                no_improve_epochs += 1

            # Progressive unfreezing comes before early stopping: while frozen
            # blocks remain, a stall opens the next block. Previously the
            # patience of 5 ended training before the threshold of 7 for the
            # last block could ever be reached.
            opened = sum(any(p.requires_grad for p in blk.parameters()) for blk in model.blocks)
            if opened < len(model.blocks):
                block_open_threshold = 7 if opened == 2 else 4
                if no_improve_epochs >= block_open_threshold:
                    model.freeze_blocks_until(opened)
                    print(f"[+] {ep}. epoch → Block-{opened+1} açıldı")
                    no_improve_epochs = 0
            elif no_improve_epochs >= patience:
                stop_message = f"[!] Early stopping: {patience} epoch boyunca iyileşme yok."

            if stop_message is None and vl_a >= 0.955:
                stop_message = f"[✓] %{vl_a:.4f} doğruluk – eğitim bitti."

        # Stepped on every epoch (rollback epochs too) so the schedule stays
        # aligned with the epoch counter.
        scheduler.step()
        current_lr = optim.param_groups[0]['lr']
        print(f"[Ep{ep:02d}] time={time.time()-t0:.1f}s | "
              f"train_acc={tr_a:.4f} | val_acc={vl_a:.4f} | "
              f"train_loss={tr_l:.4f} | val_loss={vl_l:.4f} | "
              f"lr={current_lr:.2e} | no_imp={no_improve_epochs} | "
              f"{time.time()-t0:.1f}s| "
              f"block : {sum([any(p.requires_grad for p in blk.parameters()) for blk in model.blocks])}")

        # Save the curves before a possible break so the last epoch is plotted too.
        _save_history_plot(ep, train_acc_hist, val_acc_hist, train_loss_hist, val_loss_hist)

        if stop_message is not None:
            print(stop_message)
            break

def freeze_support_for_win():
    """Select the "spawn" multiprocessing start method, on Windows only.

    On other platforms the default start method is left untouched: forcing
    "spawn" there means worker processes have to re-import everything they
    use, which generally does not work for classes defined interactively
    (e.g. in notebook cells).
    """
    if sys.platform == "win32":
        mp.set_start_method("spawn", force=True)

if __name__ == "__main__":
    # __name__ is "__main__" inside a notebook kernel as well. Training is
    # started by the explicit train() call further down, so only the
    # multiprocessing setup happens here; calling train() in both places
    # would run the whole training twice.
    freeze_support_for_win()


In [None]:
train()  # Starts training; the per-epoch log figures are saved in the logs/ folder

# Show the log figure of the last epoch
import matplotlib.pyplot as plt
import glob
log_imgs = sorted(glob.glob('logs/epoch_*_logs.png'))
if log_imgs:
    # Imported under an alias so the notebook-global PIL ``Image`` used by the
    # preprocessing and dataset code is not overwritten.
    from IPython.display import Image as IPyImage, display
    display(IPyImage(filename=log_imgs[-1]))