In [None]:
import os
import shutil
import pandas as pd


base_dir = "/content/skin-cancer-mnist"
image_dirs = [
    os.path.join(base_dir, "HAM10000_images_part_1"),
    os.path.join(base_dir, "HAM10000_images_part_2")
]
metadata_path = os.path.join(base_dir, "HAM10000_metadata.csv")

# Output folder, laid out in ImageFolder format (one sub-folder per label)
output_dir = os.path.join(base_dir, "ham10000_imagefolder")
os.makedirs(output_dir, exist_ok=True)

# Read the label metadata
df = pd.read_csv(metadata_path)

# Create every label folder once up front instead of once per image row
for label in df["dx"].unique():
    os.makedirs(os.path.join(output_dir, label), exist_ok=True)

# Copy each image into the folder of its label.
# Only two columns are needed, so iterate them directly rather than
# materialising a Series per row with iterrows().
missing = 0
for image_id, label in zip(df["image_id"], df["dx"]):
    image_filename = f"{image_id}.jpg"
    label_dir = os.path.join(output_dir, label)

    # The images are spread over two folders: take the first one that has the file
    for image_dir in image_dirs:
        src = os.path.join(image_dir, image_filename)
        if os.path.exists(src):
            shutil.copy(src, os.path.join(label_dir, image_filename))
            break
    else:
        # Loop finished without a break: the file is in neither folder
        missing += 1
        print(f"❌ Eksik görsel: {image_filename}")

print("✅ Tüm görseller etiketlere göre klasörlendi.")
print(f"Toplam eksik görsel sayısı: {missing}")

In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import classification_report, confusion_matrix
import timm
import numpy as np
import random
import matplotlib.pyplot as plt
import seaborn as sns

# Reproducibility: seed every RNG used below
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

# Hyperparameters
BATCH_SIZE = 32
EPOCHS = 15
LR = 1e-4
IMG_SIZE = 224
NUM_CLASSES = 7
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Dataset path (ImageFolder layout: one sub-folder per class)
DATASET_DIR = "/content/skin-cancer-mnist/ham10000_imagefolder"

# Transforms
# NOTE(review): no Normalize step is applied; the pretrained timm checkpoints
# may expect mean/std-normalised input - confirm (see timm.data.resolve_data_config).
transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor()
])

# Load the dataset and split it 70 / 15 / 15 (the remainder goes to the test set)
dataset = datasets.ImageFolder(DATASET_DIR, transform=transform)
train_len = int(0.7 * len(dataset))
val_len = int(0.15 * len(dataset))
test_len = len(dataset) - train_len - val_len
# A dedicated, explicitly seeded generator keeps the split membership fixed
# regardless of how much of the global RNG stream was consumed beforehand.
train_set, val_set, test_set = random_split(
    dataset,
    [train_len, val_len, test_len],
    generator=torch.Generator().manual_seed(SEED),
)
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE)

# Model names: display name -> timm architecture identifier
model_names = {
    "Simple_ViT": "vit_base_patch16_224",
    "DeiT": "deit_base_patch16_224",
    "Swin": "swin_base_patch4_window7_224"
}

# Filled in by evaluate_model() and train_model() respectively, keyed by display name
results = {}
history = {}

def evaluate(loader, model, criterion=None, device=None):
    """Measure the accuracy (and optionally the loss) of `model` over `loader`.

    The model is switched to eval mode and is left in that mode on return.

    Args:
        loader: Iterable of ``(images, labels)`` batches; must support ``len()``
            when a criterion is given.
        model: Module called as ``model(images)``; the predicted class is the
            argmax over dimension 1 of its output.
        criterion: Optional callable ``criterion(outputs, labels)`` returning a
            scalar tensor. ``None`` skips the loss computation.
        device: Device the batches are moved to. Defaults to the notebook-wide
            ``DEVICE`` when omitted.

    Returns:
        Tuple ``(acc, avg_loss)``: the fraction of correctly classified samples,
        and the mean of the per-batch losses (``None`` when no criterion is given).

    Raises:
        ZeroDivisionError: If the loader yields no samples.
    """
    if device is None:
        device = DEVICE
    # Identity check: a criterion object that happens to be falsy must still be used
    use_loss = criterion is not None

    model.eval()
    total_loss, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            if use_loss:
                total_loss += criterion(outputs, labels).item()
            predicted = outputs.argmax(dim=1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    acc = correct / total
    # Averaged per batch (not per sample), so a short last batch weighs as much as a full one
    avg_loss = total_loss / len(loader) if use_loss else None
    return acc, avg_loss

def train_model(model, name):
    """Train `model` for EPOCHS epochs and record its learning curves.

    The model is moved to DEVICE and optimised with Adam (learning rate LR)
    against a cross-entropy loss over `train_loader`. After every epoch it is
    scored on `val_loader` through `evaluate`, and one summary line is printed.

    Args:
        model: Network to train; modified in place.
        name: Display name, used in the log output and as the key in `history`.

    Returns:
        The same `model` object after training.

    Side effects:
        Stores the per-epoch lists "train_loss", "train_acc", "val_loss" and
        "val_acc" under ``history[name]`` once all epochs have finished.
    """
    print(f"\n[+] Training model: {name}")
    model.to(DEVICE)
    loss_fn = nn.CrossEntropyLoss()
    adam = optim.Adam(model.parameters(), lr=LR)

    # One list per tracked metric, appended to once per epoch
    curves = {"train_loss": [], "train_acc": [], "val_loss": [], "val_acc": []}

    for epoch in range(EPOCHS):
        model.train()  # evaluate() leaves the model in eval mode, so switch back each epoch
        running_loss = 0
        n_correct = 0
        n_seen = 0

        for batch_images, batch_labels in train_loader:
            batch_images = batch_images.to(DEVICE)
            batch_labels = batch_labels.to(DEVICE)

            adam.zero_grad()
            logits = model(batch_images)
            batch_loss = loss_fn(logits, batch_labels)
            batch_loss.backward()
            adam.step()

            running_loss += batch_loss.item()
            n_correct += (logits.argmax(dim=1) == batch_labels).sum().item()
            n_seen += batch_labels.size(0)

        # Loss is the mean over batches; accuracy is over all samples seen
        train_loss = running_loss / len(train_loader)
        train_acc = n_correct / n_seen
        val_acc, val_loss = evaluate(val_loader, model, loss_fn)

        epoch_values = (
            ("train_loss", train_loss),
            ("train_acc", train_acc),
            ("val_loss", val_loss),
            ("val_acc", val_acc),
        )
        for key, value in epoch_values:
            curves[key].append(value)

        print(f"Epoch {epoch+1}/{EPOCHS} | Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")

    history[name] = curves
    return model

def evaluate_model(model, name):
    """Score `model` on `test_loader` and report per-class metrics.

    Args:
        model: Trained network; switched to eval mode here. Batches are moved
            to DEVICE before the forward pass.
        name: Display name, used in the printed output, the plot title and as
            the key in `results`.

    Side effects:
        Stores ``{"report": <dict>, "confusion_matrix": <array>}`` under
        ``results[name]``, prints the text report and the confusion matrix,
        and shows the confusion matrix as a heatmap.
    """
    model.eval()
    y_true, y_pred = [], []
    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images.to(DEVICE))
            y_pred.extend(outputs.argmax(dim=1).cpu().tolist())
            y_true.extend(labels.tolist())

    class_names = dataset.classes
    # Pin the label set to every known class. Without it, a class that is absent
    # from this test split makes classification_report reject `target_names`
    # and shrinks the confusion matrix so it no longer matches the tick labels.
    class_ids = list(range(len(class_names)))

    report = classification_report(
        y_true, y_pred, labels=class_ids, target_names=class_names, output_dict=True
    )
    # NOTE(review): with an explicit `labels` list some scikit-learn versions may
    # emit "micro avg" instead of "accuracy"; keep the key the summary reads.
    report.setdefault("accuracy", float(np.mean(np.asarray(y_true) == np.asarray(y_pred))))
    cm = confusion_matrix(y_true, y_pred, labels=class_ids)

    results[name] = {
        "report": report,
        "confusion_matrix": cm
    }

    print(f"\nClassification Report for {name}:")
    print(classification_report(y_true, y_pred, labels=class_ids, target_names=class_names))
    print("Confusion Matrix:")
    print(cm)

    # Confusion matrix heatmap
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.title(f'Confusion Matrix - {name}')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()

# Train and evaluate each model in turn
for model_name, timm_name in model_names.items():
    # Release the previous network before building the next one. Otherwise
    # `trained_model` keeps it alive (still on DEVICE, where train_model put it)
    # for the whole of the next training run. The last model stays bound to
    # `model` / `trained_model` after the loop.
    model = trained_model = None

    model = timm.create_model(timm_name, pretrained=True, num_classes=NUM_CLASSES)
    trained_model = train_model(model, model_name)
    evaluate_model(trained_model, model_name)

# Summary: one line per evaluated model
print("\n====== SUMMARY ======")
for model_name in results:
    report = results[model_name]["report"]
    macro_f1 = report['macro avg']['f1-score']
    accuracy = report['accuracy']
    print(f"{model_name:<15} | Accuracy: {accuracy:.4f} | Macro F1: {macro_f1:.4f}")

# Training and validation metrics
def plot_metrics(history_dict):
    """Plot loss and accuracy curves, side by side, for every recorded model.

    Args:
        history_dict: Mapping of model name to a dict holding the per-epoch
            lists "train_loss", "val_loss", "train_acc" and "val_acc".

    Training curves are drawn solid and validation curves dashed; the figure
    is shown immediately.
    """
    # (train key, validation key, panel title, y-axis label) for each panel, left to right
    panels = (
        ("train_loss", "val_loss", "Loss per Epoch", "Loss"),
        ("train_acc", "val_acc", "Accuracy per Epoch", "Accuracy"),
    )

    plt.figure(figsize=(16, 6))

    for position, (train_key, val_key, title, y_label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        for run_name, curves in history_dict.items():
            plt.plot(curves[train_key], label=f'{run_name} Train')
            plt.plot(curves[val_key], linestyle='--', label=f'{run_name} Val')
        plt.title(title)
        plt.xlabel("Epoch")
        plt.ylabel(y_label)
        plt.legend()
        plt.grid(True)

    plt.tight_layout()
    plt.show()

plot_metrics(history)