# In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, datasets
from torchvision.models import densenet121
import pandas as pd
import numpy as np
import os
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from tqdm import tqdm
import cv2


# =============================================================================
# LANGKAH 1: MODIFIKASI MODEL UNTUK MULTI-CLASS (CheXNet/DenseNet121)
# =============================================================================

import random

def set_seed(seed):
    """Seed every random number generator this script relies on.

    Exports ``PYTHONHASHSEED``, seeds Python's ``random``, NumPy and PyTorch
    (CPU and CUDA generators), and switches cuDNN to deterministic,
    non-benchmarking mode so repeated runs are reproducible.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Host-side generators.
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    # CUDA generators: the current device, then all devices (multi-GPU).
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

# Seed all RNGs once, here at the start of the script.
SEED = 42  # 42 is just a conventional choice; any other integer works
set_seed(SEED)

print(f"Random seed diatur ke {SEED}")

class CheXNetModel(nn.Module):
    """DenseNet121 backbone (CheXNet-style) with a multi-class head.

    The backbone's stock classifier is replaced by ``Dropout(0.5)`` followed
    by a ``Linear`` layer that produces ``num_classes`` logits.

    Args:
        num_classes: Number of output classes of the new classifier head.
        pretrained: Forwarded unchanged to ``densenet121(pretrained=...)``.
        checkpoint_path: Optional ``.pth``/``.pt`` file, or a directory
            containing one. When the path exists, its backbone weights are
            loaded on top of the freshly built network.
    """

    # Prefixes that may precede the backbone's own parameter names when the
    # checkpoint was saved from a wrapper (e.g. ``module.`` from DataParallel).
    _WRAPPER_PREFIXES = ('module.', 'densenet121.', 'densenet.', 'model.')

    def __init__(self, num_classes, pretrained=True, checkpoint_path=None):
        super().__init__()

        # DenseNet121 is the backbone.
        self.densenet = densenet121(pretrained=pretrained)

        # Swap the classifier for a multi-class head.
        num_features = self.densenet.classifier.in_features
        self.densenet.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(num_features, num_classes)
        )

        # Load extra pre-trained weights only when the path actually exists.
        if checkpoint_path and os.path.exists(checkpoint_path):
            self.load_pretrained_weights(checkpoint_path, num_classes)

    @classmethod
    def _strip_wrapper_prefixes(cls, key, known_keys):
        """Drop leading wrapper prefixes until ``key`` names a backbone tensor.

        Returns the key unchanged as soon as it is found in ``known_keys`` or
        no known prefix is left to remove.
        """
        while key not in known_keys:
            for prefix in cls._WRAPPER_PREFIXES:
                if key.startswith(prefix):
                    key = key[len(prefix):]
                    break
            else:
                return key
        return key

    def load_pretrained_weights(self, checkpoint_path, num_classes):
        """Load backbone weights from a checkpoint, skipping the classifier.

        This is best-effort: any failure is reported on stdout and the weights
        the model already has are kept.

        Args:
            checkpoint_path: A ``.pth``/``.pt`` file, or a directory in which
                the first such file (in sorted order) is used.
            num_classes: Accepted for interface compatibility; classifier
                tensors are never taken from the checkpoint, so it is unused.
        """
        try:
            if os.path.isdir(checkpoint_path):
                # Sort so the chosen file does not depend on listing order.
                candidates = sorted(
                    entry for entry in os.listdir(checkpoint_path)
                    if entry.endswith(('.pth', '.pt'))
                )
                if candidates:
                    checkpoint_path = os.path.join(checkpoint_path, candidates[0])

            if not checkpoint_path.endswith(('.pth', '.pt')):
                print(f"No .pth/.pt checkpoint found at {checkpoint_path}; "
                      "keeping the current weights.")
                return

            checkpoint = torch.load(checkpoint_path, map_location='cpu')

            # Handle the different layouts a checkpoint may come in.
            if 'state_dict' in checkpoint:
                state_dict = checkpoint['state_dict']
            elif 'model' in checkpoint:
                state_dict = checkpoint['model']
            else:
                state_dict = checkpoint

            # Keep only tensors that map onto a backbone (non-classifier)
            # entry of identical shape; everything else is ignored.
            model_dict = self.densenet.state_dict()
            pretrained_dict = {}
            for key, value in state_dict.items():
                name = self._strip_wrapper_prefixes(key, model_dict)
                if 'classifier' in name or name not in model_dict:
                    continue
                if getattr(value, 'shape', None) != model_dict[name].shape:
                    continue
                pretrained_dict[name] = value

            if not pretrained_dict:
                # Do not claim success when nothing was actually loaded.
                print(f"No matching backbone tensors found in {checkpoint_path}; "
                      "keeping the current weights.")
                return

            model_dict.update(pretrained_dict)
            self.densenet.load_state_dict(model_dict, strict=False)
            print(f"Pre-trained weights loaded from {checkpoint_path} "
                  f"({len(pretrained_dict)} tensors matched)")

        except Exception as e:
            print(f"Could not load pre-trained weights: {e}")
            print("Using ImageNet pre-trained weights instead.")

    def forward(self, x):
        """Return the class logits produced by the DenseNet for batch ``x``."""
        return self.densenet(x)

# =============================================================================
# LANGKAH 2: DATASET CUSTOM UNTUK PYTORCH
# =============================================================================

class ImageDataset(Dataset):
    """Image dataset with a labelled mode and an unlabelled test mode.

    Args:
        root_dir: In labelled mode, a directory laid out for
            ``datasets.ImageFolder`` (one sub-folder per class). In test mode,
            a flat directory of image files.
        transform: Optional callable applied to each PIL image in labelled
            mode. It is not applied in test mode, where only paths are
            returned.
        is_test: Select test mode.

    Items:
        Labelled mode yields ``(image, label)``. Test mode yields
        ``(image_path, file_name)`` so the caller can open the image itself.
    """

    def __init__(self, root_dir, transform=None, is_test=False):
        self.root_dir = root_dir
        self.transform = transform
        self.is_test = is_test

        if not is_test:
            # Train/val: reuse ImageFolder's sample list and class ordering.
            self.dataset = datasets.ImageFolder(root_dir)
            self.samples = self.dataset.samples
            self.classes = self.dataset.classes
        else:
            # Test: collect image files directly under root_dir, sorted by name.
            self.image_paths = [
                os.path.join(root_dir, file)
                for file in sorted(os.listdir(root_dir))
                if file.lower().endswith(('.png', '.jpg', '.jpeg'))
            ]

    def __len__(self):
        if self.is_test:
            return len(self.image_paths)
        return len(self.samples)

    def __getitem__(self, idx):
        if self.is_test:
            # Only the path and file name are returned; the image is not opened.
            img_path = self.image_paths[idx]
            return img_path, os.path.basename(img_path)

        img_path, label = self.samples[idx]
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, label


class FocalLoss(nn.Module):
    """Focal loss computed from per-sample cross-entropy.

    Each sample's loss is ``alpha * (1 - pt) ** gamma * ce`` where ``ce`` is
    the cross-entropy of that sample and ``pt = exp(-ce)``.

    Args:
        alpha: Constant multiplier applied to every sample's loss.
        gamma: Exponent of the ``(1 - pt)`` modulating factor.
        reduction: ``'mean'`` or ``'sum'``; any other value returns the
            unreduced per-sample losses.
    """

    def __init__(self, alpha=1, gamma=2, reduction='mean'):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, inputs, targets):
        """Return the focal loss of logits ``inputs`` against ``targets``."""
        ce_loss = nn.functional.cross_entropy(inputs, targets, reduction='none')
        pt = torch.exp(-ce_loss)
        focal_loss = self.alpha * (1 - pt) ** self.gamma * ce_loss

        if self.reduction == 'mean':
            return focal_loss.mean()
        if self.reduction == 'sum':
            return focal_loss.sum()
        return focal_loss
# =============================================================================
# STEP 3: CONFIGURATION AND DATA PREPARATION
# =============================================================================

# Main hyperparameters
IMG_SIZE = (384, 384)
BATCH_SIZE = 16
EPOCHS = 30
LEARNING_RATE = 0.001

# Paths to the data directories
TRAIN_PATH = "/kaggle/input/srifoton-25-machine-learning-competition/train/train"
VAL_PATH = "/kaggle/input/srifoton-25-machine-learning-competition/val/val"
# NOTE(review): the prediction section later rebinds TEST_PATH to ".../test/test";
# confirm which of the two directories actually holds the test images.
TEST_PATH = "/kaggle/input/srifoton-25-machine-learning-competition/test"
CHEXNET_WEIGHTS = '/kaggle/input/chexnet/pytorch/default/1'

# Device configuration: use the GPU when one is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
# Custom transform that applies CLAHE to a PIL image
class ApplyCLAHE(object):
    """Transform that applies OpenCV CLAHE to a PIL image.

    Args:
        clip_limit: Passed to ``cv2.createCLAHE`` as ``clipLimit``.
        tile_grid_size: Passed to ``cv2.createCLAHE`` as ``tileGridSize``.
    """

    def __init__(self, clip_limit=2.0, tile_grid_size=(8, 8)):
        self.clip_limit = clip_limit
        self.tile_grid_size = tile_grid_size
        self.clahe = cv2.createCLAHE(clipLimit=self.clip_limit, tileGridSize=self.tile_grid_size)

    def __call__(self, img):
        """Return a new PIL image with CLAHE applied to ``img``."""
        # Convert the PIL image to a NumPy array.
        img_np = np.array(img)

        # 3-channel image: equalize only the L channel in L*a*b* space.
        if len(img_np.shape) == 3 and img_np.shape[2] == 3:
            lab = cv2.cvtColor(img_np, cv2.COLOR_RGB2LAB)
            l, a, b = cv2.split(lab)
            l_clahe = self.clahe.apply(l)
            lab_clahe = cv2.merge((l_clahe, a, b))
            img_clahe = cv2.cvtColor(lab_clahe, cv2.COLOR_LAB2RGB)
            return Image.fromarray(img_clahe)

        # Otherwise apply CLAHE to the array directly (grayscale input).
        gray_clahe = self.clahe.apply(img_np)
        return Image.fromarray(gray_clahe)

# Data transforms
# Training pipeline: resize, CLAHE, light augmentation on the PIL image, then
# tensor conversion, random erasing and normalization.
train_transform = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    ApplyCLAHE(clip_limit=2.0),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
    transforms.ToTensor(),  # convert the image to a tensor
    # Random erasing is placed after ToTensor and before Normalize.
    transforms.RandomErasing(p=0.5, scale=(0.02, 0.2), ratio=(0.3, 3.3), value=0, inplace=False),
    # Presumably the ImageNet channel statistics — TODO confirm.
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])  # normalize last
])

# Validation/test pipeline: same preprocessing without the random augmentation.
val_test_transform = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    ApplyCLAHE(clip_limit=2.0),  # CLAHE is applied at validation/test time too
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                         std=[0.229, 0.224, 0.225])
])

# Create datasets
print("Loading datasets...")
train_dataset = ImageDataset(TRAIN_PATH, transform=train_transform)
val_dataset = ImageDataset(VAL_PATH, transform=val_test_transform)
# In test mode the dataset yields (path, file name) pairs and does not apply
# the transform itself.
test_dataset = ImageDataset(TEST_PATH, transform=val_test_transform, is_test=True)

# Get class names (taken from the ImageFolder built over the training directory)
class_names = train_dataset.classes
num_classes = len(class_names)
print(f"Classes found: {class_names}")
print(f"Number of classes: {num_classes}")

# Create data loaders; only the training loader shuffles
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, 
                         shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, 
                       shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, 
                        shuffle=False, num_workers=4)

print(f"Train batches: {len(train_loader)}")
print(f"Validation batches: {len(val_loader)}")
print(f"Test batches: {len(test_loader)}")

# =============================================================================
# STEP 4: BUILD AND TRAIN THE MODEL
# =============================================================================
# One output per class found in the training folder; CHEXNET_WEIGHTS is passed
# as the optional checkpoint path.
model = CheXNetModel(num_classes=num_classes, pretrained=True, checkpoint_path=CHEXNET_WEIGHTS)
model = model.to(device)

def train_epoch(model, train_loader, criterion, optimizer, device):
    """Run one optimisation pass over ``train_loader``.

    Returns:
        A ``(loss, accuracy)`` tuple: the mean of the per-batch losses and the
        percentage of correctly classified samples.
    """
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    for batch_images, batch_labels in tqdm(train_loader, desc="Training"):
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        # Standard step: reset gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        loss_sum += batch_loss.item()
        predicted = logits.max(1)[1]  # index of the largest logit per sample
        n_seen += batch_labels.size(0)
        n_correct += (predicted == batch_labels).sum().item()

    mean_loss = loss_sum / len(train_loader)
    accuracy = 100. * n_correct / n_seen
    return mean_loss, accuracy

# Validation function
def validate_epoch(model, val_loader, criterion, device):
    """Evaluate ``model`` on ``val_loader`` without tracking gradients.

    Returns:
        A ``(loss, accuracy)`` tuple: the mean of the per-batch losses and the
        percentage of correctly classified samples.
    """
    model.eval()
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    with torch.no_grad():
        for batch_images, batch_labels in tqdm(val_loader, desc="Validation"):
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_images)
            loss_sum += criterion(logits, batch_labels).item()

            predicted = logits.max(1)[1]  # index of the largest logit per sample
            n_seen += batch_labels.size(0)
            n_correct += (predicted == batch_labels).sum().item()

    mean_loss = loss_sum / len(val_loader)
    accuracy = 100. * n_correct / n_seen
    return mean_loss, accuracy
# Loss function shared by both training stages
criterion = FocalLoss(gamma=2)

# --- STAGE 1: WARM-UP (TRAIN THE CLASSIFIER HEAD ONLY) ---
print("\n--- Starting Stage 1: Training Classifier Head ---")

# Freeze every backbone (feature extractor) parameter; the classifier stays trainable
# NOTE(review): train_epoch still calls model.train(), so any normalization
# layers in the frozen backbone presumably keep updating their running
# statistics — confirm this is intended.
for param in model.densenet.features.parameters():
    param.requires_grad = False

# Optimizer that targets ONLY the classifier head
optimizer_head = optim.Adam(model.densenet.classifier.parameters(), lr=LEARNING_RATE)
epochs_head = 5  # train the head for 5 epochs

for epoch in range(epochs_head):
    print(f"\nHead Training Epoch [{epoch+1}/{epochs_head}]")
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer_head, device)
    val_loss, val_acc = validate_epoch(model, val_loader, criterion, device)
    print(f"Head Train Loss: {train_loss:.4f}, Head Train Acc: {train_acc:.2f}%")
    print(f"Head Val Loss: {val_loss:.4f}, Head Val Acc: {val_acc:.2f}%")

# --- STAGE 2: FINE-TUNING (TRAIN THE WHOLE MODEL) ---
print("\n--- Starting Stage 2: Fine-tuning Full Model ---")

# Unfreeze every layer of the model
for param in model.parameters():
    param.requires_grad = True

# New optimizer over the whole model with a 10x smaller learning rate
optimizer_full = optim.AdamW(model.parameters(), lr=LEARNING_RATE / 10, weight_decay=1e-4)
# mode='max' because the scheduler monitors validation accuracy. The deprecated
# ``verbose`` flag is not passed; learning-rate changes are logged explicitly
# in the loop below instead.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer_full, mode='max', factor=0.5, patience=3)

# Main training loop, driven by optimizer_full and the scheduler above
print("\nStarting full model training...")
train_losses, train_accs = [], []
val_losses, val_accs = [], []
best_val_acc = 0.0
patience_counter = 0
patience = 7  # epochs without improvement tolerated before early stopping

for epoch in range(EPOCHS):
    print(f"\nEpoch [{epoch+1}/{EPOCHS}]")

    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer_full, device)
    train_losses.append(train_loss)
    train_accs.append(train_acc)

    val_loss, val_acc = validate_epoch(model, val_loader, criterion, device)
    val_losses.append(val_loss)
    val_accs.append(val_acc)

    # The scheduler monitors validation accuracy; report any LR change it makes.
    lr_before = optimizer_full.param_groups[0]['lr']
    scheduler.step(val_acc)
    lr_after = optimizer_full.param_groups[0]['lr']
    if lr_after != lr_before:
        print(f"Learning rate reduced from {lr_before:.2e} to {lr_after:.2e}")

    print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
    print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

    # Checkpoint on improvement; otherwise count towards early stopping
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), 'best_model.pth')
        patience_counter = 0
        print(f"New best validation accuracy: {best_val_acc:.2f}%")
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f"Early stopping triggered after {epoch+1} epochs")
            break

# Load best model
model.load_state_dict(torch.load('best_model.pth'))
print(f"Training completed. Best validation accuracy: {best_val_acc:.2f}%")

# =============================================================================
# STEP 5: PLOT TRAINING HISTORY
# =============================================================================

plt.figure(figsize=(12, 5))
epochs_range = range(1, len(train_accs) + 1)

# One entry per panel: subplot slot, training curve and label, validation
# curve and label, y-axis label, title. Accuracy is on the left, loss on the right.
_panels = (
    (1, train_accs, 'Training Accuracy', val_accs, 'Validation Accuracy',
     'Accuracy (%)', 'Training and Validation Accuracy'),
    (2, train_losses, 'Training Loss', val_losses, 'Validation Loss',
     'Loss', 'Training and Validation Loss'),
)

for _slot, _train_curve, _train_label, _val_curve, _val_label, _ylabel, _title in _panels:
    plt.subplot(1, 2, _slot)
    plt.plot(epochs_range, _train_curve, label=_train_label, marker='o')
    plt.plot(epochs_range, _val_curve, label=_val_label, marker='s')
    plt.xlabel('Epochs')
    plt.ylabel(_ylabel)
    plt.title(_title)
    plt.legend()
    plt.grid(True)

plt.tight_layout()
plt.show()

import os
import glob
from torch.utils.data import Dataset, DataLoader
from PIL import Image

# =============================================================================
# PART 2: PREDICTION
# =============================================================================

print("Memulai proses prediksi...")
# Test-time augmentation (TTA) views. Baseline view: no augmentation.
base_transform = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    ApplyCLAHE(clip_limit=2.0),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Horizontally flipped view
hflip_transform = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    ApplyCLAHE(clip_limit=2.0),
    transforms.RandomHorizontalFlip(p=1.0),  # p=1.0 so the flip is always applied
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Slightly rotated view
# NOTE(review): the angle is drawn at random per call, so this view (and the
# averaged prediction) presumably depends on the RNG state — confirm acceptable.
rotate_transform = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    ApplyCLAHE(clip_limit=2.0),
    transforms.RandomRotation(degrees=10),  # random rotation within +/- 10 degrees
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Collect every TTA view in one list
tta_transforms = [base_transform, hflip_transform, rotate_transform]
print(f"Menggunakan {len(tta_transforms)} transformasi untuk TTA.")


# --- Configuration ---
IMG_SIZE = (384, 384)
BATCH_SIZE = 16
TEST_PATH = "/kaggle/input/srifoton-25-machine-learning-competition/test/test" 
TRAIN_PATH = "/kaggle/input/srifoton-25-machine-learning-competition/train/train"
MODEL_PATH = 'best_model.pth'
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# --- Get the class names from the training folder ---
# This fixes the class order (index -> name) that the model was trained with.
temp_train_dataset = datasets.ImageFolder(TRAIN_PATH)
class_names_from_folder = temp_train_dataset.classes
num_classes = len(class_names_from_folder)
print(f"Urutan kelas yang dipelajari model: {class_names_from_folder}")

# --- Prepare the model ---
print(f"Memuat model dari {MODEL_PATH}...")
# pretrained=False: every weight is overwritten by the checkpoint loaded on the
# next line, so fetching the pretrained backbone weights first would be wasted.
model = CheXNetModel(num_classes=num_classes, pretrained=False)
model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
model = model.to(device)
model.eval()

# --- Prepare the test data ---
# The TTA dataset below is built without a transform; the TTA loop applies
# tta_transforms itself.
val_test_transform = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    ApplyCLAHE(clip_limit=2.0),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
# Test mode: the dataset yields (image path, file name) pairs, one per batch.
test_dataset_for_tta = ImageDataset(TEST_PATH, is_test=True) 
test_loader_for_tta = DataLoader(test_dataset_for_tta, batch_size=1, shuffle=False, num_workers=0)
print(f"Menemukan {len(test_dataset_for_tta)} gambar untuk diprediksi dengan TTA.")

# --- Run prediction with TTA ---
print("\nMembuat prediksi pada data tes menggunakan TTA...")
predictions_indices = []
filenames = []

with torch.no_grad():
    # The loader yields image paths (not image objects); batch_size=1 means
    # each batch holds exactly one path and one file name.
    for image_path, filename in tqdm(test_loader_for_tta, desc="Testing with TTA"):
        # Open the image here so every TTA view starts from the same PIL image.
        current_image = Image.open(image_path[0]).convert('RGB')

        # Sum the softmax probabilities over all TTA views.
        tta_probs = torch.zeros(1, num_classes).to(device)
        for tta_transform in tta_transforms:
            view = tta_transform(current_image).unsqueeze(0).to(device)
            tta_probs += nn.functional.softmax(model(view), dim=1)

        # Average over the views and keep the most probable class index.
        predicted = (tta_probs / len(tta_transforms)).max(1)[1]

        predictions_indices.append(predicted.cpu().item())
        filenames.append(filename[0])

# ... Continue to PART 3 to save the results ...

# 1. Submission number for each class name.
class_to_number = {
    name: number
    for number, name in enumerate((
        'Bacterial Pneumonia',
        'Corona Virus Disease',
        'Normal',
        'Tuberculosis',
        'Viral Pneumonia',
    ))
}

# 2. Translate each model output index into its submission number:
#    index -> class name (ImageFolder order) -> number from class_to_number.
final_predictions = [
    class_to_number[class_names_from_folder[idx]]
    for idx in predictions_indices
]

# 3. Build the submission DataFrame from file names and final numeric predictions.
results_df = pd.DataFrame({'Id': filenames, 'Predicted': final_predictions})

# 4. Save it as CSV.
submission_path = 'submission.csv'
results_df.to_csv(submission_path, index=False)

print(f"\n✅ Prediksi selesai! File disimpan di: {submission_path}")
print("\nPreview 10 prediksi pertama:")
print(results_df.head(10))

# Show how many test images were assigned to each numeric class.
print(f"\nDistribusi prediksi:")
print(results_df['Predicted'].value_counts().sort_index())

import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report

# The model must already be loaded and on the right device. If it is not,
# run these lines first:
# model.load_state_dict(torch.load('best_model.pth'))
# model = model.to(device)
model.eval()

all_preds = []
all_labels = []

print("Mengumpulkan prediksi dari validation set...")
with torch.no_grad():
    for images, labels in tqdm(val_loader, desc="Validating"):
        logits = model(images.to(device))
        predicted = torch.max(logits, 1)[1]  # index of the largest logit per sample

        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

separator = "\n" + "="*50

# 1. Classification report (precision, recall, F1-score)
print(separator)
print("Classification Report")
print("="*50)
print(classification_report(all_labels, all_preds, target_names=class_names))

# 2. Confusion matrix, drawn as a heatmap
print(separator)
print("Confusion Matrix")
print("="*50)
cm = confusion_matrix(all_labels, all_preds)

plt.figure(figsize=(10, 8))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.xlabel('Predicted Label', fontsize=12)
plt.ylabel('True Label', fontsize=12)
plt.title('Confusion Matrix', fontsize=15)
plt.show()