In [1]:
# ============================================================================
# CELL 1: ENVIRONMENT SETUP
# ============================================================================
import sys
import os
from pathlib import Path

print("=" * 70)
print("PHASE 3: BASELINE TRAINING SETUP")
print("=" * 70)

# Mount Google Drive
try:
    from google.colab import drive
    drive.mount('/content/drive')
    IN_COLAB = True
    print("‚úÖ Google Colab detected, Drive mounted")
except ImportError:
    IN_COLAB = False
    print("‚úÖ Local environment detected")

# Clone/update repository
if IN_COLAB:
    REPO_PATH = Path('/content/tri-objective-robust-xai-medimg')
    if not REPO_PATH.exists():
        !git clone https://github.com/viraj1011JAIN/tri-objective-robust-xai-medimg.git {REPO_PATH}
        print("‚úÖ Repository cloned")
    else:
        os.chdir(REPO_PATH)
        !git pull origin main
        print("‚úÖ Repository updated")
    
    os.chdir(REPO_PATH)
    sys.path.insert(0, str(REPO_PATH))
    PROJECT_ROOT = REPO_PATH
else:
    PROJECT_ROOT = Path.cwd().parent
    sys.path.insert(0, str(PROJECT_ROOT))

print(f"üìÅ Project root: {PROJECT_ROOT}")

PHASE 3: BASELINE TRAINING SETUP
‚úÖ Local environment detected
üìÅ Project root: c:\Users\Dissertation\tri-objective-robust-xai-medimg


In [None]:
# ============================================================================
# CELL 2: INSTALL DEPENDENCIES
# ============================================================================
!pip install -q torch torchvision --index-url https://download.pytorch.org/whl/cu121
!pip install -q timm albumentations scikit-learn pandas matplotlib seaborn tqdm mlflow
print("‚úÖ Dependencies installed")

‚úÖ Dependencies installed


In [3]:
# ============================================================================
# CELL 3: IMPORTS
# ============================================================================
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.auto import tqdm
from pathlib import Path
import json
import time
import warnings
warnings.filterwarnings('ignore')

# Albumentations for transforms
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Project imports
from src.datasets.isic import ISICDataset
from src.models.build import build_model
from src.utils.reproducibility import set_global_seed

# Check GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"‚úÖ Using device: {device}")
if torch.cuda.is_available():
    print(f"   GPU: {torch.cuda.get_device_name(0)}")
    print(f"   Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

‚úÖ Using device: cuda
   GPU: NVIDIA GeForce RTX 3050 Laptop GPU
   Memory: 4.3 GB


In [None]:
# ============================================================================
# CELL 4: CONFIGURATION (OPTIMIZED FOR A100 40GB)
# ============================================================================
print("=" * 70)
print("CONFIGURATION (A100 OPTIMIZED)")
print("=" * 70)

# Enable TF32 for faster matrix operations on A100
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
torch.backends.cudnn.benchmark = True  # Auto-tune convolution algorithms

CONFIG = {
    # Data paths (Google Drive)
    'data_root': Path('/content/drive/MyDrive/data/data/isic_2018'),
    'checkpoint_dir': Path('/content/drive/MyDrive/checkpoints/baseline'),
    'results_dir': Path('/content/drive/MyDrive/results/phase3'),
    
    # Model
    'model_name': 'resnet50',
    'num_classes': 7,
    'pretrained': True,
    
    # Training (OPTIMIZED for A100 40GB)
    'epochs': 30,
    'batch_size': 128,          # Increased from 32 ‚Üí 128 for A100
    'learning_rate': 3e-4,      # Scaled up with larger batch
    'weight_decay': 1e-4,
    'num_workers': 4,           # More workers for faster data loading
    'pin_memory': True,
    'use_amp': True,            # Mixed precision training (FP16)
    
    # Image
    'image_size': 224,
    
    # Seeds for reproducibility
    'seeds': [42, 123, 456],
    
    # Class names
    'class_names': ['AKIEC', 'BCC', 'BKL', 'DF', 'MEL', 'NV', 'VASC'],
}

# Create directories
CONFIG['checkpoint_dir'].mkdir(parents=True, exist_ok=True)
CONFIG['results_dir'].mkdir(parents=True, exist_ok=True)

for seed in CONFIG['seeds']:
    (CONFIG['checkpoint_dir'] / f'seed_{seed}').mkdir(exist_ok=True)

print(f"üìä Model: {CONFIG['model_name']}")
print(f"üìä Classes: {CONFIG['num_classes']}")
print(f"üìä Epochs: {CONFIG['epochs']}")
print(f"üìä Batch size: {CONFIG['batch_size']} (optimized for A100)")
print(f"üìä Mixed Precision (AMP): {CONFIG['use_amp']}")
print(f"üìä Seeds: {CONFIG['seeds']}")
print(f"üìÅ Data: {CONFIG['data_root']}")
print(f"üìÅ Checkpoints: {CONFIG['checkpoint_dir']}")

# Show GPU memory
if torch.cuda.is_available():
    total_mem = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"\nüéÆ GPU Memory: {total_mem:.1f} GB")
    print(f"üöÄ TF32 enabled: {torch.backends.cuda.matmul.allow_tf32}")
    print(f"üöÄ cuDNN benchmark: {torch.backends.cudnn.benchmark}")

CONFIGURATION
üìä Model: resnet50
üìä Classes: 7
üìä Epochs: 30
üìä Batch size: 32
üìä Seeds: [42, 123, 456]
üìÅ Data: \content\drive\MyDrive\data\data\isic_2018
üìÅ Checkpoints: \content\drive\MyDrive\checkpoints\baseline


In [5]:
# ============================================================================
# CELL 5: DATA PREPARATION
# ============================================================================
print("=" * 70)
print("DATA PREPARATION")
print("=" * 70)

# Fix metadata paths (Windows backslashes ‚Üí forward slashes)
metadata_path = CONFIG['data_root'] / 'metadata.csv'
print(f"üìÑ Loading metadata: {metadata_path}")

df = pd.read_csv(metadata_path)
print(f"   Total samples: {len(df)}")

# Convert backslashes to forward slashes
if 'image_path' in df.columns:
    df['image_path'] = df['image_path'].str.replace('\\', '/', regex=False)
    print("   ‚úÖ Fixed path separators")

# Save fixed metadata
fixed_path = CONFIG['data_root'] / 'metadata_fixed.csv'
df.to_csv(fixed_path, index=False)
print(f"   ‚úÖ Saved to: {fixed_path}")

# Show split distribution
print(f"\nüìä Split Distribution:")
print(df['split'].value_counts())

# Show class distribution
print(f"\nüìä Class Distribution:")
print(df['label'].value_counts())

DATA PREPARATION
üìÑ Loading metadata: \content\drive\MyDrive\data\data\isic_2018\metadata.csv


FileNotFoundError: [Errno 2] No such file or directory: '\\content\\drive\\MyDrive\\data\\data\\isic_2018\\metadata.csv'

In [None]:
# ============================================================================
# CELL 6: CREATE TRANSFORMS & DATASETS
# ============================================================================
print("=" * 70)
print("CREATING DATASETS")
print("=" * 70)

# Training transforms (with augmentation)
train_transforms = A.Compose([
    A.Resize(CONFIG['image_size'], CONFIG['image_size']),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1, p=0.5),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2()
])

# Validation/Test transforms (no augmentation)
val_transforms = A.Compose([
    A.Resize(CONFIG['image_size'], CONFIG['image_size']),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2()
])

# Create datasets
train_dataset = ISICDataset(
    root=str(CONFIG['data_root']),
    split='train',
    transforms=train_transforms,
    csv_path=str(fixed_path),
    image_column='image_path',
    label_column='label'
)

val_dataset = ISICDataset(
    root=str(CONFIG['data_root']),
    split='val',
    transforms=val_transforms,
    csv_path=str(fixed_path),
    image_column='image_path',
    label_column='label'
)

test_dataset = ISICDataset(
    root=str(CONFIG['data_root']),
    split='test',
    transforms=val_transforms,
    csv_path=str(fixed_path),
    image_column='image_path',
    label_column='label'
)

print(f"‚úÖ Train samples: {len(train_dataset)}")
print(f"‚úÖ Val samples: {len(val_dataset)}")
print(f"‚úÖ Test samples: {len(test_dataset)}")
print(f"‚úÖ Classes: {train_dataset.class_names}")

In [None]:
# ============================================================================
# CELL 7: TRAINING FUNCTIONS (WITH MIXED PRECISION)
# ============================================================================
from sklearn.metrics import accuracy_score, balanced_accuracy_score, roc_auc_score, f1_score
from torch.cuda.amp import GradScaler, autocast

def train_one_epoch(model, dataloader, criterion, optimizer, device, scaler=None, use_amp=True):
    """Train for one epoch with optional mixed precision."""
    model.train()
    running_loss = 0.0
    all_preds, all_labels = [], []
    
    pbar = tqdm(dataloader, desc='Training', leave=False)
    for batch in pbar:
        # Handle (images, labels, meta) format
        if len(batch) == 3:
            images, labels, _ = batch
        else:
            images, labels = batch
            
        images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)
        
        optimizer.zero_grad(set_to_none=True)  # Faster than zero_grad()
        
        # Mixed precision forward pass
        with autocast(enabled=use_amp):
            outputs = model(images)
            loss = criterion(outputs, labels)
        
        # Mixed precision backward pass
        if scaler is not None:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()
        
        running_loss += loss.item() * images.size(0)
        preds = outputs.argmax(dim=1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())
        
        pbar.set_postfix({'loss': f'{loss.item():.4f}'})
    
    epoch_loss = running_loss / len(dataloader.dataset)
    epoch_acc = accuracy_score(all_labels, all_preds) * 100
    return epoch_loss, epoch_acc


def evaluate(model, dataloader, criterion, device, use_amp=True):
    """Evaluate model with optional mixed precision."""
    model.eval()
    running_loss = 0.0
    all_preds, all_labels, all_probs = [], [], []
    
    with torch.no_grad():
        for batch in tqdm(dataloader, desc='Evaluating', leave=False):
            if len(batch) == 3:
                images, labels, _ = batch
            else:
                images, labels = batch
                
            images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)
            
            with autocast(enabled=use_amp):
                outputs = model(images)
                loss = criterion(outputs, labels)
            
            running_loss += loss.item() * images.size(0)
            probs = torch.softmax(outputs.float(), dim=1)  # Convert to float32 for softmax
            preds = outputs.argmax(dim=1)
            
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            all_probs.extend(probs.cpu().numpy())
    
    epoch_loss = running_loss / len(dataloader.dataset)
    all_probs = np.array(all_probs)
    all_labels = np.array(all_labels)
    
    metrics = {
        'loss': epoch_loss,
        'accuracy': accuracy_score(all_labels, all_preds) * 100,
        'balanced_accuracy': balanced_accuracy_score(all_labels, all_preds) * 100,
        'f1_macro': f1_score(all_labels, all_preds, average='macro') * 100,
    }
    
    # AUROC (one-vs-rest)
    try:
        metrics['auroc'] = roc_auc_score(all_labels, all_probs, multi_class='ovr') * 100
    except:
        metrics['auroc'] = 0.0
    
    return metrics, all_probs, all_labels, all_preds

print("‚úÖ Training functions defined (with Mixed Precision support)")

In [None]:
# ============================================================================
# CELL 8: MAIN TRAINING LOOP (OPTIMIZED FOR A100)
# ============================================================================
print("=" * 70)
print("BASELINE TRAINING - ALL SEEDS (A100 OPTIMIZED)")
print("=" * 70)

all_seed_results = {}
training_history = {}

# Initialize GradScaler for mixed precision
scaler = GradScaler() if CONFIG['use_amp'] else None
print(f"üöÄ Mixed Precision (AMP): {'Enabled' if CONFIG['use_amp'] else 'Disabled'}")

for seed_idx, seed in enumerate(CONFIG['seeds']):
    print(f"\n{'='*70}")
    print(f"SEED {seed} ({seed_idx+1}/{len(CONFIG['seeds'])})")
    print(f"{'='*70}")
    
    # Set seed for reproducibility
    set_global_seed(seed)
    
    # Clear GPU cache before each seed
    torch.cuda.empty_cache()
    
    # Create data loaders (optimized)
    train_loader = DataLoader(
        train_dataset, 
        batch_size=CONFIG['batch_size'],
        shuffle=True, 
        num_workers=CONFIG['num_workers'], 
        pin_memory=CONFIG['pin_memory'],
        persistent_workers=True,  # Keep workers alive between epochs
        prefetch_factor=2         # Prefetch batches
    )
    val_loader = DataLoader(
        val_dataset, 
        batch_size=CONFIG['batch_size'] * 2,  # Larger batch for eval (no gradients)
        shuffle=False, 
        num_workers=CONFIG['num_workers'], 
        pin_memory=CONFIG['pin_memory'],
        persistent_workers=True
    )
    test_loader = DataLoader(
        test_dataset, 
        batch_size=CONFIG['batch_size'] * 2,
        shuffle=False, 
        num_workers=CONFIG['num_workers'], 
        pin_memory=CONFIG['pin_memory']
    )
    
    # Build model
    model = build_model(
        architecture=CONFIG['model_name'],
        num_classes=CONFIG['num_classes'],
        pretrained=CONFIG['pretrained']
    ).to(device)
    
    # Compile model for faster execution (PyTorch 2.0+)
    if hasattr(torch, 'compile'):
        try:
            model = torch.compile(model, mode='reduce-overhead')
            print("   üöÄ Model compiled with torch.compile()")
        except:
            print("   ‚ö†Ô∏è torch.compile() not available, using eager mode")
    
    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(
        model.parameters(),
        lr=CONFIG['learning_rate'],
        weight_decay=CONFIG['weight_decay']
    )
    scheduler = optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=CONFIG['epochs'], eta_min=1e-6
    )
    
    # Reset scaler for each seed
    if CONFIG['use_amp']:
        scaler = GradScaler()
    
    # Training history
    history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': [], 'val_auroc': []}
    best_val_auroc = 0.0
    
    print(f"\nüìä Training for {CONFIG['epochs']} epochs...")
    print(f"   Batch size: {CONFIG['batch_size']} | LR: {CONFIG['learning_rate']}")
    start_time = time.time()
    
    for epoch in range(CONFIG['epochs']):
        # Train with mixed precision
        train_loss, train_acc = train_one_epoch(
            model, train_loader, criterion, optimizer, device, 
            scaler=scaler, use_amp=CONFIG['use_amp']
        )
        
        # Validate
        val_metrics, _, _, _ = evaluate(
            model, val_loader, criterion, device, 
            use_amp=CONFIG['use_amp']
        )
        
        # Update scheduler
        scheduler.step()
        
        # Record history
        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_metrics['loss'])
        history['val_acc'].append(val_metrics['accuracy'])
        history['val_auroc'].append(val_metrics['auroc'])
        
        # Save best model
        if val_metrics['auroc'] > best_val_auroc:
            best_val_auroc = val_metrics['auroc']
            checkpoint_path = CONFIG['checkpoint_dir'] / f'seed_{seed}' / 'best.pt'
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'val_auroc': best_val_auroc,
                'config': {k: str(v) if isinstance(v, Path) else v for k, v in CONFIG.items()},
            }, checkpoint_path)
        
        # Print progress every 5 epochs
        if (epoch + 1) % 5 == 0 or epoch == 0:
            gpu_mem = torch.cuda.max_memory_allocated() / 1e9 if torch.cuda.is_available() else 0
            print(f"  Epoch {epoch+1:2d}/{CONFIG['epochs']} | "
                  f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.1f}% | "
                  f"Val Acc: {val_metrics['accuracy']:.1f}% | Val AUROC: {val_metrics['auroc']:.1f}% | "
                  f"GPU: {gpu_mem:.1f}GB")
    
    elapsed = time.time() - start_time
    samples_per_sec = len(train_dataset) * CONFIG['epochs'] / elapsed
    print(f"\n‚è±Ô∏è  Training time: {elapsed/60:.1f} minutes ({samples_per_sec:.0f} samples/sec)")
    
    # Final test evaluation
    print(f"\nüìä Final Test Evaluation...")
    
    # Load best model
    checkpoint = torch.load(checkpoint_path, weights_only=False)
    model.load_state_dict(checkpoint['model_state_dict'])
    
    test_metrics, test_probs, test_labels, test_preds = evaluate(
        model, test_loader, criterion, device, use_amp=CONFIG['use_amp']
    )
    
    print(f"\n‚úÖ SEED {seed} RESULTS:")
    print(f"   Test Accuracy: {test_metrics['accuracy']:.2f}%")
    print(f"   Test Balanced Acc: {test_metrics['balanced_accuracy']:.2f}%")
    print(f"   Test AUROC: {test_metrics['auroc']:.2f}%")
    print(f"   Test F1 (macro): {test_metrics['f1_macro']:.2f}%")
    print(f"   Checkpoint: {checkpoint_path}")
    
    # Store results
    all_seed_results[seed] = test_metrics
    training_history[seed] = history
    
    # Free memory
    del model, optimizer, scheduler
    torch.cuda.empty_cache()

print(f"\n{'='*70}")
print("ALL SEEDS COMPLETE")
print(f"{'='*70}")

In [None]:
# ============================================================================
# CELL 9: AGGREGATE RESULTS
# ============================================================================
print("=" * 70)
print("AGGREGATED RESULTS (MEAN ¬± STD)")
print("=" * 70)

# Aggregate across seeds
metrics_list = ['accuracy', 'balanced_accuracy', 'auroc', 'f1_macro']

print("\nüìä Test Set Performance:")
print("-" * 50)

aggregated = {}
for metric in metrics_list:
    values = [all_seed_results[seed][metric] for seed in CONFIG['seeds']]
    mean_val = np.mean(values)
    std_val = np.std(values)
    aggregated[metric] = {'mean': mean_val, 'std': std_val}
    print(f"   {metric:20s}: {mean_val:.2f}% ¬± {std_val:.2f}%")

print("\nüìä Per-Seed Results:")
print("-" * 50)
for seed in CONFIG['seeds']:
    r = all_seed_results[seed]
    print(f"   Seed {seed}: Acc={r['accuracy']:.1f}%, AUROC={r['auroc']:.1f}%")

# Save results
results_file = CONFIG['results_dir'] / 'baseline_results.json'
with open(results_file, 'w') as f:
    json.dump({
        'per_seed': {str(k): v for k, v in all_seed_results.items()},
        'aggregated': aggregated,
        'config': {k: str(v) if isinstance(v, Path) else v for k, v in CONFIG.items()}
    }, f, indent=2)
print(f"\n‚úÖ Results saved to: {results_file}")

In [None]:
# ============================================================================
# CELL 10: VISUALIZATION - TRAINING CURVES
# ============================================================================
print("=" * 70)
print("TRAINING VISUALIZATIONS")
print("=" * 70)

# Set style
plt.style.use('seaborn-v0_8-whitegrid')
colors = ['#2ecc71', '#3498db', '#e74c3c']

fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Plot 1: Training Loss
ax = axes[0, 0]
for i, seed in enumerate(CONFIG['seeds']):
    ax.plot(training_history[seed]['train_loss'], label=f'Seed {seed}', color=colors[i], linewidth=2)
ax.set_xlabel('Epoch', fontsize=12)
ax.set_ylabel('Training Loss', fontsize=12)
ax.set_title('Training Loss Curves', fontsize=14, fontweight='bold')
ax.legend()
ax.grid(True, alpha=0.3)

# Plot 2: Validation Loss
ax = axes[0, 1]
for i, seed in enumerate(CONFIG['seeds']):
    ax.plot(training_history[seed]['val_loss'], label=f'Seed {seed}', color=colors[i], linewidth=2)
ax.set_xlabel('Epoch', fontsize=12)
ax.set_ylabel('Validation Loss', fontsize=12)
ax.set_title('Validation Loss Curves', fontsize=14, fontweight='bold')
ax.legend()
ax.grid(True, alpha=0.3)

# Plot 3: Validation Accuracy
ax = axes[1, 0]
for i, seed in enumerate(CONFIG['seeds']):
    ax.plot(training_history[seed]['val_acc'], label=f'Seed {seed}', color=colors[i], linewidth=2)
ax.set_xlabel('Epoch', fontsize=12)
ax.set_ylabel('Validation Accuracy (%)', fontsize=12)
ax.set_title('Validation Accuracy Curves', fontsize=14, fontweight='bold')
ax.legend()
ax.grid(True, alpha=0.3)

# Plot 4: Validation AUROC
ax = axes[1, 1]
for i, seed in enumerate(CONFIG['seeds']):
    ax.plot(training_history[seed]['val_auroc'], label=f'Seed {seed}', color=colors[i], linewidth=2)
ax.set_xlabel('Epoch', fontsize=12)
ax.set_ylabel('Validation AUROC (%)', fontsize=12)
ax.set_title('Validation AUROC Curves', fontsize=14, fontweight='bold')
ax.legend()
ax.grid(True, alpha=0.3)

plt.suptitle('ResNet-50 Baseline Training on ISIC 2018\n(3 Seeds for Statistical Robustness)', 
             fontsize=16, fontweight='bold', y=1.02)
plt.tight_layout()

# Save figure
fig_path = CONFIG['results_dir'] / 'training_curves.png'
plt.savefig(fig_path, dpi=150, bbox_inches='tight')
print(f"‚úÖ Saved: {fig_path}")
plt.show()

In [None]:
# ============================================================================
# CELL 11: VISUALIZATION - FINAL RESULTS BAR CHART
# ============================================================================
fig, ax = plt.subplots(figsize=(10, 6))

metrics = ['Accuracy', 'Balanced Acc', 'AUROC', 'F1 (macro)']
metric_keys = ['accuracy', 'balanced_accuracy', 'auroc', 'f1_macro']

x = np.arange(len(metrics))
width = 0.25

for i, seed in enumerate(CONFIG['seeds']):
    values = [all_seed_results[seed][k] for k in metric_keys]
    bars = ax.bar(x + i*width, values, width, label=f'Seed {seed}', color=colors[i], alpha=0.8)
    
    # Add value labels
    for bar, val in zip(bars, values):
        ax.annotate(f'{val:.1f}', xy=(bar.get_x() + bar.get_width()/2, bar.get_height()),
                   xytext=(0, 3), textcoords='offset points', ha='center', fontsize=9)

ax.set_ylabel('Score (%)', fontsize=12)
ax.set_title('Baseline Model Performance by Seed\nISIC 2018 Test Set', fontsize=14, fontweight='bold')
ax.set_xticks(x + width)
ax.set_xticklabels(metrics, fontsize=11)
ax.legend(loc='lower right')
ax.set_ylim(0, 100)
ax.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
fig_path = CONFIG['results_dir'] / 'seed_comparison.png'
plt.savefig(fig_path, dpi=150, bbox_inches='tight')
print(f"‚úÖ Saved: {fig_path}")
plt.show()

In [None]:
# ============================================================================
# CELL 12: FINAL SUMMARY
# ============================================================================
print("=" * 70)
print("PHASE 3 COMPLETE - FINAL SUMMARY")
print("=" * 70)

print("\nüìä BASELINE MODEL PERFORMANCE (Mean ¬± Std):")
print("-" * 50)
print(f"   Accuracy:      {aggregated['accuracy']['mean']:.2f}% ¬± {aggregated['accuracy']['std']:.2f}%")
print(f"   Balanced Acc:  {aggregated['balanced_accuracy']['mean']:.2f}% ¬± {aggregated['balanced_accuracy']['std']:.2f}%")
print(f"   AUROC:         {aggregated['auroc']['mean']:.2f}% ¬± {aggregated['auroc']['std']:.2f}%")
print(f"   F1 (macro):    {aggregated['f1_macro']['mean']:.2f}% ¬± {aggregated['f1_macro']['std']:.2f}%")

print("\nüìÅ SAVED CHECKPOINTS:")
print("-" * 50)
for seed in CONFIG['seeds']:
    ckpt_path = CONFIG['checkpoint_dir'] / f'seed_{seed}' / 'best.pt'
    if ckpt_path.exists():
        size_mb = ckpt_path.stat().st_size / (1024*1024)
        print(f"   ‚úÖ seed_{seed}/best.pt ({size_mb:.1f} MB)")
    else:
        print(f"   ‚ùå seed_{seed}/best.pt - NOT FOUND")

print("\nüéØ NEXT STEPS:")
print("-" * 50)
print("   1. Run Phase 4 notebook for adversarial robustness evaluation")
print("   2. Use these checkpoints as baseline comparison")
print("   3. Proceed to Phase 5 tri-objective robust training")

print("\n" + "=" * 70)
print("‚úÖ PHASE 3 BASELINE TRAINING COMPLETE!")
print("=" * 70)