# In [None]:
# Cell 1: Load results
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import roc_curve, auc
import joblib

# Load predictions
from pathlib import Path
def prediction_accuracy(df):
    """Return the fraction of rows whose predicted label equals the true label.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``true_label`` and ``predicted_label``.

    Returns
    -------
    float
        Mean of the row-wise equality between the two columns.

    Raises
    ------
    KeyError
        If either required column is missing.
    """
    return (df['true_label'] == df['predicted_label']).mean()


# Sort the matches: glob order is filesystem-dependent, and a report whose
# model order changes between runs is hard to compare.
pred_files = sorted(Path('results/predictions').glob('*.csv'))

print("📊 Model Evaluation Report")
print("="*50)

if not pred_files:
    # Make an empty report explicit instead of printing only the header.
    print("\nNo prediction files found in results/predictions")

for file in pred_files:
    df = pd.read_csv(file)
    # "xgboost_model_predictions.csv" -> "xgboost model"
    model_name = file.stem.replace('_predictions', '').replace('_', ' ')

    # One malformed file should not abort the report for every other model.
    missing = {'true_label', 'predicted_label'} - set(df.columns)
    if missing:
        print(f"\n{model_name}: skipped (missing columns: {sorted(missing)})")
        continue

    acc = prediction_accuracy(df)

    print(f"\n{model_name}:")
    print(f"  Accuracy: {acc:.3f}")
    print(f"  Samples: {len(df)}")

# Cell 2: Best model analysis
# Load the saved best-model summary. Cell 3 reads the `model_type` column of
# its first row to locate the pickled model, so the name must stay as is.
best_model_info = pd.read_csv('models/best_model_info.csv')
# The heading emoji was mojibake (UTF-8 bytes decoded with the wrong codec);
# restored to the intended trophy character.
print("\n🏆 Best Model:")
print(best_model_info)

# Cell 3: Feature importance analysis (if XGBoost was best)
def top_feature_importances(importances, k=20):
    """Return the ``k`` largest importances as a DataFrame, largest first.

    Parameters
    ----------
    importances : sequence of numbers
        One importance score per feature, in feature order.
    k : int, optional
        Maximum number of rows to return. Fewer rows come back when there
        are fewer than ``k`` features.

    Returns
    -------
    pandas.DataFrame
        Columns ``feature`` (position of the feature in ``importances``)
        and ``importance``.
    """
    return pd.DataFrame({
        'feature': range(len(importances)),
        'importance': importances
    }).nlargest(k, 'importance')


try:
    import joblib
    # NOTE: joblib.load unpickles the file and can execute arbitrary code;
    # only load model artifacts produced by a trusted pipeline.
    best_model = joblib.load(f'models/best_model_{best_model_info.iloc[0]["model_type"]}.pkl')

    if hasattr(best_model, 'feature_importances_'):
        importances = best_model.feature_importances_
        top_features = top_feature_importances(importances, 20)

        # The model may have fewer than 20 features; size the bars from the
        # rows actually returned so plt.bar never gets mismatched lengths.
        n_top = len(top_features)
        positions = range(n_top)

        plt.figure(figsize=(10, 6))
        plt.bar(positions, top_features['importance'].values)
        # Label each bar with its real feature index; the bare positions
        # 0..n-1 are only the rank and contradict the axis label.
        plt.xticks(positions, top_features['feature'].values, rotation=45)
        plt.title(f'Top {n_top} Most Important Features')
        plt.xlabel('Feature Index')
        plt.ylabel('Importance')
        plt.show()
    else:
        # Not every model type exposes importances; say so instead of
        # silently producing no output.
        print(f"{type(best_model).__name__} has no feature_importances_; skipping plot")
except Exception as exc:
    # Stay best-effort (the report must not crash here), but do not swallow
    # KeyboardInterrupt/SystemExit, and surface the actual cause.
    print(f"Could not load feature importances: {exc!r}")