In [None]:
```xml
<VSCode.Cell id="model_001" language="markdown">
# 03 - Model Training & Comparison

This notebook covers model training, hyperparameter tuning, and comprehensive evaluation for fraud detection.

## Components
1. Data preparation and feature scaling
2. Model training (Logistic Regression, Random Forest, XGBoost)
3. Class imbalance handling with SMOTE
4. Model evaluation and comparison
5. Hyperparameter tuning
6. Ensemble methods
7. Feature importance analysis
8. Business metrics evaluation
</VSCode.Cell>

<VSCode.Cell id="model_002" language="python">
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_score, cross_validate, StratifiedKFold
from xgboost import XGBClassifier

# Make the project root importable before pulling in the local `src` package
sys.path.append('..')

from src.trainer import ModelTrainer
from src.evaluator import ModelEvaluator
from src.config import ModelConfig
from src.preprocessor import FeatureEngineer, DataPreprocessor

# Notebook-wide plotting defaults: large default canvas, seaborn white grid
plt.rcParams['figure.figsize'] = (14, 8)
sns.set_style("whitegrid")
</VSCode.Cell>

<VSCode.Cell id="model_003" language="python">
# Step 1: build a synthetic dataset and hold out a stratified test split
banner = "=" * 60
print(banner)
print("STEP 1: DATA PREPARATION")
print(banner)

np.random.seed(42)
n_samples = 1000
n_features = 15

# NOTE(review): the labels below are drawn independently of X, so the features
# carry no real signal about the target; do not read much into the scores that
# later cells report on this synthetic data.
X = np.random.randn(n_samples, n_features)
y = np.random.binomial(1, 0.3, n_samples)  # ~30% of labels are 1 (fraud)

# Hold out 20% of the rows for testing; stratify so both splits keep the class ratio
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print("\n✓ Dataset created successfully")
n_train_rows, n_train_cols = X_train.shape
print(f"Training set size: {n_train_rows} samples × {n_train_cols} features")
print(f"Test set size: {X_test.shape[0]} samples")

# Report the absolute and relative class counts for each split
for split_name, split_labels in (("Train", y_train), ("Test", y_test)):
    print(f"\nClass distribution ({split_name}):")
    for class_name, class_value in (("Legitimate", 0), ("Fraudulent", 1)):
        class_count = (split_labels == class_value).sum()
        print(f"  {class_name}: {class_count} ({class_count/len(split_labels)*100:.1f}%)")
</VSCode.Cell>

<VSCode.Cell id="model_004" language="python">
# Step 2: train the three candidate models through the project's ModelTrainer
print("\n" + "="*60)
print("STEP 2: MODEL INITIALIZATION & TRAINING")
print("="*60)

trainer = ModelTrainer()

print("\nTraining Models...")
print("-" * 60)

# (display label, name of the ModelTrainer method that fits that model)
training_plan = (
    ("Logistic Regression", "train_logistic_regression"),
    ("Random Forest", "train_random_forest"),
    ("XGBoost", "train_xgboost"),
)

trained_models = []
for step_number, (model_label, method_name) in enumerate(training_plan, start=1):
    print(f"{step_number}. Training {model_label}...")
    train_method = getattr(trainer, method_name)
    trained_models.append(train_method(X_train, y_train))
    print("   ✓ Complete")

# Expose the fitted models under the names the later cells expect
lr_model, rf_model, xgb_model = trained_models

print("\n✓ All models trained successfully")
</VSCode.Cell>

<VSCode.Cell id="model_005" language="python">
# Step 3: score the held-out test set with every trained model
print("\n" + "="*60)
print("STEP 3: GENERATING PREDICTIONS")
print("="*60)

models_dict = {
    'Logistic Regression': lr_model,
    'Random Forest': rf_model,
    'XGBoost': xgb_model
}

# Hard labels and class-1 scores, both keyed by model display name
predictions = {}
probabilities = {}

for model_name, fitted_model in models_dict.items():
    predicted_labels = fitted_model.predict(X_test)
    # Column 1 of predict_proba — presumably the fraud class (label 1); confirm
    # against the models returned by ModelTrainer.
    fraud_scores = fitted_model.predict_proba(X_test)[:, 1]

    predictions[model_name] = predicted_labels
    probabilities[model_name] = fraud_scores

    fraud_count = (predicted_labels == 1).sum()
    legitimate_count = (predicted_labels == 0).sum()

    print(f"\n{model_name}:")
    print(f"  Predicted labels generated: {len(predicted_labels)}")
    print(f"  Fraud predictions: {fraud_count}")
    print(f"  Legitimate predictions: {legitimate_count}")
</VSCode.Cell>

<VSCode.Cell id="model_006" language="python">
# Step 4: compute evaluation metrics for each model's test-set predictions
print("\n" + "="*60)
print("STEP 4: MODEL EVALUATION")
print("="*60)

evaluator = ModelEvaluator()
evaluation_results = {}

print("\nComputing evaluation metrics...")
print("-" * 60)

# (printed label, key in the dict returned by evaluator.evaluate)
headline_metrics = (
    ("Accuracy", "accuracy"),
    ("Precision", "precision"),
    ("Recall", "recall"),
    ("F1-Score", "f1"),
    ("ROC-AUC", "roc_auc"),
)

for model_name in predictions:
    print(f"\nEvaluating {model_name}...")

    model_metrics = evaluator.evaluate(
        y_test, predictions[model_name], probabilities[model_name]
    )
    evaluation_results[model_name] = model_metrics

    for metric_label, metric_key in headline_metrics:
        print(f"  {metric_label}: {model_metrics[metric_key]:.4f}")
</VSCode.Cell>

<VSCode.Cell id="model_007" language="python">
# Collect every model's metrics into one table and pick the ROC-AUC winner
print("\n" + "="*60)
print("MODEL COMPARISON")
print("="*60)

# Table column name -> key in each model's evaluation result dict
column_to_metric_key = {
    'Accuracy': 'accuracy',
    'Precision': 'precision',
    'Recall': 'recall',
    'F1-Score': 'f1',
    'ROC-AUC': 'roc_auc',
    'Specificity': 'specificity',
    'FPR': 'false_positive_rate',
}

table_columns = {'Model': list(evaluation_results)}
for column_name, metric_key in column_to_metric_key.items():
    table_columns[column_name] = [
        model_metrics[metric_key] for model_metrics in evaluation_results.values()
    ]
comparison_df = pd.DataFrame(table_columns)

print("\n" + comparison_df.round(4).to_string(index=False))

# Row label of the highest ROC-AUC (first one wins on ties, per idxmax)
best_model_idx = comparison_df['ROC-AUC'].idxmax()
best_model_name = comparison_df.loc[best_model_idx, 'Model']
print(f"\n✓ Best Model (by ROC-AUC): {best_model_name}")
</VSCode.Cell>

<VSCode.Cell id="model_008" language="python">
# Visualize model comparison: one panel per metric, one bar per model
fig, axes = plt.subplots(2, 3, figsize=(16, 10))
axes = axes.flatten()

metrics = ['Accuracy', 'Precision', 'Recall', 'F1-Score', 'ROC-AUC', 'Specificity']
colors = ['#3498db', '#e74c3c', '#2ecc71']  # also reused by the cross-validation plots

models = comparison_df['Model'].values
# Numeric bar positions so the tick locations can be pinned explicitly below
x_pos = np.arange(len(models))

for ax, metric in zip(axes, metrics):
    values = comparison_df[metric].values

    bars = ax.bar(x_pos, values, color=colors, alpha=0.7, edgecolor='black')
    ax.set_ylabel(metric, fontsize=11)
    ax.set_title(f'{metric} Comparison', fontsize=12, fontweight='bold')
    ax.set_ylim([0, 1])
    ax.grid(axis='y', alpha=0.3)

    # Fix the tick positions before labelling them: calling set_xticklabels on
    # an axis without fixed ticks triggers a matplotlib warning and only pairs
    # labels with ticks by position.
    ax.set_xticks(x_pos)
    ax.set_xticklabels(models, rotation=45, ha='right')

    # Annotate each bar with its value, just above the bar top
    for bar, value in zip(bars, values):
        ax.text(bar.get_x() + bar.get_width() / 2., bar.get_height(),
                f'{value:.3f}', ha='center', va='bottom', fontsize=9)

plt.suptitle('Model Performance Comparison', fontsize=14, fontweight='bold', y=1.00)
plt.tight_layout()
plt.show()
</VSCode.Cell>

<VSCode.Cell id="model_009" language="python">
# Step 5: one confusion-matrix heatmap per model, side by side
print("\n" + "="*60)
print("STEP 5: CONFUSION MATRICES")
print("="*60)

from sklearn.metrics import confusion_matrix

class_labels = ['Legitimate', 'Fraud']
fig, axes = plt.subplots(1, 3, figsize=(16, 4))

for panel_index, model_name in enumerate(predictions):
    ax = axes[panel_index]

    # Rows are actual classes, columns are predicted classes
    cm = confusion_matrix(y_test, predictions[model_name])

    sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        ax=ax,
        xticklabels=class_labels,
        yticklabels=class_labels,
        cbar_kws={'label': 'Count'},
    )

    ax.set_title(f'{model_name}\nConfusion Matrix', fontsize=12, fontweight='bold')
    ax.set_xlabel('Predicted', fontsize=11)
    ax.set_ylabel('Actual', fontsize=11)

plt.suptitle('Model Confusion Matrices', fontsize=14, fontweight='bold', y=1.02)
plt.tight_layout()
plt.show()

# Legend for reading the four cells of each matrix
interpretation_lines = (
    "\nConfusion Matrix Interpretation:",
    "TN (True Negative):  Legitimate correctly identified",
    "FP (False Positive): Legitimate incorrectly marked as fraud (COST)",
    "FN (False Negative): Fraud incorrectly accepted as legitimate (RISK)",
    "TP (True Positive):  Fraud correctly detected",
)
for interpretation_line in interpretation_lines:
    print(interpretation_line)
</VSCode.Cell>

<VSCode.Cell id="model_010" language="python">
# Step 6: overlay the ROC curve of every model on a single axis
print("\n" + "="*60)
print("STEP 6: ROC CURVES COMPARISON")
print("="*60)

from sklearn.metrics import roc_curve, auc

colors_roc = ['#3498db', '#e74c3c', '#2ecc71']

fig, ax = plt.subplots(figsize=(10, 8))

for curve_color, model_name in zip(colors_roc, probabilities):
    false_pos_rate, true_pos_rate, _ = roc_curve(y_test, probabilities[model_name])
    area_under_curve = auc(false_pos_rate, true_pos_rate)

    ax.plot(
        false_pos_rate,
        true_pos_rate,
        color=curve_color,
        lw=2.5,
        label=f'{model_name} (AUC = {area_under_curve:.3f})',
    )

# Diagonal reference: what a random classifier would achieve
ax.plot([0, 1], [0, 1], 'k--', lw=2, label='Random Classifier (AUC = 0.5)')

ax.set_title('ROC Curves - Model Comparison', fontsize=14, fontweight='bold')
ax.set_xlabel('False Positive Rate', fontsize=12)
ax.set_ylabel('True Positive Rate', fontsize=12)
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])  # small headroom so curves touching 1.0 stay visible
ax.legend(loc="lower right", fontsize=11)
ax.grid(alpha=0.3)

plt.tight_layout()
plt.show()

print("\n✓ ROC curves generated")
print("Note: Higher AUC indicates better discrimination between fraud and legitimate cases")
</VSCode.Cell>

<VSCode.Cell id="model_011" language="python">
# Step 7: overlay the precision-recall curve of every model on a single axis
print("\n" + "="*60)
print("STEP 7: PRECISION-RECALL CURVES")
print("="*60)

from sklearn.metrics import precision_recall_curve, average_precision_score

colors_pr = ['#3498db', '#e74c3c', '#2ecc71']

fig, ax = plt.subplots(figsize=(10, 8))

for curve_color, model_name in zip(colors_pr, probabilities):
    model_scores = probabilities[model_name]
    precision_values, recall_values, _ = precision_recall_curve(y_test, model_scores)
    mean_precision = average_precision_score(y_test, model_scores)

    ax.plot(
        recall_values,
        precision_values,
        color=curve_color,
        lw=2.5,
        label=f'{model_name} (AP = {mean_precision:.3f})',
    )

# Horizontal reference line at the share of positive labels in the test set
baseline_precision = (y_test == 1).sum() / len(y_test)
ax.axhline(
    y=baseline_precision,
    color='k',
    linestyle='--',
    lw=2,
    label=f'Baseline (Precision = {baseline_precision:.3f})',
)

ax.set_title('Precision-Recall Curves - Model Comparison', fontsize=14, fontweight='bold')
ax.set_xlabel('Recall (Fraud Detection Rate)', fontsize=12)
ax.set_ylabel('Precision', fontsize=12)
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.legend(loc="lower left", fontsize=11)
ax.grid(alpha=0.3)

plt.tight_layout()
plt.show()

print("\n✓ Precision-Recall curves generated")
print("Note: This curve is more informative for imbalanced datasets than ROC")
</VSCode.Cell>

<VSCode.Cell id="model_012" language="python">
# Step 8: rank and plot feature importances of the two tree-based models
print("\n" + "="*60)
print("STEP 8: FEATURE IMPORTANCE ANALYSIS")
print("="*60)


def _top_importance_table(importances, names, top_n=10):
    """Return the `top_n` Feature/Importance rows, highest importance first."""
    table = pd.DataFrame({'Feature': names, 'Importance': importances})
    return table.sort_values('Importance', ascending=False).head(top_n)


# The synthetic matrix has no column names, so use positional placeholders
feature_names = [f'Feature_{position}' for position in range(1, X_train.shape[1] + 1)]

rf_importance = rf_model.feature_importances_
xgb_importance = xgb_model.feature_importances_

rf_importance_df = _top_importance_table(rf_importance, feature_names)
xgb_importance_df = _top_importance_table(xgb_importance, feature_names)

# (model label, top-10 table, bar colour)
importance_panels = (
    ('Random Forest', rf_importance_df, '#3498db'),
    ('XGBoost', xgb_importance_df, '#e74c3c'),
)

for model_label, importance_table, _ in importance_panels:
    print(f"\nTop 10 Most Important Features ({model_label}):")
    print(importance_table.reset_index(drop=True).to_string(index=False))

fig, axes = plt.subplots(1, 2, figsize=(16, 6))

for ax, (model_label, importance_table, bar_color) in zip(axes, importance_panels):
    # Reverse the rows so the most important feature ends up at the top of the chart
    bottom_up = importance_table.iloc[::-1]
    ax.barh(bottom_up['Feature'], bottom_up['Importance'],
            color=bar_color, alpha=0.7, edgecolor='black')
    ax.set_xlabel('Importance', fontsize=11)
    ax.set_title(f'Top 10 Features - {model_label}', fontsize=12, fontweight='bold')
    ax.grid(axis='x', alpha=0.3)

plt.suptitle('Feature Importance Comparison', fontsize=14, fontweight='bold', y=1.02)
plt.tight_layout()
plt.show()
</VSCode.Cell>

<VSCode.Cell id="model_013" language="python">
# Step 9: Ensemble Model — soft-voting combination of the three base models
print("\n" + "="*60)
print("STEP 9: ENSEMBLE MODEL (VOTING CLASSIFIER)")
print("="*60)

# Create ensemble model
ensemble_model = VotingClassifier(
    estimators=[
        ('lr', lr_model),
        ('rf', rf_model),
        ('xgb', xgb_model)
    ],
    voting='soft'
)

print("Ensemble created with:")
print("  - Logistic Regression")
print("  - Random Forest")
print("  - XGBoost")
print("  - Voting: Soft (probability averaging)")

# A VotingClassifier must be fitted itself before it can predict, even when the
# estimators handed to it are already trained; without this call predict() and
# predict_proba() raise NotFittedError.
# NOTE(review): fit() trains fresh clones of the base estimators on the data
# passed here, so any resampling that ModelTrainer performs outside the
# estimator objects is not replayed — confirm that is acceptable.
ensemble_model.fit(X_train, y_train)

# Make predictions
y_pred_ensemble = ensemble_model.predict(X_test)
y_pred_proba_ensemble = ensemble_model.predict_proba(X_test)[:, 1]

# Evaluate with the same evaluator class used for the individual models
evaluator_ensemble = ModelEvaluator()
ensemble_results = evaluator_ensemble.evaluate(y_test, y_pred_ensemble, y_pred_proba_ensemble)

print("\nEnsemble Model Performance:")
print(f"  Accuracy: {ensemble_results['accuracy']:.4f}")
print(f"  Precision: {ensemble_results['precision']:.4f}")
print(f"  Recall: {ensemble_results['recall']:.4f}")
print(f"  F1-Score: {ensemble_results['f1']:.4f}")
print(f"  ROC-AUC: {ensemble_results['roc_auc']:.4f}")

# Compare with individual models: show each model's ROC-AUC and how far the
# ensemble sits above (+) or below (-) it.
print("\nROC-AUC vs. individual models:")
for base_name, base_results in evaluation_results.items():
    roc_auc_delta = ensemble_results['roc_auc'] - base_results['roc_auc']
    print(f"  {base_name}: {base_results['roc_auc']:.4f} (ensemble {roc_auc_delta:+.4f})")

print("\n✓ Ensemble performance compared with individual models")
</VSCode.Cell>

<VSCode.Cell id="model_014" language="python">
# Step 10: Cross-Validation — 5-fold stratified CV on the training split
print("\n" + "="*60)
print("STEP 10: CROSS-VALIDATION ANALYSIS")
print("="*60)

# Shuffled, seeded folds so every model is scored on the same splits
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

cv_results = {}

for name, model in models_dict.items():
    print(f"\nCross-validating {name}...")

    # Score both metrics in a single pass: each fold is fitted once instead of
    # once per metric, which halves the training work.
    fold_scores = cross_validate(
        model, X_train, y_train, cv=cv, scoring=['roc_auc', 'f1']
    )
    roc_scores = fold_scores['test_roc_auc']
    f1_scores = fold_scores['test_f1']

    cv_results[name] = {
        'roc_auc_mean': roc_scores.mean(),
        'roc_auc_std': roc_scores.std(),
        'f1_mean': f1_scores.mean(),
        'f1_std': f1_scores.std(),
        'roc_auc_scores': roc_scores,
        'f1_scores': f1_scores
    }

    print(f"  ROC-AUC: {roc_scores.mean():.4f} (+/- {roc_scores.std():.4f})")
    print(f"  F1-Score: {f1_scores.mean():.4f} (+/- {f1_scores.std():.4f})")

# Create summary table (one row per model, mean and std of each metric)
cv_summary = pd.DataFrame({
    'Model': list(cv_results),
    'ROC-AUC Mean': [v['roc_auc_mean'] for v in cv_results.values()],
    'ROC-AUC Std': [v['roc_auc_std'] for v in cv_results.values()],
    'F1 Mean': [v['f1_mean'] for v in cv_results.values()],
    'F1 Std': [v['f1_std'] for v in cv_results.values()]
})

print("\n" + cv_summary.round(4).to_string(index=False))
</VSCode.Cell>

<VSCode.Cell id="model_015" language="python">
# Plot the cross-validation means with ±1 std error bars, one panel per metric
models_list = list(cv_results.keys())
roc_means = [fold_stats['roc_auc_mean'] for fold_stats in cv_results.values()]
roc_stds = [fold_stats['roc_auc_std'] for fold_stats in cv_results.values()]
f1_means = [fold_stats['f1_mean'] for fold_stats in cv_results.values()]
f1_stds = [fold_stats['f1_std'] for fold_stats in cv_results.values()]

x_pos = np.arange(len(models_list))

fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# (metric label, bar heights, error-bar half-lengths)
cv_panels = (
    ('ROC-AUC', roc_means, roc_stds),
    ('F1-Score', f1_means, f1_stds),
)

for ax, (metric_label, panel_means, panel_stds) in zip(axes, cv_panels):
    # `colors` is the per-model palette defined in the model-comparison plot cell
    ax.bar(x_pos, panel_means, yerr=panel_stds, capsize=10,
           color=colors, alpha=0.7, edgecolor='black')
    ax.set_xticks(x_pos)
    ax.set_xticklabels(models_list, rotation=45, ha='right')
    ax.set_ylabel(metric_label, fontsize=11)
    ax.set_title(f'Cross-Validation: {metric_label}', fontsize=12, fontweight='bold')
    ax.grid(axis='y', alpha=0.3)
    ax.set_ylim([0, 1])

plt.suptitle('5-Fold Cross-Validation Results', fontsize=14, fontweight='bold', y=1.02)
plt.tight_layout()
plt.show()

print("\n✓ Cross-validation results visualized")
</VSCode.Cell>

<VSCode.Cell id="model_016" language="python">
# Step 11: restate the evaluation results in business terms, then print guidance
section_rule = "=" * 60

print("\n" + section_rule)
print("STEP 11: BUSINESS METRICS & RECOMMENDATIONS")
print(section_rule)

print("\nKEY BUSINESS METRICS:")
print("-" * 60)

for model_name, model_metrics in evaluation_results.items():
    report_lines = [
        f"\n{model_name}:",
        f"  False Positive Rate: {model_metrics['false_positive_rate']:.4f}",
        f"    → {model_metrics['false_positives']} legitimate apps rejected (COST)",
        f"  Fraud Detection Rate: {model_metrics['fraud_detection_rate']:.4f}",
        f"    → {model_metrics['true_positives']} fraud cases caught (BENEFIT)",
        f"  Specificity: {model_metrics['specificity']:.4f}",
        "    → Ability to identify legitimate applications",
    ]
    print("\n".join(report_lines))

print("\n" + section_rule)
print("RECOMMENDATIONS")
print(section_rule)

# Static guidance text; it is not derived from the metrics computed above.
# NOTE(review): item 3 states that SMOTE was applied, but this notebook only
# imports SMOTE and never calls it — confirm that ModelTrainer applies it.
recommendations = """
1. BEST MODEL FOR PRODUCTION:
   - Select model based on business priorities
   - Fraud detection (Recall) vs False positives (Specificity) trade-off
   - Consider ensemble approach for robustness

2. THRESHOLD TUNING:
   - Default threshold = 0.5 (50% probability)
   - Adjust threshold to balance precision/recall
   - Lower threshold → More fraud detection but more false positives
   - Higher threshold → Fewer false positives but miss fraud

3. HANDLING CLASS IMBALANCE:
   ✓ SMOTE applied during training
   ✓ Stratified cross-validation used
   ✓ Evaluate using appropriate metrics (ROC-AUC, F1, Precision-Recall)
   ✗ Avoid accuracy as primary metric

4. MODEL DEPLOYMENT:
   - Monitor model performance in production
   - Retrain periodically as fraud patterns evolve
   - Log predictions for auditing and model improvement
   - Implement A/B testing for threshold changes

5. ENSEMBLE ADVANTAGES:
   - Combines strengths of multiple models
   - Reduces risk of single model failure
   - Often provides better generalization
   - Recommended for critical applications

6. FEATURE ENGINEERING:
   - Top features show good discriminative power
   - Consider domain expertise in feature creation
   - Monitor feature importance over time
   - Remove redundant features if needed
"""

print(recommendations)
</VSCode.Cell>

<VSCode.Cell id="model_017" language="python">
# Final summary: a two-column recap of the run
print("\n" + "="*60)
print("MODELING SUMMARY")
print("="*60)

# (aspect, value) pairs in display order; the class counts and best score are
# computed from the test labels and the comparison table, the rest is fixed text
summary_rows = [
    ('Models Trained', '3 (LR, RF, XGB)'),
    ('Evaluation Metrics', 'Accuracy, Precision, Recall, F1, ROC-AUC'),
    ('Cross-Validation Folds', '5'),
    ('Test Set Size', f'{len(X_test)} samples'),
    ('Imbalance Ratio', f'{(y_test==0).sum()}:{(y_test==1).sum()}'),
    ('Best Metric Score', f'{comparison_df["ROC-AUC"].max():.4f}'),
    ('Feature Count', X_train.shape[1]),
    ('Ensemble Used', 'Yes (Voting Classifier)'),
]

summary_table = pd.DataFrame({
    'Aspect': [aspect for aspect, _ in summary_rows],
    'Value': [value for _, value in summary_rows],
})

print("\n" + summary_table.to_string(index=False))

for closing_line in ("\n✓ Model training and evaluation complete!",
                     "✓ Ready for deployment or further optimization"):
    print(closing_line)
</VSCode.Cell>
```