In [31]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score,
                           roc_curve, precision_recall_curve, auc, average_precision_score,
                           confusion_matrix, classification_report)
from sklearn.calibration import calibration_curve
import os
from datetime import datetime
import joblib

# Global plotting defaults applied to every figure created below:
# a 10x6 default figure size and seaborn's "husl" colour palette.
plt.rcParams.update({'figure.figsize': [10, 6]})
sns.set_palette("husl")

class LoanModelEvaluator:
    """
    A comprehensive evaluation framework that combines technical metrics with business insights
    for loan prediction models.

    On construction the raw test data is transformed to the training feature
    layout and a results directory is created. ``generate_comprehensive_report``
    then scores every model on the training data, predicts on the test data,
    and writes plots and CSV files into that directory.
    """
    def __init__(self, models_dict, train_data, test_data, output_dir=None):
        """
        Initialize the evaluator with models and data.

        Parameters:
        -----------
        models_dict : dict
            Dictionary containing trained models from Phase 2
        train_data : pandas.DataFrame
            Processed training data
        test_data : pandas.DataFrame
            Raw test data that needs processing
        output_dir : str, optional
            Directory to save evaluation results
        """
        self.models = models_dict
        self.train_data = train_data
        self.test_data = self._process_test_data(test_data, train_data)

        # Create results directory (timestamped name unless one was supplied)
        self.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        self.results_dir = output_dir or f"evaluation_results_{self.timestamp}"
        os.makedirs(self.results_dir, exist_ok=True)

        # Business parameters for financial analysis
        self.params = {
            'default_loss_rate': 0.15,  # Expected loss on defaulted loans
            'profit_margin': 0.10,      # Expected profit on good loans
            'processing_cost': 1000      # Cost per application
        }

    def _process_test_data(self, test_df, train_df):
        """
        Process test data to match training data format and features.

        Parameters:
        -----------
        test_df : pandas.DataFrame
            Raw test data; it is copied, never modified in place.
        train_df : pandas.DataFrame
            Training data. It supplies the fill values (median / mode) and,
            minus 'Loan_Status', the output column set and order.

        Returns:
        --------
        pandas.DataFrame
            Test features with exactly the training feature columns; columns
            the test data cannot provide are filled with 0.
        """
        test_processed = test_df.copy()

        # Handle missing values using training data statistics.
        # The filled column is assigned back instead of calling
        # fillna(inplace=True) on the selection: the chained in-place form
        # does not update the parent frame under pandas Copy-on-Write.
        numeric_cols = ['LoanAmount', 'Loan_Amount_Term', 'Credit_History']
        for col in numeric_cols:
            test_processed[col] = test_processed[col].fillna(train_df[col].median())

        categorical_cols = ['Gender', 'Married', 'Dependents', 'Self_Employed']
        for col in categorical_cols:
            test_processed[col] = test_processed[col].fillna(train_df[col].mode()[0])

        # Create engineered features
        test_processed['Total_Income'] = (test_processed['ApplicantIncome'] +
                                          test_processed['CoapplicantIncome'])

        # Log transformations (log(x + 1) keeps zero values finite)
        for col in ['Total_Income', 'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount']:
            test_processed[f'Log_{col}'] = np.log(test_processed[col] + 1)

        # Financial ratios
        # NOTE(review): the *1000 suggests LoanAmount is stored in thousands,
        # and 0.1 / 12 looks like a flat monthly rate -- confirm against the
        # training pipeline.
        test_processed['Income_to_Loan_Ratio'] = (test_processed['Total_Income'] /
                                                  test_processed['LoanAmount'])
        test_processed['EMI'] = (test_processed['LoanAmount'] * 1000 * 0.1) / 12
        test_processed['Balance_Income'] = test_processed['Total_Income'] - test_processed['EMI']

        # Handle categorical variables
        test_processed = pd.get_dummies(test_processed,
                                        columns=['Education', 'Property_Area'],
                                        drop_first=True)

        # Ensure column consistency with training data: keep only the training
        # feature columns, in training order, adding absent ones as 0.
        train_cols = train_df.drop('Loan_Status', axis=1).columns
        return test_processed.reindex(columns=train_cols, fill_value=0)

    def _calculate_business_metrics(self, y_true, y_pred, loan_amounts):
        """
        Calculate business-oriented metrics including financial impact.

        Parameters:
        -----------
        y_true : array-like
            Actual labels (1 / 0).
        y_pred : array-like
            Predicted labels (1 / 0), in the same order as ``y_true``.
        loan_amounts : array-like
            Loan amount per application, in the same order as ``y_true``.

        Returns:
        --------
        dict
            'potential_losses', 'missed_opportunities', 'expected_profits',
            'processing_costs' and 'net_impact' (profits minus the other three).
        """
        # Work on plain arrays so the masks are applied by position; mixing
        # pandas objects that carry different indexes would otherwise align by
        # label and could select the wrong rows.
        actual = np.asarray(y_true)
        predicted = np.asarray(y_pred)
        amounts = np.asarray(loan_amounts)

        # Identify different prediction cases
        true_positives = (predicted == 1) & (actual == 1)
        false_positives = (predicted == 1) & (actual == 0)
        false_negatives = (predicted == 0) & (actual == 1)

        # Calculate financial impacts
        potential_losses = (amounts[false_positives] * self.params['default_loss_rate']).sum()
        missed_opportunities = (amounts[false_negatives] * self.params['profit_margin']).sum()
        expected_profits = (amounts[true_positives] * self.params['profit_margin']).sum()
        processing_costs = len(actual) * self.params['processing_cost']

        return {
            'potential_losses': potential_losses,
            'missed_opportunities': missed_opportunities,
            'expected_profits': expected_profits,
            'processing_costs': processing_costs,
            'net_impact': expected_profits - potential_losses -
                          missed_opportunities - processing_costs
        }

    def evaluate_model_performance(self, model, name, data='train'):
        """
        Comprehensive evaluation of a single model's performance.

        Parameters:
        -----------
        model : estimator
            Fitted model exposing ``predict`` and ``predict_proba``.
        name : str
            Model name used in plot titles and file names.
        data : str, default 'train'
            'train' scores the model against the labelled training data; any
            other value produces predictions for the processed test data.

        Returns:
        --------
        dict
            For 'train': classification metrics merged with business metrics.
            Otherwise: {'predictions': ..., 'probabilities': ...}.
        """
        if data != 'train':
            # The test data has no labels here, so only predictions are returned.
            X = self.test_data
            y_pred = model.predict(X)
            return {'predictions': y_pred,
                    'probabilities': model.predict_proba(X)[:, 1]}

        X = self.train_data.drop('Loan_Status', axis=1)
        y = self.train_data['Loan_Status']

        # Get predictions (column 1 of predict_proba is used as the score)
        y_pred = model.predict(X)
        y_prob = model.predict_proba(X)[:, 1]

        # Calculate metrics
        metrics = {
            'accuracy': accuracy_score(y, y_pred),
            'precision': precision_score(y, y_pred),
            'recall': recall_score(y, y_pred),
            'f1_score': f1_score(y, y_pred),
            'roc_auc': auc(*roc_curve(y, y_prob)[:2]),
            'avg_precision': average_precision_score(y, y_prob)
        }

        # Calculate business metrics
        # NOTE(review): LoanAmount is scaled by 1000 here, presumably because
        # it is stored in thousands -- confirm.
        business_metrics = self._calculate_business_metrics(
            y, y_pred, X['LoanAmount'] * 1000)

        # Create visualizations
        self._plot_model_diagnostics(y, y_pred, y_prob, name, data)

        return {**metrics, **business_metrics}

    def _save_and_close_figure(self, filename):
        """
        Save the current matplotlib figure into the results directory and
        close it, even if saving fails, so figures do not accumulate.
        """
        try:
            plt.savefig(os.path.join(self.results_dir, filename))
        finally:
            plt.close()

    def _plot_model_diagnostics(self, y_true, y_pred, y_prob, model_name, dataset):
        """
        Create and save diagnostic plots for model performance.

        Writes three PNG files (confusion matrix, ROC curve, calibration
        curve) into the results directory; the file names contain
        ``model_name`` and ``dataset``.
        """
        # Confusion Matrix
        plt.figure(figsize=(8, 6))
        cm = confusion_matrix(y_true, y_pred)
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                    xticklabels=['Not Approved', 'Approved'],
                    yticklabels=['Not Approved', 'Approved'])
        plt.title(f'Confusion Matrix - {model_name} ({dataset})')
        self._save_and_close_figure(f'confusion_matrix_{model_name}_{dataset}.png')

        # ROC Curve
        plt.figure(figsize=(8, 6))
        fpr, tpr, _ = roc_curve(y_true, y_prob)
        plt.plot(fpr, tpr, label=f'ROC curve (AUC = {auc(fpr, tpr):.3f})')
        plt.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title(f'ROC Curve - {model_name} ({dataset})')
        plt.legend()
        self._save_and_close_figure(f'roc_curve_{model_name}_{dataset}.png')

        # Calibration Curve
        plt.figure(figsize=(8, 6))
        prob_true, prob_pred = calibration_curve(y_true, y_prob, n_bins=10)
        plt.plot(prob_pred, prob_true, marker='o')
        plt.plot([0, 1], [0, 1], 'k--')  # perfect-calibration diagonal
        plt.xlabel('Mean Predicted Probability')
        plt.ylabel('True Probability')
        plt.title(f'Calibration Curve - {model_name} ({dataset})')
        self._save_and_close_figure(f'calibration_curve_{model_name}_{dataset}.png')

    def analyze_feature_importance(self, model, name):
        """
        Analyze and visualize feature importance.

        Parameters:
        -----------
        model : estimator
            Fitted model exposing ``feature_importances_``.
        name : str
            Model name used in the plot title and file name.

        Returns:
        --------
        pandas.DataFrame
            Columns 'feature' and 'importance', sorted by importance in
            descending order. A bar plot is also saved to the results directory.
        """
        importance = pd.DataFrame({
            'feature': self.train_data.drop('Loan_Status', axis=1).columns,
            'importance': model.feature_importances_
        }).sort_values('importance', ascending=False)

        plt.figure(figsize=(12, 6))
        sns.barplot(data=importance, x='importance', y='feature')
        plt.title(f'Feature Importance - {name}')
        plt.tight_layout()
        self._save_and_close_figure(f'feature_importance_{name}.png')

        return importance

    def generate_comprehensive_report(self):
        """
        Generate a comprehensive evaluation report for all models.

        Returns:
        --------
        dict
            'train_performance', 'test_predictions' and 'feature_importance',
            each keyed by model name. Models without ``feature_importances_``
            have no 'feature_importance' entry. The results are also written
            to the results directory.
        """
        results = {
            'train_performance': {},
            'test_predictions': {},
            'feature_importance': {}
        }

        for name, model in self.models.items():
            # Evaluate on training data
            print(f"\nEvaluating {name}...")
            results['train_performance'][name] = self.evaluate_model_performance(
                model, name, 'train')

            # Generate test predictions
            results['test_predictions'][name] = self.evaluate_model_performance(
                model, name, 'test')

            # Analyze feature importance. Skip models that do not expose it
            # rather than discarding the evaluation work already done.
            if hasattr(model, 'feature_importances_'):
                results['feature_importance'][name] = self.analyze_feature_importance(
                    model, name)
            else:
                print(f"Skipping feature importance for {name}: "
                      "model has no feature_importances_ attribute")

        # Save results
        self._save_results(results)

        return results

    def _save_results(self, results):
        """
        Save evaluation results to files.

        Writes 'training_performance.csv' plus one 'test_predictions_<name>.csv'
        and one 'feature_importance_<name>.csv' per model entry into the
        results directory.
        """
        # Save training performance metrics
        pd.DataFrame(results['train_performance']).to_csv(
            os.path.join(self.results_dir, 'training_performance.csv'))

        # Save test predictions
        for name, preds in results['test_predictions'].items():
            pd.DataFrame({
                'Predicted_Label': preds['predictions'],
                'Probability': preds['probabilities']
            }).to_csv(os.path.join(self.results_dir, f'test_predictions_{name}.csv'))

        # Save feature importance
        for name, importance in results['feature_importance'].items():
            importance.to_csv(os.path.join(self.results_dir,
                                           f'feature_importance_{name}.csv'))

# Example usage.
# NOTE: dt_grid, rf_grid and gb_grid are not defined in this cell; they must
# already exist in the session (objects exposing ``best_estimator_``),
# otherwise the dictionary below raises NameError.
model_dict = {
    'Decision Tree': dt_grid.best_estimator_,
    'Random Forest': rf_grid.best_estimator_,
    'Gradient Boosting': gb_grid.best_estimator_,
}

# Read the processed training data and the raw test data from disk
train_data = pd.read_csv('processed_train_data_final.csv')
test_data = pd.read_csv('Test_Data.csv')

# Build the evaluator and run the full evaluation for every model
evaluator = LoanModelEvaluator(model_dict, train_data, test_data)
results = evaluator.generate_comprehensive_report()

# Training metrics, one column per model, rounded for display
print("\nModel Performance Summary:")
performance_df = pd.DataFrame(results['train_performance']).round(3)
print(performance_df)

# Share of each predicted label on the test data, per model
print("\nTest Predictions Summary:")
for name in model_dict:
    preds = results['test_predictions'][name]['predictions']
    print(f"\n{name}:")
    print("Prediction Distribution:")
    label_shares = pd.Series(preds).value_counts(normalize=True)
    print(label_shares.round(3))



NameError: name 'dt_grid' is not defined