# Model Testing Framework - Upload Model & Dataset
This notebook allows you to upload a pre-trained machine learning model and a test dataset to evaluate the model's performance using Recall and F1-score metrics.

In [None]:
# Install required packages.
# NOTE(review): no versions are pinned here, so the installed releases can
# change between runs; `%pip install` is the notebook-aware form — confirm
# before switching.
!pip install scikit-learn pandas numpy matplotlib seaborn imbalanced-learn scipy openpyxl plotly

# Import all necessary libraries
import pickle  # deserializes the uploaded model file
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# NOTE(review): px / go are not referenced in the cells visible in this
# notebook — confirm whether the plotly imports are still needed.
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime
import warnings
# Silences all Python warnings for the rest of the kernel session.
warnings.filterwarnings('ignore')

# Scikit-learn imports
from sklearn.metrics import (
    classification_report, confusion_matrix, 
    f1_score, recall_score, precision_score, accuracy_score,
    roc_auc_score, roc_curve, precision_recall_curve
)
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from scipy.io import arff  # ARFF dataset reader used by the dataset-upload cell
from io import StringIO

# Google Colab file upload (Colab-only module; provides files.upload() and
# files.download(), which the upload and export cells call)
from google.colab import files

print("‚úÖ All packages installed and imported successfully!")

## 1. Upload Your Pre-trained Model
Upload your saved machine learning model (.pkl file)

In [None]:
# Upload a pickled model (or "model package" dict) and unpack it into the
# notebook-level names the later cells read:
#   model, scaler, selected_features, selected_feature_indices,
#   feature_names, hyperparameters, model_filename
print("üìÅ Upload your pre-trained model (.pkl file)")
print("Supported model types: KNN, SVM, Random Forest, Logistic Regression, etc.")

# Reset every name the later cells read. Without this, a cancelled or failed
# upload leaves them undefined (NameError further down) or, on a re-run,
# silently pointing at the objects from the previous upload.
model = None
scaler = None
selected_features = None
selected_feature_indices = None
feature_names = None
hyperparameters = None

uploaded_model = files.upload()
# The upload result is keyed by filename; when nothing was uploaded it has no
# keys, and indexing [0] would raise an unhelpful IndexError outside the try.
model_filename = next(iter(uploaded_model), None)

try:
    if model_filename is None:
        raise ValueError("no file was uploaded")

    # SECURITY: pickle.load() can execute arbitrary code embedded in the file.
    # Only upload model files that come from a source you trust.
    with open(model_filename, 'rb') as f:
        model_data = pickle.load(f)

    print(f"‚úÖ Model loaded successfully: {model_filename}")

    if isinstance(model_data, dict):
        # The pickle is a package: a dict bundling the estimator with optional
        # preprocessing artefacts.
        print("\nüìä Model package contents:")
        for key, value in model_data.items():
            print(f"  - {key}: {type(value)}")

        if 'model' in model_data:
            model = model_data['model']
        else:
            # No 'model' key: use the first entry that exposes predict().
            # Only when no entry looks like an estimator do we fall back to
            # the dict itself (the previous behaviour), with a warning.
            estimators = [v for v in model_data.values() if hasattr(v, 'predict')]
            if estimators:
                model = estimators[0]
            else:
                model = model_data
                print("\nWarning: no entry with a predict() method was found in the model package.")
        print(f"\nü§ñ Model type: {type(model).__name__}")

        # Extract other components if available
        scaler = model_data.get('scaler', None)
        selected_features = model_data.get('selected_features', None)
        selected_feature_indices = model_data.get('selected_feature_indices', None)
        feature_names = model_data.get('feature_names', None)
        hyperparameters = model_data.get('hyperparameters', model_data.get('best_hyperparameters', None))

        print(f"\nüìã Additional components:")
        print(f"  - Scaler: {'‚úÖ' if scaler is not None else '‚ùå'}")
        print(f"  - Selected features: {'‚úÖ' if selected_features is not None else '‚ùå'}")
        print(f"  - Feature names: {'‚úÖ' if feature_names is not None else '‚ùå'}")
        print(f"  - Hyperparameters: {'‚úÖ' if hyperparameters is not None else '‚ùå'}")

        if hyperparameters:
            print(f"\n‚öôÔ∏è Model hyperparameters:")
            for param, value in hyperparameters.items():
                print(f"  - {param}: {value}")

    else:
        # The pickle holds the estimator object itself; the optional
        # components keep the None defaults assigned above.
        model = model_data
        print(f"\nü§ñ Direct model type: {type(model).__name__}")

except Exception as e:
    print(f"‚ùå Error loading model: {str(e)}")
    print("Please ensure you've uploaded a valid pickle (.pkl) file containing a trained model.")

## 2. Upload Your Test Dataset
Upload your test dataset (supports .csv, .arff, .xlsx formats)

In [None]:
# Upload the evaluation dataset and load it into `df` (CSV, ARFF or Excel,
# chosen by file extension), then print a short structural summary.
print("üìÅ Upload your test dataset")
print("Supported formats: .csv, .arff, .xlsx")

# Clear any frame left over from a previous run so that a failed upload cannot
# be mistaken for a successful one by the cells below.
df = None

uploaded_data = files.upload()
# The upload result is keyed by filename; guard against it being empty instead
# of letting [0] raise an IndexError outside the try block.
data_filename = next(iter(uploaded_data), None)
file_extension = data_filename.split('.')[-1].lower() if data_filename else ''

try:
    if data_filename is None:
        raise ValueError("no file was uploaded")

    # Load dataset based on file type
    if file_extension == 'csv':
        df = pd.read_csv(data_filename)
        print(f"‚úÖ CSV file loaded: {data_filename}")

    elif file_extension == 'arff':
        with open(data_filename, 'r') as f:
            content = f.read()
        data, meta = arff.loadarff(StringIO(content))
        df = pd.DataFrame(data)
        # loadarff yields nominal/string attributes as bytes objects, which
        # pandas stores in object columns. Decode them once here so every
        # later string comparison sees 'Y' rather than b'Y'.
        for col in df.select_dtypes(include=['object']).columns:
            df[col] = df[col].apply(lambda v: v.decode() if isinstance(v, bytes) else v)
        print(f"‚úÖ ARFF file loaded: {data_filename}")

    elif file_extension in ['xlsx', 'xls']:
        df = pd.read_excel(data_filename)
        print(f"‚úÖ Excel file loaded: {data_filename}")

    else:
        raise ValueError(f"Unsupported file format: {file_extension}")

    print(f"\nüìä Dataset shape: {df.shape}")
    print(f"üìä Columns: {list(df.columns)}")

    # Display first few rows
    print("\nüìã First 5 rows:")
    display(df.head())

    # Count nulls once and reuse the per-column result for both the total and
    # the per-column breakdown.
    nulls_by_column = df.isnull().sum()
    missing_values = nulls_by_column.sum()
    print(f"\nüîç Missing values: {missing_values}")

    if missing_values > 0:
        print("‚ö†Ô∏è Missing values detected per column:")
        missing_per_col = nulls_by_column[nulls_by_column > 0]
        for col, count in missing_per_col.items():
            print(f"  - {col}: {count} missing values")

except Exception as e:
    print(f"‚ùå Error loading dataset: {str(e)}")
    print("Please ensure you've uploaded a valid dataset file.")

## 3. Configure Target Column and Data Preprocessing

In [None]:
# Pick the target column (auto-detected from common names, otherwise asked
# for via input()), convert it to a 0/1 series `y`, and put the remaining
# columns in `X`. `df` itself is left untouched.
print("üéØ Configure target column and preprocessing")
print("\nAvailable columns:")
for i, col in enumerate(df.columns):
    unique_vals = df[col].nunique()
    data_type = df[col].dtype
    print(f"  {i}: {col} (dtype: {data_type}, unique values: {unique_vals})")

# Common target column names
common_targets = ['bug', 'defect', 'class', 'target', 'label', 'y', 'outcome', 'diagnosis']
detected_target = None

for col in df.columns:
    # str() because column labels are not guaranteed to be strings
    # (e.g. integer headers), and .lower() would fail on those.
    col_lower = str(col).lower()
    if col_lower in common_targets or col_lower.endswith('_class') or col_lower.endswith('_label'):
        # Check if it's binary or has few unique values (classification)
        if df[col].nunique() <= 10:
            detected_target = col
            break

if detected_target:
    print(f"\nüéØ Auto-detected target column: '{detected_target}'")
    target_col = detected_target
else:
    print("\n‚ùì Please specify the target column name:")
    target_col = input("Enter target column name: ").strip()

if target_col not in df.columns:
    print(f"‚ùå Column '{target_col}' not found in dataset!")
    print("Available columns:", list(df.columns))
else:
    print(f"‚úÖ Using target column: '{target_col}'")

    # Byte strings (common in ARFF files) live in object-dtype columns, so the
    # column's dtype name is 'object', never 'bytes'. Decode per value on any
    # object column; non-bytes values pass through unchanged. Working on a
    # separate series keeps this cell re-runnable without mutating df.
    target_values = df[target_col]
    if target_values.dtype == object:
        target_values = target_values.apply(lambda x: x.decode() if isinstance(x, bytes) else x)

    # Analyze target column
    print(f"\nüìä Target column analysis:")
    print(f"  - Data type: {target_values.dtype}")
    print(f"  - Unique values: {target_values.nunique()}")
    print(f"  - Value counts:")
    print(target_values.value_counts())

    # Any non-numeric target (object, string or categorical dtype) is mapped
    # to 0/1; numeric targets are used as they are.
    if not pd.api.types.is_numeric_dtype(target_values):
        unique_values = target_values.unique()
        print(f"\nüîÑ Converting categorical target to binary:")
        print(f"  - Unique values: {unique_values}")

        # Auto-detect positive class (exact, case-sensitive match)
        positive_indicators = ['Y', 'yes', 'true', 'True', '1', 'positive', 'defect', 'bug', 'M']
        positive_class = None

        for val in unique_values:
            if str(val) in positive_indicators:
                positive_class = val
                break

        if positive_class is None:
            # No known indicator present: fall back to the second distinct
            # value (or the only one when the column is constant).
            positive_class = unique_values[1] if len(unique_values) > 1 else unique_values[0]

        print(f"  - Positive class: '{positive_class}' -> 1")
        print(f"  - Negative class: others -> 0")

        y = target_values.apply(lambda x: 1 if x == positive_class else 0)
    else:
        y = target_values.copy()

    # Extract features
    X = df.drop(columns=[target_col])

    print(f"\nüìä Preprocessed data:")
    print(f"  - Features shape: {X.shape}")
    print(f"  - Target shape: {y.shape}")
    print(f"  - Target distribution: {y.value_counts().to_dict()}")

## 4. Apply Model-Specific Preprocessing

In [None]:
# Build `X_processed` from `X`: label-encode object columns, fill missing
# values with column medians, then apply the scaler and the feature selection
# that were shipped with the model (if any).
print("üîß Applying model-specific preprocessing...")

# Store original feature names
original_feature_names = X.columns.tolist()
X_processed = X.copy()

# Handle non-numeric columns
non_numeric_cols = X_processed.select_dtypes(include=['object']).columns
if len(non_numeric_cols) > 0:
    print(f"\nüîÑ Converting non-numeric columns: {list(non_numeric_cols)}")

    for col in non_numeric_cols:
        # Byte strings sit inside object-dtype columns (the dtype name is
        # 'object', never 'bytes'), so decode per value; other values pass
        # through. Without this, astype(str) would yield "b'...'" labels.
        decoded = X_processed[col].apply(lambda x: x.decode() if isinstance(x, bytes) else x)

        # Simple label encoding.
        # NOTE(review): the encoder is fitted on this dataset, so the integer
        # codes only match training if the same categories were seen — confirm.
        le = LabelEncoder()
        X_processed[col] = le.fit_transform(decoded.astype(str))

# Handle missing values
if X_processed.isnull().sum().sum() > 0:
    print("\nüîß Handling missing values (filling with median)...")
    # numeric_only=True keeps median() from failing on any remaining
    # non-numeric column; such columns are simply left unfilled.
    X_processed = X_processed.fillna(X_processed.median(numeric_only=True))


def _scale_features(frame):
    """Return `frame` transformed by the model's scaler, or unchanged if there is none."""
    if scaler is None:
        print("\nüìè No scaler found in model. Using original feature values.")
        return frame
    print("\nüìè Applying model's scaler...")
    scaled = scaler.transform(frame)
    # Keep the original index so rows stay aligned with `y`.
    scaled_frame = pd.DataFrame(scaled, columns=frame.columns, index=frame.index)
    print("‚úÖ Data scaled successfully")
    return scaled_frame


def _select_features(frame):
    """Return `frame` restricted to the model's selected features (by name, else by index)."""
    if selected_features is not None and len(selected_features) > 0:
        print(f"\nüéØ Applying feature selection ({len(selected_features)} features)...")
        print(f"Selected features: {selected_features}")

        # Check if selected features exist in current dataset
        available_features = [f for f in selected_features if f in frame.columns]
        missing_features = [f for f in selected_features if f not in frame.columns]

        if missing_features:
            print(f"‚ö†Ô∏è Warning: Some selected features are missing: {missing_features}")

        if available_features:
            frame = frame[available_features]
            print(f"‚úÖ Using {len(available_features)} selected features")
        else:
            print("‚ùå No selected features found in dataset!")

    # The length check mirrors the name-based branch and keeps max() from
    # raising on an empty index list.
    elif selected_feature_indices is not None and len(selected_feature_indices) > 0:
        print(f"\nüéØ Applying feature selection by indices ({len(selected_feature_indices)} features)...")
        if max(selected_feature_indices) < frame.shape[1]:
            frame = frame.iloc[:, selected_feature_indices]
            print(f"‚úÖ Using {len(selected_feature_indices)} selected features")
        else:
            print("‚ùå Feature indices exceed dataset dimensions!")

    return frame


# Fitted scikit-learn transformers record how many columns they were fitted on
# in `n_features_in_`. If that differs from the current column count, the
# scaler cannot be applied to the full frame, so feature selection runs first.
# Otherwise the original order (scale, then select) is kept.
scaler_width = getattr(scaler, 'n_features_in_', None)
select_before_scaling = scaler_width is not None and scaler_width != X_processed.shape[1]

if select_before_scaling:
    print(f"\nNote: the scaler expects {scaler_width} features but the dataset has "
          f"{X_processed.shape[1]}; applying feature selection before scaling.")
    X_processed = _select_features(X_processed)
    X_processed = _scale_features(X_processed)
else:
    X_processed = _scale_features(X_processed)
    X_processed = _select_features(X_processed)

print(f"\nüìä Final processed data shape: {X_processed.shape}")
print(f"üìä Feature columns: {list(X_processed.columns)}")

## 5. Make Predictions and Calculate Metrics

In [None]:
# Run the model on X_processed and compute the metrics that the
# visualisation, report and export cells read:
#   y_pred, y_pred_proba, f1, recall, precision, accuracy, auc,
#   cm, tn, fp, fn, tp, specificity, npv
print("üîÆ Making predictions...")

try:
    # Make predictions
    y_pred = model.predict(X_processed)

    # Continuous scores for ROC / PR curves: positive-class probability when
    # available, otherwise the decision-function margin. A failure here is
    # non-fatal, but only ordinary exceptions are swallowed (a bare `except`
    # would also trap KeyboardInterrupt).
    try:
        if hasattr(model, 'predict_proba'):
            y_pred_proba = model.predict_proba(X_processed)[:, 1]  # Probability of positive class
        elif hasattr(model, 'decision_function'):
            y_pred_proba = model.decision_function(X_processed)
        else:
            y_pred_proba = None
    except Exception:
        y_pred_proba = None

    print(f"‚úÖ Predictions completed!")
    print(f"üìä Predictions shape: {y_pred.shape}")
    print(f"üìä Prediction distribution: {pd.Series(y_pred).value_counts().to_dict()}")

    # Calculate comprehensive metrics
    print("\nüìä PERFORMANCE METRICS")
    print("=" * 50)

    # Main metrics
    f1 = f1_score(y, y_pred)
    recall = recall_score(y, y_pred)
    precision = precision_score(y, y_pred)
    accuracy = accuracy_score(y, y_pred)

    print(f"üéØ F1-Score:  {f1:.4f}")
    print(f"üéØ Recall:    {recall:.4f}")
    print(f"üéØ Precision: {precision:.4f}")
    print(f"üéØ Accuracy:  {accuracy:.4f}")

    # AUC if probabilities available
    if y_pred_proba is not None:
        try:
            auc = roc_auc_score(y, y_pred_proba)
            print(f"üéØ AUC-ROC:   {auc:.4f}")
        except Exception:
            auc = None
            print(f"üéØ AUC-ROC:   Not available")
    else:
        auc = None
        print(f"üéØ AUC-ROC:   Not available (no probabilities)")

    # When every observed label is 0 or 1, pin labels=[0, 1]. This keeps the
    # confusion matrix 2x2 (so the 4-way unpack below works) and keeps the
    # classification report at two rows even if only one class appears in
    # y / y_pred. For any other label set the inferred labels are used, as
    # before.
    observed_labels = set(np.unique(y)) | set(np.unique(y_pred))
    report_labels = [0, 1] if observed_labels <= {0, 1} else None

    # Confusion Matrix
    cm = confusion_matrix(y, y_pred, labels=report_labels)
    tn, fp, fn, tp = cm.ravel()

    print("\nüìä CONFUSION MATRIX")
    print("=" * 30)
    print(f"True Negatives:  {tn}")
    print(f"False Positives: {fp}")
    print(f"False Negatives: {fn}")
    print(f"True Positives:  {tp}")

    # Additional derived metrics (0 when the denominator is empty)
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
    npv = tn / (tn + fn) if (tn + fn) > 0 else 0  # Negative Predictive Value

    print("\nüìä ADDITIONAL METRICS")
    print("=" * 30)
    print(f"Specificity (TNR): {specificity:.4f}")
    print(f"Negative Pred. Value: {npv:.4f}")

    # Detailed classification report
    print("\nüìä DETAILED CLASSIFICATION REPORT")
    print("=" * 50)
    print(classification_report(y, y_pred, labels=report_labels,
                                target_names=['Class 0', 'Class 1']))

except Exception as e:
    # The later cells gate on "'y_pred' in locals()". Remove y_pred (from this
    # run or an earlier one) so they do not proceed with missing or stale
    # metrics after a failure.
    globals().pop('y_pred', None)
    print(f"‚ùå Error during prediction: {str(e)}")
    print("\nTroubleshooting tips:")
    print("- Check if the model expects the same number of features as your dataset")
    print("- Ensure feature names match (if feature selection was used)")
    print("- Verify data types and preprocessing requirements")

## 6. Create Visualizations

In [None]:
# Draw a 2x3 dashboard from the results of the prediction cell. Reads
# cm, f1, recall, precision, accuracy, y, y_pred, y_pred_proba and auc.
if 'y_pred' in locals():
    print("üìä Creating visualizations...")

    # Set up the plotting style
    plt.style.use('default')
    sns.set_palette("husl")

    # Seeded generator so the scatter jitter (panel 4) is identical on every
    # run; the jitter is purely cosmetic.
    jitter_rng = np.random.default_rng(42)

    # Create a figure with multiple subplots
    fig = plt.figure(figsize=(20, 15))

    # 1. Confusion Matrix Heatmap
    plt.subplot(2, 3, 1)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
                xticklabels=['Predicted 0', 'Predicted 1'],
                yticklabels=['Actual 0', 'Actual 1'])
    plt.title('Confusion Matrix', fontsize=14, fontweight='bold')

    # 2. Metrics Bar Plot
    plt.subplot(2, 3, 2)
    metrics = ['F1-Score', 'Recall', 'Precision', 'Accuracy']
    values = [f1, recall, precision, accuracy]
    colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4']

    bars = plt.bar(metrics, values, color=colors, alpha=0.8)
    plt.title('Performance Metrics', fontsize=14, fontweight='bold')
    plt.ylabel('Score')
    plt.ylim(0, 1.1)  # headroom above 1.0 for the value labels

    # Add value labels on bars
    for bar, value in zip(bars, values):
        plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.02, 
                f'{value:.3f}', ha='center', va='bottom', fontweight='bold')

    plt.xticks(rotation=45)

    # 3. Prediction Distribution
    plt.subplot(2, 3, 3)
    pred_counts = pd.Series(y_pred).value_counts()
    plt.pie(pred_counts.values, labels=[f'Class {i}' for i in pred_counts.index], 
            autopct='%1.1f%%', startangle=90, colors=['#FF9999', '#66B2FF'])
    plt.title('Prediction Distribution', fontsize=14, fontweight='bold')

    # 4. True vs Predicted scatter plot (if enough samples)
    plt.subplot(2, 3, 4)
    if len(y) <= 1000:  # Only for smaller datasets to avoid clutter
        # Labels are discrete, so points would overlap exactly; a little
        # Gaussian jitter spreads them into visible clusters.
        y_jitter = y + jitter_rng.normal(0, 0.05, len(y))
        pred_jitter = y_pred + jitter_rng.normal(0, 0.05, len(y_pred))

        plt.scatter(y_jitter, pred_jitter, alpha=0.6, c=y_pred, cmap='RdYlBu')
        plt.xlabel('True Labels')
        plt.ylabel('Predicted Labels')
        plt.title('True vs Predicted Labels', fontsize=14, fontweight='bold')

        # Add diagonal line for perfect predictions
        plt.plot([0, 1], [0, 1], 'r--', alpha=0.8, linewidth=2)
    else:
        # For larger datasets, show a summary table
        summary_data = {
            'Metric': ['Total Samples', 'Correct Predictions', 'Incorrect Predictions', 'Accuracy %'],
            'Value': [len(y), (y == y_pred).sum(), (y != y_pred).sum(), f'{accuracy*100:.1f}%']
        }

        summary_df = pd.DataFrame(summary_data)
        table = plt.table(cellText=summary_df.values, colLabels=summary_df.columns,
                         cellLoc='center', loc='center', bbox=[0, 0, 1, 1])
        table.auto_set_font_size(False)
        table.set_fontsize(12)
        table.scale(1, 2)
        plt.axis('off')
        plt.title('Prediction Summary', fontsize=14, fontweight='bold')

    # 5. ROC Curve (if probabilities available)
    plt.subplot(2, 3, 5)
    if y_pred_proba is not None and auc is not None:
        fpr, tpr, _ = roc_curve(y, y_pred_proba)
        plt.plot(fpr, tpr, color='darkorange', lw=2, 
                label=f'ROC curve (AUC = {auc:.3f})')
        # Chance-level diagonal for reference
        plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('ROC Curve', fontsize=14, fontweight='bold')
        plt.legend(loc="lower right")
    else:
        plt.text(0.5, 0.5, 'ROC Curve\nNot Available\n(No Probabilities)', 
                ha='center', va='center', fontsize=12, 
                bbox=dict(boxstyle='round', facecolor='lightgray', alpha=0.8))
        plt.xlim(0, 1)
        plt.ylim(0, 1)
        plt.title('ROC Curve', fontsize=14, fontweight='bold')

    # 6. Precision-Recall Curve (if probabilities available)
    plt.subplot(2, 3, 6)
    if y_pred_proba is not None:
        try:
            precision_curve, recall_curve, _ = precision_recall_curve(y, y_pred_proba)
            plt.plot(recall_curve, precision_curve, color='blue', lw=2)
            plt.xlabel('Recall')
            plt.ylabel('Precision')
            plt.title('Precision-Recall Curve', fontsize=14, fontweight='bold')
            plt.xlim([0.0, 1.0])
            plt.ylim([0.0, 1.05])
        except Exception:
            # Show a placeholder panel rather than aborting the whole figure.
            # `Exception` (not a bare except) so interrupts still propagate.
            plt.text(0.5, 0.5, 'Precision-Recall\nCurve Error', 
                    ha='center', va='center', fontsize=12,
                    bbox=dict(boxstyle='round', facecolor='lightcoral', alpha=0.8))
            plt.xlim(0, 1)
            plt.ylim(0, 1)
            plt.title('Precision-Recall Curve', fontsize=14, fontweight='bold')
    else:
        plt.text(0.5, 0.5, 'Precision-Recall\nCurve Not Available\n(No Probabilities)', 
                ha='center', va='center', fontsize=12,
                bbox=dict(boxstyle='round', facecolor='lightgray', alpha=0.8))
        plt.xlim(0, 1)
        plt.ylim(0, 1)
        plt.title('Precision-Recall Curve', fontsize=14, fontweight='bold')

    plt.tight_layout()
    plt.show()

    print("‚úÖ Visualizations created successfully!")
else:
    print("‚ùå No predictions available for visualization.")

## 7. Detailed Analysis and Results Summary

In [None]:
# Print a plain-text report: model and dataset facts, headline metrics with a
# traffic-light rating, per-class accuracy, error counts and rule-of-thumb
# recommendations. Reads the results of the prediction cell.
def _rating(score):
    """Return the traffic-light marker for a 0-1 score (>=0.8, >=0.6, otherwise)."""
    if score >= 0.8:
        return 'üü¢'
    if score >= 0.6:
        return 'üü°'
    return 'üî¥'


if 'y_pred' in locals():
    print("üìã DETAILED ANALYSIS REPORT")
    print("=" * 60)

    # Model Information
    print("\nü§ñ MODEL INFORMATION:")
    print(f"  ‚Ä¢ Model Type: {type(model).__name__}")
    print(f"  ‚Ä¢ Model File: {model_filename}")
    if hyperparameters:
        print(f"  ‚Ä¢ Hyperparameters:")
        for param, value in hyperparameters.items():
            print(f"    - {param}: {value}")

    # Dataset Information
    print("\nüìä DATASET INFORMATION:")
    print(f"  ‚Ä¢ Dataset File: {data_filename}")
    print(f"  ‚Ä¢ Total Samples: {len(y)}")
    print(f"  ‚Ä¢ Total Features: {X_processed.shape[1]}")
    print(f"  ‚Ä¢ Target Column: {target_col}")
    print(f"  ‚Ä¢ Class Distribution: {dict(pd.Series(y).value_counts())}")

    # Performance Summary
    print("\nüéØ PERFORMANCE SUMMARY:")
    print(f"  ‚Ä¢ F1-Score: {f1:.4f} {_rating(f1)}")
    print(f"  ‚Ä¢ Recall: {recall:.4f} {_rating(recall)}")
    print(f"  ‚Ä¢ Precision: {precision:.4f} {_rating(precision)}")
    print(f"  ‚Ä¢ Accuracy: {accuracy:.4f} {_rating(accuracy)}")
    if auc is not None:
        print(f"  ‚Ä¢ AUC-ROC: {auc:.4f} {_rating(auc)}")

    # Classification Performance per Class
    print("\nüìä PER-CLASS PERFORMANCE:")
    for class_label in [0, 1]:
        class_mask = (y == class_label)
        class_correct = (class_mask & (y_pred == class_label)).sum()
        class_total = class_mask.sum()
        class_accuracy = class_correct / class_total if class_total > 0 else 0
        print(f"  ‚Ä¢ Class {class_label}: {class_correct}/{class_total} correct ({class_accuracy:.3f})")

    # Error Analysis (percentages are relative to all samples)
    print("\n‚ùå ERROR ANALYSIS:")
    false_positives = ((y == 0) & (y_pred == 1)).sum()
    false_negatives = ((y == 1) & (y_pred == 0)).sum()
    print(f"  ‚Ä¢ False Positives: {false_positives} ({false_positives/len(y)*100:.1f}%)")
    print(f"  ‚Ä¢ False Negatives: {false_negatives} ({false_negatives/len(y)*100:.1f}%)")

    # Recommendations
    print("\nüí° RECOMMENDATIONS:")
    if f1 < 0.6:
        print("  ‚ö†Ô∏è Low F1-score suggests model needs improvement")
        print("     Consider: hyperparameter tuning, feature engineering, or different algorithm")

    if recall < 0.7 and precision > 0.8:
        print("  ‚ö†Ô∏è High precision but low recall - model is conservative")
        print("     Consider: adjusting decision threshold or addressing class imbalance")

    if precision < 0.7 and recall > 0.8:
        print("  ‚ö†Ô∏è High recall but low precision - model is aggressive")
        print("     Consider: more restrictive decision criteria or better feature selection")

    if accuracy > 0.8 and f1 > 0.8 and recall > 0.8:
        print("  ‚úÖ Excellent performance across all metrics!")
    elif accuracy > 0.7 and f1 > 0.7:
        print("  ‚úÖ Good performance - model is working well")

    # Feature Information.
    # Explicit None/len test: `if selected_features:` raises "truth value is
    # ambiguous" when the features were stored as a numpy array or pandas
    # Index. This matches the check used in the preprocessing cell.
    if selected_features is not None and len(selected_features) > 0:
        print("\nüéØ FEATURE SELECTION APPLIED:")
        print(f"  ‚Ä¢ Original Features: {len(original_feature_names)}")
        print(f"  ‚Ä¢ Selected Features: {len(selected_features)}")
        if len(original_feature_names) > 0:  # avoid dividing by zero
            print(f"  ‚Ä¢ Reduction: {(1 - len(selected_features)/len(original_feature_names))*100:.1f}%")

    print("\n" + "=" * 60)
    print(f"Analysis completed at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("=" * 60)

else:
    print("‚ùå No predictions available for detailed analysis.")

## 8. Export Results

In [None]:
# Write the evaluation results to a timestamped multi-sheet Excel workbook and
# trigger a browser download. If that fails, fall back to a CSV holding only
# the summary metrics.
if 'y_pred' in locals():
    print("üíæ Exporting results to Excel...")

    # Create timestamp for filename
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    results_filename = f'model_evaluation_results_{timestamp}.xlsx'

    # Explicit None/len test: plain truthiness raises for a numpy array or
    # pandas Index of feature names.
    has_selected_features = selected_features is not None and len(selected_features) > 0

    # Built before the try block so the CSV fallback can always use it, even
    # when the Excel writer itself cannot be created.
    # `auc is not None` (not truthiness) so a legitimate AUC of 0.0 is kept.
    summary_metrics = pd.DataFrame({
        'Metric': ['F1-Score', 'Recall', 'Precision', 'Accuracy', 'AUC-ROC', 
                  'True Positives', 'True Negatives', 'False Positives', 'False Negatives',
                  'Specificity', 'NPV'],
        'Value': [f1, recall, precision, accuracy, auc if auc is not None else 'N/A',
                 tp, tn, fp, fn, specificity, npv]
    })

    try:
        with pd.ExcelWriter(results_filename, engine='openpyxl') as writer:

            # Sheet 1: Summary Metrics
            summary_metrics.to_excel(writer, sheet_name='Summary_Metrics', index=False)

            # Sheet 2: Detailed Results (predictions for each sample)
            true_labels = y.values if hasattr(y, 'values') else y
            detailed_results = pd.DataFrame({
                'Sample_Index': range(len(y)),
                'True_Label': true_labels,
                'Predicted_Label': y_pred,
                'Correct_Prediction': true_labels == y_pred
            })

            if y_pred_proba is not None:
                detailed_results['Prediction_Probability'] = y_pred_proba

            detailed_results.to_excel(writer, sheet_name='Detailed_Predictions', index=False)

            # Sheet 3: Model Information
            model_info_data = {
                'Property': ['Model_Type', 'Model_File', 'Dataset_File', 'Target_Column',
                            'Total_Samples', 'Total_Features', 'Selected_Features_Count',
                            'Analysis_Timestamp'],
                'Value': [type(model).__name__, model_filename, data_filename, target_col,
                         len(y), X_processed.shape[1], 
                         len(selected_features) if has_selected_features else 'All',
                         datetime.now().strftime('%Y-%m-%d %H:%M:%S')]
            }

            if hyperparameters:
                for param, value in hyperparameters.items():
                    model_info_data['Property'].append(f'Hyperparameter_{param}')
                    # Spreadsheet cells hold scalars only; tuples, lists,
                    # dicts or estimator objects are stored as their text form.
                    is_cell_safe = value is None or np.isscalar(value)
                    model_info_data['Value'].append(value if is_cell_safe else str(value))

            model_info_df = pd.DataFrame(model_info_data)
            model_info_df.to_excel(writer, sheet_name='Model_Information', index=False)

            # Sheet 4: Confusion Matrix. Labels are generated from the matrix
            # shape: identical to Predicted_0/1 and Actual_0/1 for a 2x2
            # matrix, and still valid for any other size.
            cm_df = pd.DataFrame(cm, 
                               columns=[f'Predicted_{i}' for i in range(cm.shape[1])],
                               index=[f'Actual_{i}' for i in range(cm.shape[0])])
            cm_df.to_excel(writer, sheet_name='Confusion_Matrix')

            # Sheet 5: Feature Information (if available)
            if has_selected_features:
                feature_info = pd.DataFrame({
                    'Selected_Features': list(selected_features),
                    'Feature_Index': range(len(selected_features))
                })
                feature_info.to_excel(writer, sheet_name='Selected_Features', index=False)

        print(f"‚úÖ Results exported to: {results_filename}")

        # Download the file
        files.download(results_filename)
        print(f"üì• File downloaded successfully!")

    except Exception as e:
        print(f"‚ùå Error exporting results: {str(e)}")

        # Create a simple CSV as fallback
        try:
            fallback_filename = f'model_evaluation_summary_{timestamp}.csv'
            summary_metrics.to_csv(fallback_filename, index=False)
            print(f"‚úÖ Fallback summary exported to: {fallback_filename}")
            files.download(fallback_filename)
        except Exception as e2:
            print(f"‚ùå Error creating fallback file: {str(e2)}")

else:
    print("‚ùå No results available for export.")

## 9. Conclusion and Next Steps

🎉 **Analysis Complete!**

This notebook has successfully:
- ✅ Loaded your pre-trained model
- ✅ Processed your test dataset
- ✅ Applied model-specific preprocessing
- ✅ Generated predictions
- ✅ Calculated comprehensive performance metrics
- ✅ Created detailed visualizations
- ✅ Exported results to Excel

### Key Metrics Summary:
- **F1-Score**: Harmonic mean of precision and recall
- **Recall**: Ability to find all positive instances
- **Precision**: Accuracy of positive predictions
- **Accuracy**: Overall correctness of predictions

### Next Steps:
1. **Review the detailed metrics** to understand model performance
2. **Analyze the confusion matrix** to identify error patterns
3. **Consider model improvements** based on the recommendations
4. **Test with additional datasets** to validate robustness
5. **Fine-tune hyperparameters** if performance needs improvement

### Files Generated:
- **Excel Report**: Comprehensive results with multiple sheets
- **Visualizations**: Performance charts and analysis plots

Thank you for using this model evaluation framework! 🚀