# In [1]:
# svm_results_presentation.py

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from joblib import load
from ucimlrepo import fetch_ucirepo
from sklearn.model_selection import train_test_split
from sklearn.metrics import (accuracy_score, classification_report, roc_auc_score, 
                            roc_curve, confusion_matrix, precision_recall_curve,
                            precision_score, recall_score, f1_score)

def _binarize_labels(labels, positive_label):
    """Return an int array holding 1 where *labels* equals *positive_label*, else 0."""
    return (np.asarray(labels) == positive_label).astype(int)


def _plot_evaluation_panels(y_test, y_pred, y_test_bin, y_pred_proba, roc_auc, class_labels):
    """Draw the 2x2 evaluation figure and save it to 'svm_evaluation_results.png'.

    Args:
        y_test: True labels in their original encoding (used for the confusion matrix).
        y_pred: Predicted labels in the same encoding as ``y_test``.
        y_test_bin: True labels as 0/1 integers (1 = positive class).
        y_pred_proba: Predicted probability of the positive class.
        roc_auc: Pre-computed ROC AUC, shown in the ROC legend.
        class_labels: Class labels in the order used for the confusion-matrix axes.
    """
    plt.figure(figsize=(20, 15))

    # 1. ROC Curve
    plt.subplot(2, 2, 1)
    fpr, tpr, _ = roc_curve(y_test_bin, y_pred_proba)
    plt.plot(fpr, tpr, label=f'ROC curve (AUC = {roc_auc:.2f})')
    plt.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal for reference
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve')
    plt.legend(loc='lower right')

    # 2. Precision-Recall Curve
    plt.subplot(2, 2, 2)
    precision, recall, _ = precision_recall_curve(y_test_bin, y_pred_proba)
    plt.plot(recall, precision, marker='.')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision-Recall Curve')

    # 3. Confusion Matrix (rows/columns pinned to class_labels so the axes are readable)
    plt.subplot(2, 2, 3)
    tick_labels = list(class_labels)
    cm = confusion_matrix(y_test, y_pred, labels=tick_labels)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False,
                xticklabels=tick_labels, yticklabels=tick_labels)
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title('Confusion Matrix')

    # 4. Probability Distribution, split by the binarized true class
    plt.subplot(2, 2, 4)
    sns.histplot(y_pred_proba[y_test_bin == 0], color='red', alpha=0.5, bins=50, kde=True, label='Class 0')
    sns.histplot(y_pred_proba[y_test_bin == 1], color='blue', alpha=0.5, bins=50, kde=True, label='Class 1')
    plt.xlabel('Predicted Probability')
    plt.ylabel('Count')
    plt.title('Probability Distribution by Class')
    plt.legend()

    plt.tight_layout()
    plt.savefig('svm_evaluation_results.png')
    print("Saved evaluation plot to 'svm_evaluation_results.png'")


def _plot_threshold_analysis(y_test_bin, y_pred_proba):
    """Plot precision, recall and F1 over a threshold sweep and save the figure.

    Thresholds run from 0.1 in steps of 0.05 up to (but excluding) 1.0. The
    figure is written to 'svm_threshold_analysis_results.png'.

    Args:
        y_test_bin: True labels as 0/1 integers (1 = positive class).
        y_pred_proba: Predicted probability of the positive class.
    """
    plt.figure(figsize=(10, 6))
    results = []

    for threshold in np.arange(0.1, 1.0, 0.05):
        y_pred_threshold = (y_pred_proba >= threshold).astype(int)
        # zero_division=0 on all three metrics: high thresholds can yield no
        # positive predictions, which would otherwise emit warnings.
        results.append([
            threshold,
            precision_score(y_test_bin, y_pred_threshold, zero_division=0),
            recall_score(y_test_bin, y_pred_threshold, zero_division=0),
            f1_score(y_test_bin, y_pred_threshold, zero_division=0),
        ])

    threshold_df = pd.DataFrame(results, columns=['Threshold', 'Precision', 'Recall', 'F1'])

    plt.plot(threshold_df['Threshold'], threshold_df['Precision'], 'b-', label='Precision')
    plt.plot(threshold_df['Threshold'], threshold_df['Recall'], 'g-', label='Recall')
    plt.plot(threshold_df['Threshold'], threshold_df['F1'], 'r-', label='F1')
    plt.xlabel('Threshold')
    plt.ylabel('Score')
    plt.title('Model Performance at Different Thresholds')
    plt.legend()
    plt.grid(True)
    plt.savefig('svm_threshold_analysis_results.png')
    print("Saved threshold analysis to 'svm_threshold_analysis_results.png'")


def _plot_feature_importance(model):
    """Plot the 20 largest absolute coefficients when the classifier is a linear SVM.

    Best-effort: any failure (model is not a pipeline with 'preprocessor' and
    'classifier' steps, names unavailable, shape mismatch, ...) is reported on
    stdout and does not abort the presentation. Nothing is plotted for
    non-linear kernels.

    Args:
        model: Fitted pipeline expected to expose ``named_steps['preprocessor']``
            and ``named_steps['classifier']``.
    """
    try:
        classifier = model.named_steps['classifier']
        if getattr(classifier, 'kernel', None) != 'linear':
            return

        # Ask the fitted preprocessor for its output names instead of rebuilding
        # them from column dtypes: this follows the transformer order actually
        # used at training time. Names may carry transformer prefixes.
        preprocessor = model.named_steps['preprocessor']
        all_features = np.asarray(preprocessor.get_feature_names_out())

        # coef_ is a sparse matrix when the SVM was fitted on sparse input.
        coefficients = classifier.coef_
        if hasattr(coefficients, 'toarray'):
            coefficients = coefficients.toarray()
        coefficients = np.asarray(coefficients)[0]

        if len(all_features) != len(coefficients):
            raise ValueError(
                f"{len(all_features)} feature names do not match "
                f"{len(coefficients)} coefficients"
            )

        feature_importance = pd.DataFrame({
            'Feature': all_features,
            'Importance': np.abs(coefficients)
        }).sort_values('Importance', ascending=False)

        # Plot top 20 features
        plt.figure(figsize=(12, 8))
        sns.barplot(x='Importance', y='Feature', data=feature_importance.head(20))
        plt.title('Top 20 Features by Importance (Linear SVM)')
        plt.tight_layout()
        plt.savefig('svm_feature_importance.png')
        print("Saved feature importance plot to 'svm_feature_importance.png'")
    except Exception as e:
        print(f"Could not create feature importance plot: {e}")


def main():
    """Load the saved SVM model, evaluate it on the held-out split and save plots.

    Steps:
        1. Load 'bank_marketing_svm_model.joblib'; print the error and return
           if loading fails.
        2. Fetch UCI dataset id 222 and take the 20% test split
           (``random_state=42``).
        3. Print accuracy, ROC AUC and the classification report.
        4. Save 'svm_evaluation_results.png' and
           'svm_threshold_analysis_results.png', plus
           'svm_feature_importance.png' when the classifier has a linear kernel.

    The target column is not encoded by this script, so every metric that
    needs a 0/1 target uses a binarized copy in which 1 marks
    ``model.classes_[1]`` -- the class that column 1 of ``predict_proba``
    refers to. This works whether the labels are numeric or strings.
    """
    print("Loading trained SVM model...")
    try:
        # Load the trained model
        model = load('bank_marketing_svm_model.joblib')
    except Exception as e:
        print(f"Error loading model: {e}")
        return
    print("Model loaded successfully!")

    # Fetch the dataset
    print("Fetching dataset...")
    bank_marketing = fetch_ucirepo(id=222)
    X = bank_marketing.data.features
    y = bank_marketing.data.targets.values.ravel()  # Convert y to 1D array

    # Split the data (using same random_state to match training split);
    # only the test portion is needed here.
    _, X_test, _, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    print(f"Test set size: {X_test.shape}")

    # Make predictions
    print("Generating predictions...")
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1]

    # 0/1 view of the true labels aligned with the probability column above.
    y_test_bin = _binarize_labels(y_test, model.classes_[1])

    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    roc_auc = roc_auc_score(y_test_bin, y_pred_proba)

    print("\n===== SVM Model Evaluation =====")
    print(f"Accuracy: {accuracy:.2%}")
    print(f"ROC AUC: {roc_auc:.2%}")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    # Create visualizations
    print("Creating visualizations...")
    _plot_evaluation_panels(y_test, y_pred, y_test_bin, y_pred_proba, roc_auc, model.classes_)
    _plot_threshold_analysis(y_test_bin, y_pred_proba)

    # Feature importance (for linear kernel only)
    _plot_feature_importance(model)

    print("\nResults presentation completed!")

# Run main() only when this module is executed directly, not when it is imported.
if __name__ == "__main__":
    main()

# Output:
# Loading trained SVM model...
# Error loading model: [Errno 2] No such file or directory: 'bank_marketing_svm_model.joblib'
