In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import xgboost as xgb
from classes import MixedNaiveBayes

from classes import CustomLogisticRegression
from sklearn.linear_model import SGDClassifier
from sklearn.kernel_approximation import RBFSampler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline

from helpers import cv_evaluate_model

# Shared random seed: passed as random_state to the estimators configured
# in the cells below so that repeated runs use the same randomness.
seed = 777

In [2]:
import pickle

# Load the pre-built cross-validation folds.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load data/prepared_data.pkl when it was produced by a trusted source.
# The context manager closes the file handle as soon as loading finishes
# (the previous bare open() call left it open until garbage collection).
with open('data/prepared_data.pkl', 'rb') as prepared_data_file:
    (
        train_X_folds,
        train_y_folds,
        test_X_folds,
        test_y_folds,
        feature_names_folds,
        reverse_map_folds,
    ) = pickle.load(prepared_data_file)

### Baseline

In [72]:
print("=== Baseline ===")

# Hyperparameters forwarded to CustomLogisticRegression below.
alpha = 5
l1_ratio = 0.75


def make_baseline_classifier():
    """Build a new, unfitted CustomLogisticRegression with the settings above."""
    return CustomLogisticRegression(
        alpha=alpha,
        l1_ratio=l1_ratio,
        max_iter=1000,
        tol=1e-6,
        class_weight="balanced",
        random_state=seed,
    )


# Train Logistic Regression with L1 & L2 regularization
print("Training Logistic Regression with L1 & L2 regularization...")

# Column 1 of predict_proba is used as the decision score for evaluation.
logistic_regression_results = cv_evaluate_model(
    make_baseline_classifier,
    train_X_folds,
    train_y_folds,
    test_X_folds,
    test_y_folds,
    get_decision_score=lambda model, X: model.predict_proba(X)[:, 1],
)

=== Baseline ===
Training Logistic Regression with L1 & L2 regularization...
Trained fold 0 in 7.83s
Trained fold 1 in 6.92s
Trained fold 2 in 7.50s
Trained fold 3 in 6.75s
Trained fold 4 in 6.91s


In [73]:
print("Baseline Model Performance Analysis")

# Element [0] of each aggregated entry is the value reported as the average.
# NOTE(review): the result layout is defined by helpers.cv_evaluate_model - confirm there.
baseline_avg_train_seconds = logistic_regression_results['training_time'][0]
print(f"\nAverage training time: {baseline_avg_train_seconds:.2f}s")

print("\nMetrics")
# (printed label, key in the results dict), in display order.
for metric_label, metric_key in [
    ('Avg Accuracy:', 'test_accuracy'),
    ('Avg Precision:', 'test_precision_score'),
    ('Avg Recall:', 'test_recall_score'),
    ('Avg F1:', 'test_f1'),
    ('Avg ROC AUC:', 'test_roc_auc'),
    ('Avg PR AUC:', 'test_pr_auc'),
    ('Overfitting Accuracy:', 'overfitting_accuracy'),
    ('Overfitting AUC:', 'overfitting_auc'),
]:
    print(f"{metric_label:<15} {logistic_regression_results[metric_key][0]:.4f}")

# Unlike the averaged metrics above, the confusion matrix and the fitted
# model are read from entry 0 of the per-run results only.
baseline_first_entry = logistic_regression_results["results"][0]
cm = baseline_first_entry["test_cm"]

print("\nConfusion Matrix")
print(" ".join(f"{cell:<15}" for cell in ('', 'Predicted', 'No Readmit', 'Readmit')))
for row_title, class_label, row in (('Actual', 'No Readmit', 0), ('', 'Readmit', 1)):
    print(f"{row_title:<15} {class_label:<15} {cm[row, 0]:<15} {cm[row, 1]:<15}")

# Signed coefficients, ordered by descending absolute value.
feature_importance = pd.Series(
    baseline_first_entry["model"].coef_,
    index=feature_names_folds[0],
)
baseline_ranking = feature_importance.abs().sort_values(ascending=False).index
feature_importance = feature_importance.loc[baseline_ranking]

print("\nMost Important Features:")
print(feature_importance.head(20))

Baseline Model Performance Analysis

Average training time: 7.18s

Metrics
Avg Accuracy:   0.6582
Avg Precision:  0.1800
Avg Recall:     0.5801
Avg F1:         0.2747
Avg ROC AUC:    0.6745
Avg PR AUC:     0.2111
Overfitting Accuracy: 0.0037
Overfitting AUC: 0.0168

Confusion Matrix
                Predicted       No Readmit      Readmit        
Actual          No Readmit      12089           6020           
                Readmit         994             1331           

Most Important Features:
discharge_disposition_id_11                  -4.215332
discharge_disposition_id_28                   1.689171
discharge_disposition_id_15                   1.364386
discharge_disposition_id_22                   1.351553
discharge_disposition_id_5                    1.030752
discharge_disposition_id_13                  -0.809176
medical_specialty_Otolaryngology             -0.775223
medical_specialty_Pediatrics-Endocrinology   -0.770679
medical_specialty_Hematology                  0.749855
dis

### Naive Bayes

In [5]:
print("=== Naive Bayes ===")


def make_naive_bayes_classifier():
    """Build a new, unfitted MixedNaiveBayes with balanced class weighting."""
    return MixedNaiveBayes(class_weight='balanced')


# Train MixedNaiveBayes with balanced classes using cv_evaluate_model format
print("Training MixedNaiveBayes with balanced classes...")

# Column 1 of predict_proba is used as the decision score for evaluation.
naive_bayes_results = cv_evaluate_model(
    make_naive_bayes_classifier,
    train_X_folds,
    train_y_folds,
    test_X_folds,
    test_y_folds,
    get_decision_score=lambda model, X: model.predict_proba(X)[:, 1],
)

=== Naive Bayes ===
Training MixedNaiveBayes with balanced classes...
Trained fold 0 in 1.03s
Trained fold 1 in 0.96s
Trained fold 2 in 1.03s
Trained fold 3 in 0.98s
Trained fold 4 in 1.12s


In [6]:
print("Naive Bayes Model Performance Analysis")

# Element [0] of each aggregated entry is the value reported as the average.
# NOTE(review): the result layout is defined by helpers.cv_evaluate_model - confirm there.
nb_avg_train_seconds = naive_bayes_results['training_time'][0]
print(f"\nAverage training time: {nb_avg_train_seconds:.2f}s")

print("\nMetrics")
# (printed label, key in the results dict), in display order.
for metric_label, metric_key in [
    ('Avg Accuracy:', 'test_accuracy'),
    ('Avg Precision:', 'test_precision_score'),
    ('Avg Recall:', 'test_recall_score'),
    ('Avg F1:', 'test_f1'),
    ('Avg ROC AUC:', 'test_roc_auc'),
    ('Avg PR AUC:', 'test_pr_auc'),
    ('Overfitting Accuracy:', 'overfitting_accuracy'),
    ('Overfitting AUC:', 'overfitting_auc'),
]:
    print(f"{metric_label:<15} {naive_bayes_results[metric_key][0]:.4f}")

# The confusion matrix and fitted model come from entry 0 of the per-run results only.
nb_first_entry = naive_bayes_results["results"][0]
cm = nb_first_entry["test_cm"]

print("\nConfusion Matrix")
print(" ".join(f"{cell:<15}" for cell in ('', 'Predicted', 'No Readmit', 'Readmit')))
for row_title, class_label, row in (('Actual', 'No Readmit', 0), ('', 'Readmit', 1)):
    print(f"{row_title:<15} {class_label:<15} {cm[row, 0]:<15} {cm[row, 1]:<15}")

# Feature importance analysis for Naive Bayes using log likelihood
print("\nMost Important Features:")
model = nb_first_entry["model"]
train_X_first_fold = train_X_folds[0]
feature_names_first_fold = feature_names_folds[0]

# get_log_likelihood is project code; the result is indexed below as
# [:, class], so it is assumed to hold one row per feature - TODO confirm.
log_likelihood = model.get_log_likelihood(train_X_first_fold)

# Column 1 minus column 0: positive values favor class 1 (readmission).
feature_importance_values = log_likelihood[:, 1] - log_likelihood[:, 0]

# Same Series layout as the other models, ordered by descending absolute value.
feature_importance = pd.Series(
    feature_importance_values,
    index=feature_names_first_fold,
)
nb_ranking = feature_importance.abs().sort_values(ascending=False).index
feature_importance = feature_importance.loc[nb_ranking]

print(feature_importance.head(20))

Naive Bayes Model Performance Analysis

Average training time: 1.02s

Metrics
Avg Accuracy:   0.6199
Avg Precision:  0.1446
Avg Recall:     0.4888
Avg F1:         0.2231
Avg ROC AUC:    0.5878
Avg PR AUC:     0.1544
Overfitting Accuracy: 0.0026
Overfitting AUC: 0.0153

Confusion Matrix
                Predicted       No Readmit      Readmit        
Actual          No Readmit      11473           6636           
                Readmit         1154            1171           

Most Important Features:
discharge_disposition_id_11                 -0.065009
insulin_yes                                  0.026994
admission_source_id_7                        0.017168
discharge_disposition_id_22                 -0.010460
metformin_yes                               -0.008313
age_[50-60)                                 -0.005458
medical_specialty_Cardiology                -0.003493
age_[70-80)                                  0.003474
admission_type_id_3                         -0.003086
discharge

### Linear SVM

In [None]:
print("=== Linear SVM ===")


def make_linear_svm():
    """Build a new, unfitted SGDClassifier trained with hinge loss (a linear SVM)."""
    return SGDClassifier(
        loss='hinge',  # hinge loss makes the SGD model a linear SVM
        max_iter=1000,
        random_state=seed,
        class_weight='balanced',
        learning_rate='optimal',
    )


# Train SGDClassifier with hinge loss (equivalent to SVM) using cv_evaluate_model format
print("Training SGDClassifier with hinge loss (Linear SVM)...")

# Hinge-loss models expose no predict_proba, so the signed margin from
# decision_function is used as the decision score.
linear_svm_results = cv_evaluate_model(
    make_linear_svm,
    train_X_folds,
    train_y_folds,
    test_X_folds,
    test_y_folds,
    get_decision_score=lambda model, X: model.decision_function(X),
)

=== Linear SVM ===
Training SGDClassifier with hinge loss (Linear SVM)...
Trained fold 0 in 6.76s
Trained fold 1 in 4.22s
Trained fold 2 in 3.51s
Trained fold 3 in 4.30s
Trained fold 4 in 5.33s


In [8]:
print("Linear SVM Model Performance Analysis")

# Element [0] of each aggregated entry is the value reported as the average.
# NOTE(review): the result layout is defined by helpers.cv_evaluate_model - confirm there.
svm_avg_train_seconds = linear_svm_results['training_time'][0]
print(f"\nAverage training time: {svm_avg_train_seconds:.2f}s")

print("\nMetrics")
# (printed label, key in the results dict), in display order.
for metric_label, metric_key in [
    ('Avg Accuracy:', 'test_accuracy'),
    ('Avg Precision:', 'test_precision_score'),
    ('Avg Recall:', 'test_recall_score'),
    ('Avg F1:', 'test_f1'),
    ('Avg ROC AUC:', 'test_roc_auc'),
    ('Avg PR AUC:', 'test_pr_auc'),
    ('Overfitting Accuracy:', 'overfitting_accuracy'),
    ('Overfitting AUC:', 'overfitting_auc'),
]:
    print(f"{metric_label:<15} {linear_svm_results[metric_key][0]:.4f}")

# The confusion matrix and fitted model come from entry 0 of the per-run results only.
svm_first_entry = linear_svm_results["results"][0]
cm = svm_first_entry["test_cm"]

print("\nConfusion Matrix")
print(" ".join(f"{cell:<15}" for cell in ('', 'Predicted', 'No Readmit', 'Readmit')))
for row_title, class_label, row in (('Actual', 'No Readmit', 0), ('', 'Readmit', 1)):
    print(f"{row_title:<15} {class_label:<15} {cm[row, 0]:<15} {cm[row, 1]:<15}")

# Feature importance analysis for Linear SVM (adjusted by standard deviation).
# The header is printed once; the cell previously printed a second, unqualified
# "Most Important Features:" header right before the values.
print("\nMost Important Features (Std-Adjusted):")

# Get the first fold data for standard deviation calculation
train_X_first_fold = train_X_folds[0]
feature_names_first_fold = feature_names_folds[0]

# Per-feature standard deviation over the first fold's training rows.
feature_std = np.std(train_X_first_fold, axis=0)

# Raw coefficients of the binary model (row 0 of coef_).
raw_coef = svm_first_entry["model"].coef_[0]

# coefficient * std_dev: puts features on different scales on a comparable
# footing while keeping the sign (direction) of the coefficient.
feature_importance_with_direction = raw_coef * feature_std

# Same Series layout as the baseline, ordered by descending absolute value.
feature_importance = pd.Series(
    feature_importance_with_direction,
    index=feature_names_first_fold,
)
svm_ranking = feature_importance.abs().sort_values(ascending=False).index
feature_importance = feature_importance.loc[svm_ranking]

print(feature_importance.head(20))

Linear SVM Model Performance Analysis

Average training time: 4.82s

Metrics
Avg Accuracy:   0.6536
Avg Precision:  0.1778
Avg Recall:     0.5800
Avg F1:         0.2720
Avg ROC AUC:    0.6686
Avg PR AUC:     0.2069
Overfitting Accuracy: 0.0041
Overfitting AUC: 0.0174

Confusion Matrix
                Predicted       No Readmit      Readmit        
Actual          No Readmit      12496           5613           
                Readmit         1056            1269           

Most Important Features (Std-Adjusted):

Most Important Features:
number_inpatient               0.494645
discharge_disposition_id_11   -0.303617
discharge_disposition_id_22    0.233930
discharge_disposition_id_3     0.227592
discharge_disposition_id_5     0.158642
diag_PC7                       0.128290
diag_PC1                       0.123597
discharge_disposition_id_2     0.119308
diag_PC19                      0.112456
age_[80-90)                    0.107065
discharge_disposition_id_6     0.099763
admission_sourc

### Kernel SVM

In [None]:
print("=== RFF Kernel SVM (Random Fourier Features) ===")

# Tunable parameters
gamma = 0.01  # RBF kernel parameter
n_components = 3000  # Number of random features


def make_rff_svm():
    """Build a new, unfitted RBFSampler -> hinge-loss SGDClassifier pipeline."""
    rff_step = RBFSampler(gamma=gamma, n_components=n_components, random_state=seed)
    sgd_step = SGDClassifier(
        loss='hinge',
        max_iter=1000,
        random_state=seed,
        class_weight='balanced',
        learning_rate='optimal',
    )
    return Pipeline([('rff', rff_step), ('sgd', sgd_step)])


# Train RFF with SGD hinge loss (approximation to RBF kernel SVM)
print("Training RFF with SGD hinge loss (RBF kernel approximation)...")

# The pipeline's decision_function (signed margin) is used as the decision score.
rff_svm_results = cv_evaluate_model(
    make_rff_svm,
    train_X_folds,
    train_y_folds,
    test_X_folds,
    test_y_folds,
    get_decision_score=lambda model, X: model.decision_function(X),
)


=== RFF Kernel SVM (Random Fourier Features) ===
Training RFF with SGD hinge loss (RBF kernel approximation)...
Trained fold 0 in 21.36s
Trained fold 1 in 30.92s
Trained fold 2 in 22.53s
Trained fold 3 in 22.52s
Trained fold 4 in 21.34s


In [29]:
print("RFF SVM Model Performance Analysis")

# Element [0] of each aggregated entry is the value reported as the average.
# NOTE(review): the result layout is defined by helpers.cv_evaluate_model - confirm there.
rff_avg_train_seconds = rff_svm_results['training_time'][0]
print(f"\nAverage training time: {rff_avg_train_seconds:.2f}s")

print("\nMetrics")
# (printed label, key in the results dict), in display order.
for metric_label, metric_key in [
    ('Avg Accuracy:', 'test_accuracy'),
    ('Avg Precision:', 'test_precision_score'),
    ('Avg Recall:', 'test_recall_score'),
    ('Avg F1:', 'test_f1'),
    ('Avg ROC AUC:', 'test_roc_auc'),
    ('Avg PR AUC:', 'test_pr_auc'),
    ('Overfitting Accuracy:', 'overfitting_accuracy'),
    ('Overfitting AUC:', 'overfitting_auc'),
]:
    print(f"{metric_label:<15} {rff_svm_results[metric_key][0]:.4f}")

# The confusion matrix comes from entry 0 of the per-run results only.
cm = rff_svm_results["results"][0]["test_cm"]

print("\nConfusion Matrix")
print(" ".join(f"{cell:<15}" for cell in ('', 'Predicted', 'No Readmit', 'Readmit')))
for row_title, class_label, row in (('Actual', 'No Readmit', 0), ('', 'Readmit', 1)):
    print(f"{row_title:<15} {class_label:<15} {cm[row, 0]:<15} {cm[row, 1]:<15}")

# No feature ranking is printed for this model.
print("Note: RFF approximates RBF kernel with random Fourier features for faster training")

RFF SVM Model Performance Analysis

Average training time: 23.74s

Metrics
Avg Accuracy:   0.6600
Avg Precision:  0.1783
Avg Recall:     0.5626
Avg F1:         0.2701
Avg ROC AUC:    0.6665
Avg PR AUC:     0.2006
Overfitting Accuracy: 0.0027
Overfitting AUC: 0.0103

Confusion Matrix
                Predicted       No Readmit      Readmit        
Actual          No Readmit      11778           6331           
                Readmit         990             1335           
Note: RFF approximates RBF kernel with random Fourier features for faster training


### Decision Tree

In [36]:
print("=== Decision Tree ===")

# Tunable parameters (starting values; each will be optimized in finetuning)
max_depth = 7
min_samples_split = 200
min_samples_leaf = 100
max_features = None


def make_decision_tree():
    """Build a new, unfitted DecisionTreeClassifier from the parameters above."""
    return DecisionTreeClassifier(
        random_state=seed,
        class_weight='balanced',
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        max_features=max_features,
    )


# Train Decision Tree with balanced classes using cv_evaluate_model format
print("Training Decision Tree with balanced classes...")

# Column 1 of predict_proba is used as the decision score for evaluation.
decision_tree_results = cv_evaluate_model(
    make_decision_tree,
    train_X_folds,
    train_y_folds,
    test_X_folds,
    test_y_folds,
    get_decision_score=lambda model, X: model.predict_proba(X)[:, 1],
)


=== Decision Tree ===
Training Decision Tree with balanced classes...
Trained fold 0 in 11.60s
Trained fold 1 in 10.85s
Trained fold 2 in 11.32s
Trained fold 3 in 10.53s
Trained fold 4 in 10.66s


In [37]:
print("Decision Tree Model Performance Analysis")

# Element [0] of each aggregated entry is the value reported as the average.
# NOTE(review): the result layout is defined by helpers.cv_evaluate_model - confirm there.
tree_avg_train_seconds = decision_tree_results['training_time'][0]
print(f"\nAverage training time: {tree_avg_train_seconds:.2f}s")

print("\nMetrics")
# (printed label, key in the results dict), in display order.
for metric_label, metric_key in [
    ('Avg Accuracy:', 'test_accuracy'),
    ('Avg Precision:', 'test_precision_score'),
    ('Avg Recall:', 'test_recall_score'),
    ('Avg F1:', 'test_f1'),
    ('Avg ROC AUC:', 'test_roc_auc'),
    ('Avg PR AUC:', 'test_pr_auc'),
    ('Overfitting Accuracy:', 'overfitting_accuracy'),
    ('Overfitting AUC:', 'overfitting_auc'),
]:
    print(f"{metric_label:<15} {decision_tree_results[metric_key][0]:.4f}")

# The confusion matrix and fitted model come from entry 0 of the per-run results only.
tree_first_entry = decision_tree_results["results"][0]
cm = tree_first_entry["test_cm"]

print("\nConfusion Matrix")
print(" ".join(f"{cell:<15}" for cell in ('', 'Predicted', 'No Readmit', 'Readmit')))
for row_title, class_label, row in (('Actual', 'No Readmit', 0), ('', 'Readmit', 1)):
    print(f"{row_title:<15} {class_label:<15} {cm[row, 0]:<15} {cm[row, 1]:<15}")

# Feature importance analysis: the tree's feature_importances_, largest first.
print("\nMost Important Features:")
feature_importance = pd.Series(
    tree_first_entry["model"].feature_importances_,
    index=feature_names_folds[0],
)
tree_ranking = feature_importance.abs().sort_values(ascending=False).index
feature_importance = feature_importance.loc[tree_ranking]
print(feature_importance.head(20))


Decision Tree Model Performance Analysis

Average training time: 10.99s

Metrics
Avg Accuracy:   0.6566
Avg Precision:  0.1731
Avg Recall:     0.5489
Avg F1:         0.2631
Avg ROC AUC:    0.6451
Avg PR AUC:     0.1984
Overfitting Accuracy: 0.0068
Overfitting AUC: 0.0245

Confusion Matrix
                Predicted       No Readmit      Readmit        
Actual          No Readmit      12318           5791           
                Readmit         1084            1241           

Most Important Features:
number_inpatient               0.494880
discharge_disposition_id_22    0.100785
discharge_disposition_id_11    0.085955
discharge_disposition_id_3     0.063906
diag_PC1                       0.050782
discharge_disposition_id_5     0.032369
diag_PC128                     0.015816
number_diagnoses               0.013297
diag_PC32                      0.009643
diag_PC43                      0.009213
diag_PC59                      0.008929
discharge_disposition_id_2     0.007524
diag_PC143  

### Random Forest

In [49]:
print("=== Random Forest ===")

# Tunable parameters (starting values; each will be optimized in finetuning)
n_estimators = 200
max_depth = 7
min_samples_split = 200
min_samples_leaf = 100
max_features = 'sqrt'


def make_random_forest():
    """Build a new, unfitted RandomForestClassifier from the parameters above."""
    return RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        max_features=max_features,
        class_weight='balanced',
        random_state=seed,
        n_jobs=-1,  # use every available core
    )


# Train Random Forest with balanced classes using cv_evaluate_model format
print("Training Random Forest with balanced classes...")

# Column 1 of predict_proba is used as the decision score for evaluation.
random_forest_results = cv_evaluate_model(
    make_random_forest,
    train_X_folds,
    train_y_folds,
    test_X_folds,
    test_y_folds,
    get_decision_score=lambda model, X: model.predict_proba(X)[:, 1],
)


=== Random Forest ===
Training Random Forest with balanced classes...
Trained fold 0 in 14.57s
Trained fold 1 in 12.81s
Trained fold 2 in 12.09s
Trained fold 3 in 12.21s
Trained fold 4 in 14.78s


In [50]:
print("Random Forest Model Performance Analysis")

# Element [0] of each aggregated entry is the value reported as the average.
# NOTE(review): the result layout is defined by helpers.cv_evaluate_model - confirm there.
# The trailing "s" unit matches the other model reports in this notebook
# (it was missing from this cell's training-time line).
forest_avg_train_seconds = random_forest_results['training_time'][0]
print(f"\nAverage training time: {forest_avg_train_seconds:.2f}s")

print("\nMetrics")
# (printed label, key in the results dict), in display order.
for metric_label, metric_key in [
    ('Avg Accuracy:', 'test_accuracy'),
    ('Avg Precision:', 'test_precision_score'),
    ('Avg Recall:', 'test_recall_score'),
    ('Avg F1:', 'test_f1'),
    ('Avg ROC AUC:', 'test_roc_auc'),
    ('Avg PR AUC:', 'test_pr_auc'),
    ('Overfitting Accuracy:', 'overfitting_accuracy'),
    ('Overfitting AUC:', 'overfitting_auc'),
]:
    print(f"{metric_label:<15} {random_forest_results[metric_key][0]:.4f}")

# The confusion matrix and fitted model come from entry 0 of the per-run results only.
forest_first_entry = random_forest_results["results"][0]
cm = forest_first_entry["test_cm"]

print("\nConfusion Matrix")
print(" ".join(f"{cell:<15}" for cell in ('', 'Predicted', 'No Readmit', 'Readmit')))
for row_title, class_label, row in (('Actual', 'No Readmit', 0), ('', 'Readmit', 1)):
    print(f"{row_title:<15} {class_label:<15} {cm[row, 0]:<15} {cm[row, 1]:<15}")

# Feature importance analysis: the forest's feature_importances_, largest first.
print("\nMost Important Features:")
feature_importance = pd.Series(
    forest_first_entry["model"].feature_importances_,
    index=feature_names_folds[0],
)
forest_ranking = feature_importance.abs().sort_values(ascending=False).index
feature_importance = feature_importance.loc[forest_ranking]
print(feature_importance.head(20))

Random Forest Model Performance Analysis

Average training time: 13.29

Metrics
Avg Accuracy:   0.6574
Avg Precision:  0.1708
Avg Recall:     0.5370
Avg F1:         0.2591
Avg ROC AUC:    0.6519
Avg PR AUC:     0.1910
Overfitting Accuracy: 0.0185
Overfitting AUC: 0.0601

Confusion Matrix
                Predicted       No Readmit      Readmit        
Actual          No Readmit      11998           6111           
                Readmit         1048            1277           

Most Important Features:
number_inpatient               0.150782
discharge_disposition_id_11    0.043483
diag_PC1                       0.037378
discharge_disposition_id_22    0.036809
number_emergency               0.027408
number_diagnoses               0.021198
diag_PC0                       0.019192
num_medications                0.014960
diag_PC19                      0.014304
diag_PC5                       0.012999
time_in_hospital               0.012438
diag_PC18                      0.012280
discharge_dis

### XGBoost

In [68]:
print("=== XGBoost ===")

# Tunable parameters (starting values; each will be optimized in finetuning)
n_estimators = 200
max_depth = 3
learning_rate = 0.1
subsample = 0.8
colsample_bytree = 0.8

# Handle class imbalance: ratio of class-0 to class-1 training labels.
# Computed once here instead of on every call of the model factory.
# NOTE(review): the ratio is taken from fold 0's training labels and reused
# for every fold; this is only exact if all folds share the same class
# balance - confirm the folds are stratified.
fold0_train_y = train_y_folds[0]
xgb_scale_pos_weight = (
    len(fold0_train_y[fold0_train_y == 0]) / len(fold0_train_y[fold0_train_y == 1])
)


def make_xgb_classifier():
    """Build a new, unfitted XGBClassifier from the parameters above."""
    return xgb.XGBClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        learning_rate=learning_rate,
        subsample=subsample,
        colsample_bytree=colsample_bytree,
        scale_pos_weight=xgb_scale_pos_weight,
        random_state=seed,
        n_jobs=-1,  # use every available core
        eval_metric='logloss',
    )


# Train XGBoost with balanced classes using cv_evaluate_model format
print("Training XGBoost with balanced classes...")

# Column 1 of predict_proba is used as the decision score for evaluation.
xgb_results = cv_evaluate_model(
    make_xgb_classifier,
    train_X_folds,
    train_y_folds,
    test_X_folds,
    test_y_folds,
    get_decision_score=lambda model, X: model.predict_proba(X)[:, 1],
)


=== XGBoost ===
Training XGBoost with balanced classes...
Trained fold 0 in 3.30s
Trained fold 1 in 3.03s
Trained fold 2 in 2.85s
Trained fold 3 in 2.91s
Trained fold 4 in 2.84s


In [69]:
print("XGBoost Model Performance Analysis")

# Element [0] of each aggregated entry is the value reported as the average.
# NOTE(review): the result layout is defined by helpers.cv_evaluate_model - confirm there.
# The trailing "s" unit matches the other model reports in this notebook
# (it was missing from this cell's training-time line).
xgb_avg_train_seconds = xgb_results['training_time'][0]
print(f"\nAverage training time: {xgb_avg_train_seconds:.2f}s")

print("\nMetrics")
# (printed label, key in the results dict), in display order.
for metric_label, metric_key in [
    ('Avg Accuracy:', 'test_accuracy'),
    ('Avg Precision:', 'test_precision_score'),
    ('Avg Recall:', 'test_recall_score'),
    ('Avg F1:', 'test_f1'),
    ('Avg ROC AUC:', 'test_roc_auc'),
    ('Avg PR AUC:', 'test_pr_auc'),
    ('Overfitting Accuracy:', 'overfitting_accuracy'),
    ('Overfitting AUC:', 'overfitting_auc'),
]:
    print(f"{metric_label:<15} {xgb_results[metric_key][0]:.4f}")

# The confusion matrix and fitted model come from entry 0 of the per-run results only.
xgb_first_entry = xgb_results["results"][0]
cm = xgb_first_entry["test_cm"]

print("\nConfusion Matrix")
print(" ".join(f"{cell:<15}" for cell in ('', 'Predicted', 'No Readmit', 'Readmit')))
for row_title, class_label, row in (('Actual', 'No Readmit', 0), ('', 'Readmit', 1)):
    print(f"{row_title:<15} {class_label:<15} {cm[row, 0]:<15} {cm[row, 1]:<15}")

# Feature importance analysis: the booster's feature_importances_, largest first.
print("\nMost Important Features:")
feature_importance = pd.Series(
    xgb_first_entry["model"].feature_importances_,
    index=feature_names_folds[0],
)
xgb_ranking = feature_importance.abs().sort_values(ascending=False).index
feature_importance = feature_importance.loc[xgb_ranking]
print(feature_importance.head(20))


XGBoost Model Performance Analysis

Average training time: 2.99

Metrics
Avg Accuracy:   0.6690
Avg Precision:  0.1829
Avg Recall:     0.5668
Avg F1:         0.2765
Avg ROC AUC:    0.6747
Avg PR AUC:     0.2208
Overfitting Accuracy: 0.0165
Overfitting AUC: 0.0575

Confusion Matrix
                Predicted       No Readmit      Readmit        
Actual          No Readmit      12305           5804           
                Readmit         996             1329           

Most Important Features:
number_inpatient               0.035053
discharge_disposition_id_22    0.019119
diag_PC1                       0.015644
discharge_disposition_id_11    0.014054
discharge_disposition_id_3     0.013836
number_diagnoses               0.008634
discharge_disposition_id_5     0.008546
diag_PC0                       0.007733
number_emergency               0.007637
diag_PC45                      0.007046
diag_PC7                       0.006701
discharge_disposition_id_2     0.006171
diag_PC35           

### Comparison

In [75]:
# Create comparison table with all model results
print("=== Model Comparison Table ===")

# Collect all results (display name -> result dict from cv_evaluate_model)
models_data = {
    'Logistic Regression': logistic_regression_results,
    'Naive Bayes': naive_bayes_results,
    'Linear SVM': linear_svm_results,
    'Kernel SVM': rff_svm_results,
    'Decision Tree': decision_tree_results,
    'Random Forest': random_forest_results,
    'XGBoost': xgb_results
}

# Table column -> (key in a result dict, number of decimals shown).
comparison_columns = {
    'Training Time (s)': ('training_time', 2),
    'Accuracy': ('test_accuracy', 4),
    'Precision': ('test_precision_score', 4),
    'Recall': ('test_recall_score', 4),
    'F1 Score': ('test_f1', 4),
    'ROC AUC': ('test_roc_auc', 4),
    'PR AUC': ('test_pr_auc', 4),
    'Overfitting Acc': ('overfitting_accuracy', 4),
    'Overfitting AUC': ('overfitting_auc', 4),
}

# Numeric table used for every ranking below. Values keep full precision, so
# best/worst selection is no longer made on numbers that were rounded to
# strings and parsed back (which could turn near-ties into arbitrary picks).
numeric_data = pd.DataFrame([
    {
        'Model': model_name,
        **{
            column: float(results[result_key][0])
            for column, (result_key, decimals) in comparison_columns.items()
        },
    }
    for model_name, results in models_data.items()
])

# String-formatted copy, used only for display (same layout as before).
comparison_df = numeric_data.copy()
for column, (result_key, decimals) in comparison_columns.items():
    comparison_df[column] = numeric_data[column].map(f"{{:.{decimals}f}}".format)

# Display the comparison table
print("\nModel Performance Comparison:")
print("=" * 100)
print(comparison_df.to_string(index=False))

# Find best performing models for each metric
print("\n" + "=" * 50)
print("BEST PERFORMING MODELS:")
print("=" * 50)

for metric in ['Accuracy', 'Precision', 'Recall', 'F1 Score', 'ROC AUC', 'PR AUC']:
    best_idx = numeric_data[metric].idxmax()
    best_model = numeric_data.loc[best_idx, 'Model']
    best_score = numeric_data.loc[best_idx, metric]
    print(f"{metric:<12}: {best_model:<20} ({best_score:.4f})")

print("\n" + "=" * 50)
print("OVERFITTING ANALYSIS:")
print("=" * 50)
# Report the model with the HIGHEST overfitting value (higher is worse).
for metric in ['Overfitting Acc', 'Overfitting AUC']:
    worst_idx = numeric_data[metric].idxmax()
    worst_model = numeric_data.loc[worst_idx, 'Model']
    worst_score = numeric_data.loc[worst_idx, metric]
    print(f"{metric:<12}: {worst_model:<20} ({worst_score:.4f}) - HIGHER IS WORSE")

print("\n" + "=" * 50)
print("TRAINING TIME ANALYSIS:")
print("=" * 50)
fastest_idx = numeric_data['Training Time (s)'].idxmin()
fastest_model = numeric_data.loc[fastest_idx, 'Model']
fastest_time = numeric_data.loc[fastest_idx, 'Training Time (s)']
print(f"Fastest Training: {fastest_model:<20} ({fastest_time:.2f}s)")

slowest_idx = numeric_data['Training Time (s)'].idxmax()
slowest_model = numeric_data.loc[slowest_idx, 'Model']
slowest_time = numeric_data.loc[slowest_idx, 'Training Time (s)']
print(f"Slowest Training: {slowest_model:<20} ({slowest_time:.2f}s)")


=== Model Comparison Table ===

Model Performance Comparison:
              Model Training Time (s) Accuracy Precision Recall F1 Score ROC AUC PR AUC Overfitting Acc Overfitting AUC
Logistic Regression              7.18   0.6582    0.1800 0.5801   0.2747  0.6745 0.2111          0.0037          0.0168
        Naive Bayes              1.02   0.6199    0.1446 0.4888   0.2231  0.5878 0.1544          0.0026          0.0153
         Linear SVM              4.82   0.6536    0.1778 0.5800   0.2720  0.6686 0.2069          0.0041          0.0174
         Kernel SVM             23.74   0.6600    0.1783 0.5626   0.2701  0.6665 0.2006          0.0027          0.0103
      Decision Tree             10.99   0.6566    0.1731 0.5489   0.2631  0.6451 0.1984          0.0068          0.0245
      Random Forest             13.29   0.6574    0.1708 0.5370   0.2591  0.6519 0.1910          0.0185          0.0601
            XGBoost              2.99   0.6690    0.1829 0.5668   0.2765  0.6747 0.2208          0