In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import xgboost as xgb
from classes import MixedNaiveBayes

from classes import CustomLogisticRegression
from sklearn.linear_model import SGDClassifier
from sklearn.kernel_approximation import RBFSampler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline

from helpers import cv_evaluate_model

# Shared random seed; passed as `random_state` to the stochastic models in the cells below.
seed = 777

In [2]:
import pickle

# Load the pre-computed cross-validation folds.
# NOTE: unpickling can execute arbitrary code, so only load files produced by
# this project's own data-preparation step.
# The context manager closes the file handle as soon as loading finishes
# (the bare `open(...)` used previously left it open).
with open('data/prepared_data.pkl', 'rb') as prepared_data_file:
    (
        train_X_folds,
        train_y_folds,
        test_X_folds,
        test_y_folds,
        feature_names_folds,
        reverse_map_folds,
        k_fold_split,
    ) = pickle.load(prepared_data_file)

### Baseline

In [3]:
print("=== Baseline ===")

# Regularization settings forwarded to CustomLogisticRegression
# (their exact semantics are defined by that class).
alpha = 10.0
l1_ratio = 0.75

print("Training Logistic Regression with L1 & L2 regularization...")


def make_logistic_regression():
    """Return a new, unfitted baseline classifier built from the settings above."""
    return CustomLogisticRegression(
        alpha=alpha,
        l1_ratio=l1_ratio,
        max_iter=1000,
        tol=1e-6,
        class_weight="balanced",
        random_state=seed,
    )


logistic_regression_results = cv_evaluate_model(
    make_logistic_regression,
    train_X_folds,
    train_y_folds,
    test_X_folds,
    test_y_folds,
    # Decision score = second column of predict_proba.
    get_decision_score=lambda model, X: model.predict_proba(X)[:, 1],
)

=== Baseline ===
Training Logistic Regression with L1 & L2 regularization...
Trained fold 0 in 5.22s
Trained fold 1 in 6.18s
Trained fold 2 in 5.94s
Trained fold 3 in 5.39s
Trained fold 4 in 5.27s


In [4]:
print("Baseline Model Performance Analysis")

# Entry [0] of each summary metric is what this report presents as the
# cross-fold average (the layout comes from cv_evaluate_model -- confirm in helpers).
print(
    f"\nAverage training time: {logistic_regression_results['training_time'][0]:.2f}s")

# (printed label, key in the results dict), in display order.
metric_rows = [
    ("Avg Accuracy:", "test_accuracy"),
    ("Avg Precision:", "test_precision_score"),
    ("Avg Recall:", "test_recall_score"),
    ("Avg F1:", "test_f1"),
    ("Avg ROC AUC:", "test_roc_auc"),
    ("Avg PR AUC:", "test_pr_auc"),
    ("Overfitting Accuracy:", "overfitting_accuracy"),
    ("Overfitting AUC:", "overfitting_auc"),
]
# Pad every label to the longest one. The previous fixed width of 15 was
# narrower than the "Overfitting ..." labels, which left their values misaligned.
label_width = max(len(metric_label) for metric_label, _ in metric_rows)

print("\nMetrics")
for metric_label, metric_key in metric_rows:
    print(f"{metric_label:<{label_width}} {logistic_regression_results[metric_key][0]:.4f}")

# Confusion matrix of the first stored result only (index 0, presumably fold 0);
# unlike the metrics above it is not averaged.
cm = logistic_regression_results["results"][0]["test_cm"]

print("\nConfusion Matrix")
print(f"{'':<15} {'Predicted':<15} {'No Readmit':<15} {'Readmit':<15}")
print(f"{'Actual':<15} {'No Readmit':<15} {cm[0,0]:<15} {cm[0,1]:<15}")
print(f"{'':<15} {'Readmit':<15} {cm[1,0]:<15} {cm[1,1]:<15}")

# Signed coefficients of the first stored model, ordered by absolute magnitude.
first_model = logistic_regression_results["results"][0]["model"]
feature_importance = pd.Series(first_model.coef_, index=feature_names_folds[0])
feature_importance = feature_importance[feature_importance.abs().sort_values(ascending=False).index]

print("\nMost Important Features:")
print(feature_importance.head(20))

Baseline Model Performance Analysis

Average training time: 5.60s

Metrics
Avg Accuracy:   0.6601
Avg Precision:  0.1811
Avg Recall:     0.5807
Avg F1:         0.2760
Avg ROC AUC:    0.6757
Avg PR AUC:     0.2114
Overfitting Accuracy: 0.0030
Overfitting AUC: 0.0138

Confusion Matrix
                Predicted       No Readmit      Readmit        
Actual          No Readmit      12117           5992           
                Readmit         1001            1324           

Most Important Features:
discharge_disposition_id_11                         -3.544379
discharge_disposition_id_28                          1.409278
discharge_disposition_id_22                          1.322310
discharge_disposition_id_5                           0.998582
discharge_disposition_id_15                          0.751023
discharge_disposition_id_13                         -0.734342
discharge_disposition_id_14                         -0.567374
discharge_disposition_id_2                           0.544543
di

### Naive Bayes

In [5]:
print("=== Naive Bayes ===")

print("Training MixedNaiveBayes with balanced classes...")


def make_naive_bayes():
    """Return a new, unfitted MixedNaiveBayes with class_weight='balanced'."""
    return MixedNaiveBayes(class_weight='balanced')


# Same cv_evaluate_model call format as the other models.
naive_bayes_results = cv_evaluate_model(
    make_naive_bayes,
    train_X_folds,
    train_y_folds,
    test_X_folds,
    test_y_folds,
    # Decision score = second column of predict_proba.
    get_decision_score=lambda model, X: model.predict_proba(X)[:, 1],
)

=== Naive Bayes ===
Training MixedNaiveBayes with balanced classes...
Trained fold 0 in 1.18s
Trained fold 1 in 1.05s
Trained fold 2 in 1.13s
Trained fold 3 in 1.14s
Trained fold 4 in 1.08s


In [6]:
print("Naive Bayes Model Performance Analysis")

# Entry [0] of each summary metric is what this report presents as the
# cross-fold average (the layout comes from cv_evaluate_model -- confirm in helpers).
print(
    f"\nAverage training time: {naive_bayes_results['training_time'][0]:.2f}s")

# (printed label, key in the results dict), in display order.
metric_rows = [
    ("Avg Accuracy:", "test_accuracy"),
    ("Avg Precision:", "test_precision_score"),
    ("Avg Recall:", "test_recall_score"),
    ("Avg F1:", "test_f1"),
    ("Avg ROC AUC:", "test_roc_auc"),
    ("Avg PR AUC:", "test_pr_auc"),
    ("Overfitting Accuracy:", "overfitting_accuracy"),
    ("Overfitting AUC:", "overfitting_auc"),
]
# Pad every label to the longest one. The previous fixed width of 15 was
# narrower than the "Overfitting ..." labels, which left their values misaligned.
label_width = max(len(metric_label) for metric_label, _ in metric_rows)

print("\nMetrics")
for metric_label, metric_key in metric_rows:
    print(f"{metric_label:<{label_width}} {naive_bayes_results[metric_key][0]:.4f}")

# Confusion matrix of the first stored result only (index 0, presumably fold 0);
# unlike the metrics above it is not averaged.
cm = naive_bayes_results["results"][0]["test_cm"]

print("\nConfusion Matrix")
print(f"{'':<15} {'Predicted':<15} {'No Readmit':<15} {'Readmit':<15}")
print(f"{'Actual':<15} {'No Readmit':<15} {cm[0,0]:<15} {cm[0,1]:<15}")
print(f"{'':<15} {'Readmit':<15} {cm[1,0]:<15} {cm[1,1]:<15}")

# Feature importance analysis for Naive Bayes using log likelihood
print("\nMost Important Features:")
model = naive_bayes_results["results"][0]["model"]
train_X_first_fold = train_X_folds[0]
feature_names_first_fold = feature_names_folds[0]

# Get log likelihood for each feature.
# NOTE(review): the indexing below assumes get_log_likelihood returns one row
# per feature with one column per class -- confirm against MixedNaiveBayes.
log_likelihood = model.get_log_likelihood(train_X_first_fold)

# Importance = log likelihood under class 1 minus log likelihood under class 0.
# Positive values indicate features that favor class 1 (readmission).
feature_importance_values = log_likelihood[:, 1] - log_likelihood[:, 0]

# Same Series format as the other models, ordered by absolute magnitude.
feature_importance = pd.Series(feature_importance_values, index=feature_names_first_fold)
feature_importance = feature_importance[feature_importance.abs().sort_values(ascending=False).index]

print(feature_importance.head(20))

Naive Bayes Model Performance Analysis

Average training time: 1.12s

Metrics
Avg Accuracy:   0.6199
Avg Precision:  0.1446
Avg Recall:     0.4888
Avg F1:         0.2231
Avg ROC AUC:    0.5878
Avg PR AUC:     0.1544
Overfitting Accuracy: 0.0026
Overfitting AUC: 0.0153

Confusion Matrix
                Predicted       No Readmit      Readmit        
Actual          No Readmit      11473           6636           
                Readmit         1154            1171           

Most Important Features:
discharge_disposition_id_11                 -0.065009
insulin_yes                                  0.026994
admission_source_id_7                        0.017168
discharge_disposition_id_22                 -0.010460
metformin_yes                               -0.008313
age_[50-60)                                 -0.005458
medical_specialty_Cardiology                -0.003493
age_[70-80)                                  0.003474
admission_type_id_3                         -0.003086
discharge

### SVM

In [7]:
print("=== Linear SVM ===")

print("Training SGDClassifier with hinge loss (Linear SVM)...")


def make_linear_svm():
    """Return a new, unfitted SGDClassifier; hinge loss makes it a linear SVM."""
    return SGDClassifier(
        loss='hinge',
        max_iter=1000,
        random_state=seed,
        class_weight='balanced',
        learning_rate='optimal',
    )


# Same cv_evaluate_model call format as the other models.
linear_svm_results = cv_evaluate_model(
    make_linear_svm,
    train_X_folds,
    train_y_folds,
    test_X_folds,
    test_y_folds,
    # Scores come from decision_function rather than predict_proba.
    get_decision_score=lambda model, X: model.decision_function(X),
)

=== Linear SVM ===
Training SGDClassifier with hinge loss (Linear SVM)...
Trained fold 0 in 6.96s
Trained fold 1 in 4.29s
Trained fold 2 in 3.47s
Trained fold 3 in 4.31s
Trained fold 4 in 5.09s


In [8]:
print("Linear SVM Model Performance Analysis")

# Entry [0] of each summary metric is what this report presents as the
# cross-fold average (the layout comes from cv_evaluate_model -- confirm in helpers).
print(
    f"\nAverage training time: {linear_svm_results['training_time'][0]:.2f}s")

# (printed label, key in the results dict), in display order.
metric_rows = [
    ("Avg Accuracy:", "test_accuracy"),
    ("Avg Precision:", "test_precision_score"),
    ("Avg Recall:", "test_recall_score"),
    ("Avg F1:", "test_f1"),
    ("Avg ROC AUC:", "test_roc_auc"),
    ("Avg PR AUC:", "test_pr_auc"),
    ("Overfitting Accuracy:", "overfitting_accuracy"),
    ("Overfitting AUC:", "overfitting_auc"),
]
# Pad every label to the longest one. The previous fixed width of 15 was
# narrower than the "Overfitting ..." labels, which left their values misaligned.
label_width = max(len(metric_label) for metric_label, _ in metric_rows)

print("\nMetrics")
for metric_label, metric_key in metric_rows:
    print(f"{metric_label:<{label_width}} {linear_svm_results[metric_key][0]:.4f}")

# Confusion matrix of the first stored result only (index 0, presumably fold 0);
# unlike the metrics above it is not averaged.
cm = linear_svm_results["results"][0]["test_cm"]

print("\nConfusion Matrix")
print(f"{'':<15} {'Predicted':<15} {'No Readmit':<15} {'Readmit':<15}")
print(f"{'Actual':<15} {'No Readmit':<15} {cm[0,0]:<15} {cm[0,1]:<15}")
print(f"{'':<15} {'Readmit':<15} {cm[1,0]:<15} {cm[1,1]:<15}")

# Feature importance analysis for Linear SVM (adjusted by standard deviation).
# This heading is printed once; a second, plain "Most Important Features:"
# heading used to follow it directly before the table.
print("\nMost Important Features (Std-Adjusted):")

# First-fold training data supplies the per-feature standard deviations.
train_X_first_fold = train_X_folds[0]
feature_names_first_fold = feature_names_folds[0]

# Calculate standard deviation for each feature
feature_std = np.std(train_X_first_fold, axis=0)

# Raw coefficients: first row of the first stored model's coef_.
raw_coef = linear_svm_results["results"][0]["model"].coef_[0]

# Adjusted importance = coefficient * std_dev; the sign (direction) is preserved.
feature_importance_with_direction = raw_coef * feature_std

# Same Series format as the baseline, ordered by absolute magnitude.
feature_importance = pd.Series(feature_importance_with_direction, index=feature_names_first_fold)
feature_importance = feature_importance[feature_importance.abs().sort_values(ascending=False).index]

print(feature_importance.head(20))

Linear SVM Model Performance Analysis

Average training time: 4.83s

Metrics
Avg Accuracy:   0.6536
Avg Precision:  0.1778
Avg Recall:     0.5800
Avg F1:         0.2720
Avg ROC AUC:    0.6686
Avg PR AUC:     0.2069
Overfitting Accuracy: 0.0041
Overfitting AUC: 0.0174

Confusion Matrix
                Predicted       No Readmit      Readmit        
Actual          No Readmit      12496           5613           
                Readmit         1056            1269           

Most Important Features (Std-Adjusted):

Most Important Features:
number_inpatient               0.494645
discharge_disposition_id_11   -0.303617
discharge_disposition_id_22    0.233930
discharge_disposition_id_3     0.227592
discharge_disposition_id_5     0.158642
diag_PC7                       0.128290
diag_PC1                       0.123597
discharge_disposition_id_2     0.119308
diag_PC19                      0.112456
age_[80-90)                    0.107065
discharge_disposition_id_6     0.099763
admission_sourc

### Kernel SVM

In [9]:
print("=== RFF Kernel SVM (Random Fourier Features) ===")

# Tunable parameters
gamma = 0.01          # RBF kernel parameter
n_components = 2000   # Number of random features

print("Training RFF with SGD hinge loss (RBF kernel approximation)...")


def make_rff_svm():
    """Return a new, unfitted pipeline: RBFSampler features fed into a hinge-loss SGD classifier."""
    rff_step = RBFSampler(gamma=gamma, n_components=n_components, random_state=seed)
    sgd_step = SGDClassifier(
        loss='hinge',
        max_iter=1000,
        random_state=seed,
        class_weight='balanced',
        learning_rate='optimal',
    )
    return Pipeline([('rff', rff_step), ('sgd', sgd_step)])


# RFF + SGD hinge loss is used as an approximation to an RBF kernel SVM.
rff_svm_results = cv_evaluate_model(
    make_rff_svm,
    train_X_folds,
    train_y_folds,
    test_X_folds,
    test_y_folds,
    # Scores come from the pipeline's decision_function.
    get_decision_score=lambda model, X: model.decision_function(X),
)


=== RFF Kernel SVM (Random Fourier Features) ===
Training RFF with SGD hinge loss (RBF kernel approximation)...
Trained fold 0 in 20.26s
Trained fold 1 in 15.44s
Trained fold 2 in 16.63s
Trained fold 3 in 17.87s
Trained fold 4 in 16.11s


In [10]:
print("RFF SVM Model Performance Analysis")

# Entry [0] of each summary metric is what this report presents as the
# cross-fold average (the layout comes from cv_evaluate_model -- confirm in helpers).
print(
    f"\nAverage training time: {rff_svm_results['training_time'][0]:.2f}s")

# (printed label, key in the results dict), in display order.
metric_rows = [
    ("Avg Accuracy:", "test_accuracy"),
    ("Avg Precision:", "test_precision_score"),
    ("Avg Recall:", "test_recall_score"),
    ("Avg F1:", "test_f1"),
    ("Avg ROC AUC:", "test_roc_auc"),
    ("Avg PR AUC:", "test_pr_auc"),
    ("Overfitting Accuracy:", "overfitting_accuracy"),
    ("Overfitting AUC:", "overfitting_auc"),
]
# Pad every label to the longest one. The previous fixed width of 15 was
# narrower than the "Overfitting ..." labels, which left their values misaligned.
label_width = max(len(metric_label) for metric_label, _ in metric_rows)

print("\nMetrics")
for metric_label, metric_key in metric_rows:
    print(f"{metric_label:<{label_width}} {rff_svm_results[metric_key][0]:.4f}")

# Confusion matrix of the first stored result only (index 0, presumably fold 0);
# unlike the metrics above it is not averaged.
cm = rff_svm_results["results"][0]["test_cm"]

print("\nConfusion Matrix")
print(f"{'':<15} {'Predicted':<15} {'No Readmit':<15} {'Readmit':<15}")
print(f"{'Actual':<15} {'No Readmit':<15} {cm[0,0]:<15} {cm[0,1]:<15}")
print(f"{'':<15} {'Readmit':<15} {cm[1,0]:<15} {cm[1,1]:<15}")

# No feature-importance table is printed for this model.
print("Note: RFF approximates RBF kernel with random Fourier features for faster training")

RFF SVM Model Performance Analysis

Average training time: 17.26s

Metrics
Avg Accuracy:   0.6743
Avg Precision:  0.1803
Avg Recall:     0.5393
Avg F1:         0.2699
Avg ROC AUC:    0.6650
Avg PR AUC:     0.2002
Overfitting Accuracy: 0.0022
Overfitting AUC: 0.0107

Confusion Matrix
                Predicted       No Readmit      Readmit        
Actual          No Readmit      12973           5136           
                Readmit         1173            1152           
Note: RFF approximates RBF kernel with random Fourier features for faster training


### Decision Tree

In [11]:
print("=== Decision Tree ===")

# Tunable parameters -- all four will be optimized in finetuning.
max_depth = 5
min_samples_split = 200
min_samples_leaf = 100
max_features = None

print("Training Decision Tree with balanced classes...")


def make_decision_tree():
    """Return a new, unfitted DecisionTreeClassifier built from the settings above."""
    return DecisionTreeClassifier(
        random_state=seed,
        class_weight='balanced',
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        max_features=max_features,
    )


# Same cv_evaluate_model call format as the other models.
decision_tree_results = cv_evaluate_model(
    make_decision_tree,
    train_X_folds,
    train_y_folds,
    test_X_folds,
    test_y_folds,
    # Decision score = second column of predict_proba.
    get_decision_score=lambda model, X: model.predict_proba(X)[:, 1],
)


=== Decision Tree ===
Training Decision Tree with balanced classes...
Trained fold 0 in 7.92s
Trained fold 1 in 8.62s
Trained fold 2 in 8.13s
Trained fold 3 in 8.18s
Trained fold 4 in 8.39s


In [12]:
print("Decision Tree Model Performance Analysis")

# Entry [0] of each summary metric is what this report presents as the
# cross-fold average (the layout comes from cv_evaluate_model -- confirm in helpers).
print(
    f"\nAverage training time: {decision_tree_results['training_time'][0]:.2f}s")

# (printed label, key in the results dict), in display order.
metric_rows = [
    ("Avg Accuracy:", "test_accuracy"),
    ("Avg Precision:", "test_precision_score"),
    ("Avg Recall:", "test_recall_score"),
    ("Avg F1:", "test_f1"),
    ("Avg ROC AUC:", "test_roc_auc"),
    ("Avg PR AUC:", "test_pr_auc"),
    ("Overfitting Accuracy:", "overfitting_accuracy"),
    ("Overfitting AUC:", "overfitting_auc"),
]
# Pad every label to the longest one. The previous fixed width of 15 was
# narrower than the "Overfitting ..." labels, which left their values misaligned.
label_width = max(len(metric_label) for metric_label, _ in metric_rows)

print("\nMetrics")
for metric_label, metric_key in metric_rows:
    print(f"{metric_label:<{label_width}} {decision_tree_results[metric_key][0]:.4f}")

# Confusion matrix of the first stored result only (index 0, presumably fold 0);
# unlike the metrics above it is not averaged.
cm = decision_tree_results["results"][0]["test_cm"]

print("\nConfusion Matrix")
print(f"{'':<15} {'Predicted':<15} {'No Readmit':<15} {'Readmit':<15}")
print(f"{'Actual':<15} {'No Readmit':<15} {cm[0,0]:<15} {cm[0,1]:<15}")
print(f"{'':<15} {'Readmit':<15} {cm[1,0]:<15} {cm[1,1]:<15}")

# Feature importance analysis: feature_importances_ of the first stored model,
# ordered by absolute magnitude.
print("\nMost Important Features:")
first_model = decision_tree_results["results"][0]["model"]
feature_importance = pd.Series(first_model.feature_importances_, index=feature_names_folds[0])
feature_importance = feature_importance[feature_importance.abs().sort_values(ascending=False).index]
print(feature_importance.head(20))


Decision Tree Model Performance Analysis

Average training time: 8.25s

Metrics
Avg Accuracy:   0.6367
Avg Precision:  0.1706
Avg Recall:     0.5838
Avg F1:         0.2640
Avg ROC AUC:    0.6478
Avg PR AUC:     0.2043
Overfitting Accuracy: 0.0014
Overfitting AUC: 0.0064

Confusion Matrix
                Predicted       No Readmit      Readmit        
Actual          No Readmit      11463           6646           
                Readmit         950             1375           

Most Important Features:
number_inpatient               5.791732e-01
discharge_disposition_id_22    1.179520e-01
discharge_disposition_id_11    1.005952e-01
discharge_disposition_id_3     7.479172e-02
diag_PC1                       5.943218e-02
discharge_disposition_id_5     2.102627e-02
diag_PC10                      7.142425e-03
diag_PC87                      5.403868e-03
diag_PC20                      5.354703e-03
diag_PC59                      4.676635e-03
diag_PC22                      4.310883e-03
diag_PC30

### Random Forest

In [24]:
print("=== Random Forest ===")

# Tunable parameters -- all five will be optimized in finetuning.
n_estimators = 200
max_depth = 9
min_samples_split = 200
min_samples_leaf = 100
max_features = 'sqrt'

print("Training Random Forest with balanced classes...")


def make_random_forest():
    """Return a new, unfitted RandomForestClassifier built from the settings above."""
    return RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        max_features=max_features,
        class_weight='balanced',
        random_state=seed,
        n_jobs=-1,
    )


# Same cv_evaluate_model call format as the other models.
random_forest_results = cv_evaluate_model(
    make_random_forest,
    train_X_folds,
    train_y_folds,
    test_X_folds,
    test_y_folds,
    # Decision score = second column of predict_proba.
    get_decision_score=lambda model, X: model.predict_proba(X)[:, 1],
)


=== Random Forest ===
Training Random Forest with balanced classes...
Trained fold 0 in 28.26s
Trained fold 1 in 25.73s
Trained fold 2 in 29.81s
Trained fold 3 in 30.76s
Trained fold 4 in 32.26s


In [25]:
print("Random Forest Model Performance Analysis")

# Entry [0] of each summary metric is what this report presents as the
# cross-fold average (the layout comes from cv_evaluate_model -- confirm in helpers).
# The "s" unit suffix matches the training-time line of the other model reports.
print(
    f"\nAverage training time: {random_forest_results['training_time'][0]:.2f}s")

# (printed label, key in the results dict), in display order.
metric_rows = [
    ("Avg Accuracy:", "test_accuracy"),
    ("Avg Precision:", "test_precision_score"),
    ("Avg Recall:", "test_recall_score"),
    ("Avg F1:", "test_f1"),
    ("Avg ROC AUC:", "test_roc_auc"),
    ("Avg PR AUC:", "test_pr_auc"),
    ("Overfitting Accuracy:", "overfitting_accuracy"),
    ("Overfitting AUC:", "overfitting_auc"),
]
# Pad every label to the longest one. The previous fixed width of 15 was
# narrower than the "Overfitting ..." labels, which left their values misaligned.
label_width = max(len(metric_label) for metric_label, _ in metric_rows)

print("\nMetrics")
for metric_label, metric_key in metric_rows:
    print(f"{metric_label:<{label_width}} {random_forest_results[metric_key][0]:.4f}")

# Confusion matrix of the first stored result only (index 0, presumably fold 0);
# unlike the metrics above it is not averaged.
cm = random_forest_results["results"][0]["test_cm"]

print("\nConfusion Matrix")
print(f"{'':<15} {'Predicted':<15} {'No Readmit':<15} {'Readmit':<15}")
print(f"{'Actual':<15} {'No Readmit':<15} {cm[0,0]:<15} {cm[0,1]:<15}")
print(f"{'':<15} {'Readmit':<15} {cm[1,0]:<15} {cm[1,1]:<15}")

# Feature importance analysis: feature_importances_ of the first stored model,
# ordered by absolute magnitude.
print("\nMost Important Features:")
first_model = random_forest_results["results"][0]["model"]
feature_importance = pd.Series(first_model.feature_importances_, index=feature_names_folds[0])
feature_importance = feature_importance[feature_importance.abs().sort_values(ascending=False).index]
print(feature_importance.head(20))

Random Forest Model Performance Analysis

Average training time: 29.37

Metrics
Avg Accuracy:   0.6848
Avg Precision:  0.1775
Avg Recall:     0.5022
Avg F1:         0.2623
Avg ROC AUC:    0.6528
Avg PR AUC:     0.1907
Overfitting Accuracy: 0.0274
Overfitting AUC: 0.0935

Confusion Matrix
                Predicted       No Readmit      Readmit        
Actual          No Readmit      12634           5475           
                Readmit         1148            1177           

Most Important Features:
number_inpatient               0.138247
discharge_disposition_id_11    0.029410
diag_PC1                       0.029248
discharge_disposition_id_22    0.028238
number_emergency               0.019479
diag_PC0                       0.016825
number_diagnoses               0.014534
num_medications                0.012450
diag_PC7                       0.012206
diag_PC18                      0.010790
discharge_disposition_id_3     0.010530
diag_PC5                       0.010506
time_in_hospi

### XGBoost

In [30]:
print("=== XGBoost ===")

# Tunable parameters
n_estimators = 200
max_depth = 4
learning_rate = 0.1
subsample = 1.0
colsample_bytree = 1.0

# Class-imbalance weight: number of label-0 samples divided by number of
# label-1 samples. Computed once here instead of inline on every factory call.
# NOTE(review): the ratio comes from the first fold's training labels and is
# reused for every fold -- confirm the folds share (nearly) the same class balance.
first_fold_train_y = train_y_folds[0]
n_negative = len(first_fold_train_y[first_fold_train_y == 0])
n_positive = len(first_fold_train_y[first_fold_train_y == 1])
scale_pos_weight = n_negative / n_positive

print("Training XGBoost with balanced classes...")

# Same cv_evaluate_model call format as the other models.
xgb_results = cv_evaluate_model(
    lambda: xgb.XGBClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        learning_rate=learning_rate,
        subsample=subsample,
        colsample_bytree=colsample_bytree,
        scale_pos_weight=scale_pos_weight,  # Handle class imbalance
        random_state=seed,
        n_jobs=-1,
        eval_metric='logloss'
    ),
    train_X_folds,
    train_y_folds,
    test_X_folds,
    test_y_folds,
    # Decision score = second column of predict_proba.
    get_decision_score=lambda model, X: model.predict_proba(X)[:, 1]
)


=== XGBoost ===
Training XGBoost with balanced classes...
Trained fold 0 in 3.76s
Trained fold 1 in 3.63s
Trained fold 2 in 3.80s
Trained fold 3 in 3.28s
Trained fold 4 in 3.45s


In [31]:
print("XGBoost Model Performance Analysis")

# Entry [0] of each summary metric is what this report presents as the
# cross-fold average (the layout comes from cv_evaluate_model -- confirm in helpers).
# The "s" unit suffix matches the training-time line of the other model reports.
print(
    f"\nAverage training time: {xgb_results['training_time'][0]:.2f}s")

# (printed label, key in the results dict), in display order.
metric_rows = [
    ("Avg Accuracy:", "test_accuracy"),
    ("Avg Precision:", "test_precision_score"),
    ("Avg Recall:", "test_recall_score"),
    ("Avg F1:", "test_f1"),
    ("Avg ROC AUC:", "test_roc_auc"),
    ("Avg PR AUC:", "test_pr_auc"),
    ("Overfitting Accuracy:", "overfitting_accuracy"),
    ("Overfitting AUC:", "overfitting_auc"),
]
# Pad every label to the longest one. The previous fixed width of 15 was
# narrower than the "Overfitting ..." labels, which left their values misaligned.
label_width = max(len(metric_label) for metric_label, _ in metric_rows)

print("\nMetrics")
for metric_label, metric_key in metric_rows:
    print(f"{metric_label:<{label_width}} {xgb_results[metric_key][0]:.4f}")

# Confusion matrix of the first stored result only (index 0, presumably fold 0);
# unlike the metrics above it is not averaged.
cm = xgb_results["results"][0]["test_cm"]

print("\nConfusion Matrix")
print(f"{'':<15} {'Predicted':<15} {'No Readmit':<15} {'Readmit':<15}")
print(f"{'Actual':<15} {'No Readmit':<15} {cm[0,0]:<15} {cm[0,1]:<15}")
print(f"{'':<15} {'Readmit':<15} {cm[1,0]:<15} {cm[1,1]:<15}")

# Feature importance analysis: feature_importances_ of the first stored model,
# ordered by absolute magnitude.
print("\nMost Important Features:")
first_model = xgb_results["results"][0]["model"]
feature_importance = pd.Series(first_model.feature_importances_, index=feature_names_folds[0])
feature_importance = feature_importance[feature_importance.abs().sort_values(ascending=False).index]
print(feature_importance.head(20))


XGBoost Model Performance Analysis

Average training time: 3.58

Metrics
Avg Accuracy:   0.6783
Avg Precision:  0.1843
Avg Recall:     0.5488
Avg F1:         0.2758
Avg ROC AUC:    0.6717
Avg PR AUC:     0.2193
Overfitting Accuracy: 0.0272
Overfitting AUC: 0.0979

Confusion Matrix
                Predicted       No Readmit      Readmit        
Actual          No Readmit      12601           5508           
                Readmit         1063            1262           

Most Important Features:
number_inpatient               0.042081
discharge_disposition_id_22    0.021406
discharge_disposition_id_3     0.019392
discharge_disposition_id_11    0.012461
diag_PC1                       0.010166
admission_type_id_3            0.009642
discharge_disposition_id_5     0.009424
diag_PC141                     0.006912
number_diagnoses               0.006767
discharge_disposition_id_2     0.006299
diag_PC45                      0.006232
admission_type_id_2            0.005954
diag_PC7            

### Comparison

In [36]:
# Build one summary table covering every model trained above.
print("=== Model Comparison Table ===")

# Display name -> results dict returned by cv_evaluate_model.
models_data = {
    'Logistic Regression': logistic_regression_results,
    'Naive Bayes': naive_bayes_results,
    'Linear SVM': linear_svm_results,
    'Kernel SVM': rff_svm_results,
    'Decision Tree': decision_tree_results,
    'Random Forest': random_forest_results,
    'XGBoost': xgb_results,
}

# Table column -> key in a results dict; entry [0] of each value is the figure
# reported. Dict order fixes the column order.
metric_columns = {
    'Training Time (s)': 'training_time',
    'Accuracy': 'test_accuracy',
    'Precision': 'test_precision_score',
    'Recall': 'test_recall_score',
    'F1 Score': 'test_f1',
    'ROC AUC': 'test_roc_auc',
    'PR AUC': 'test_pr_auc',
    'Overfitting Acc': 'overfitting_accuracy',
    'Overfitting AUC': 'overfitting_auc',
}

# One record per model; values stay numeric so the frame can be analysed further.
comparison_data = [
    {
        'Model': display_name,
        **{column: model_results[key][0] for column, key in metric_columns.items()},
    }
    for display_name, model_results in models_data.items()
]

comparison_df = pd.DataFrame(comparison_data)
comparison_df

=== Model Comparison Table ===


Unnamed: 0,Model,Training Time (s),Accuracy,Precision,Recall,F1 Score,ROC AUC,PR AUC,Overfitting Acc,Overfitting AUC
0,Logistic Regression,5.599105,0.66007,0.181064,0.580709,0.276027,0.675686,0.211393,0.003034,0.013802
1,Naive Bayes,1.117618,0.619935,0.144602,0.488756,0.223074,0.587833,0.154446,0.002617,0.015294
2,Linear SVM,4.825053,0.653565,0.177796,0.58002,0.272037,0.668573,0.206887,0.004067,0.017443
3,Kernel SVM,17.260378,0.674337,0.180277,0.539319,0.269877,0.664965,0.200175,0.002184,0.010716
4,Decision Tree,8.24796,0.636725,0.170579,0.583801,0.263993,0.64782,0.204315,0.00139,0.006404
5,Random Forest,29.365206,0.684776,0.177534,0.502223,0.262297,0.652826,0.190662,0.027428,0.093529
6,XGBoost,3.583205,0.67827,0.184252,0.548798,0.275815,0.671737,0.219269,0.027156,0.09787


In [39]:
# Summarise the comparison table: winners per metric, overfitting, timing, ranking.
print("=== Model Performance Analysis ===")

rule = "=" * 50


def print_section_header(title):
    """Print a blank line, then `title` framed by two 50-character rule lines."""
    print("\n" + rule)
    print(title)
    print(rule)


# Best model per quality metric: the row holding the column maximum.
print_section_header("BEST PERFORMING MODELS:")
for metric in ['Accuracy', 'Precision', 'Recall', 'F1 Score', 'ROC AUC', 'PR AUC']:
    top_row = comparison_df.loc[comparison_df[metric].idxmax()]
    print(f"{metric:<12}: {top_row['Model']:<20} ({top_row[metric]:.4f})")

# For the overfitting columns the maximum marks the worst model (higher is worse).
print_section_header("OVERFITTING ANALYSIS:")
for metric in ['Overfitting Acc', 'Overfitting AUC']:
    worst_row = comparison_df.loc[comparison_df[metric].idxmax()]
    print(f"{metric:<12}: {worst_row['Model']:<20} ({worst_row[metric]:.4f}) - HIGHER IS WORSE")

# Fastest and slowest model by training time.
print_section_header("TRAINING TIME ANALYSIS:")
training_times = comparison_df['Training Time (s)']

fastest_row = comparison_df.loc[training_times.idxmin()]
print(f"Fastest Training: {fastest_row['Model']:<20} ({fastest_row['Training Time (s)']:.2f}s)")

slowest_row = comparison_df.loc[training_times.idxmax()]
print(f"Slowest Training: {slowest_row['Model']:<20} ({slowest_row['Training Time (s)']:.2f}s)")

# Full ranking by ROC AUC, best first.
print_section_header("RANKING BY ROC AUC (Best to Worst):")
roc_ranking = comparison_df.sort_values('ROC AUC', ascending=False)[['Model', 'ROC AUC']]
for ranked_model, ranked_auc in zip(roc_ranking['Model'], roc_ranking['ROC AUC']):
    print(f"{ranked_model:<20}: {ranked_auc:.4f}")


=== Model Performance Analysis ===

BEST PERFORMING MODELS:
Accuracy    : Random Forest        (0.6848)
Precision   : XGBoost              (0.1843)
Recall      : Decision Tree        (0.5838)
F1 Score    : Logistic Regression  (0.2760)
ROC AUC     : Logistic Regression  (0.6757)
PR AUC      : XGBoost              (0.2193)

OVERFITTING ANALYSIS:
Overfitting Acc: Random Forest        (0.0274) - HIGHER IS WORSE
Overfitting AUC: XGBoost              (0.0979) - HIGHER IS WORSE

TRAINING TIME ANALYSIS:
Fastest Training: Naive Bayes          (1.12s)
Slowest Training: Random Forest        (29.37s)

RANKING BY ROC AUC (Best to Worst):
Logistic Regression : 0.6757
XGBoost             : 0.6717
Linear SVM          : 0.6686
Kernel SVM          : 0.6650
Random Forest       : 0.6528
Decision Tree       : 0.6478
Naive Bayes         : 0.5878
