# Heart Failure Prediction
This notebook evaluates models for predicting heart failure using a combination of health-related features.

In [3]:
# Importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_auc_score

# Load the dataset
data = pd.read_csv('./data/csv/clean_data.csv')

# Create the target variable: HeartFailure
data['HeartFailure'] = (data['HadHeartAttack'] == 1.0) | (data['HadAngina'] == 1.0)

# Drop original columns used to create the target variable
data.drop(columns=['HadHeartAttack', 'HadAngina'], inplace=True)

# Display dataset overview
data.head()


Unnamed: 0,State,Sex,GeneralHealth,PhysicalHealthDays,MentalHealthDays,LastCheckupTime,PhysicalActivities,SleepHours,HadStroke,HadArthritis,...,SmokerStatus,ECigaretteUsage,RaceEthnicityCategory,AgeCategory,HeightInMeters,WeightInKilograms,BMI,AlcoholDrinkers,HighRiskLastYear,HeartFailure
0,11,1,2.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,...,0.0,1.0,3.0,12.0,,,,0.0,0.0,False
1,11,1,0.0,0.0,0.0,,0.0,6.0,0.0,0.0,...,0.0,0.0,3.0,12.0,1.6,68.04,26.57,0.0,0.0,False
2,11,1,2.0,2.0,3.0,0.0,1.0,5.0,0.0,0.0,...,0.0,0.0,3.0,7.0,1.57,63.5,25.61,0.0,0.0,False
3,11,1,0.0,0.0,0.0,0.0,1.0,7.0,0.0,1.0,...,2.0,0.0,3.0,,1.65,63.5,23.3,0.0,0.0,False
4,11,1,3.0,2.0,0.0,0.0,1.0,9.0,0.0,0.0,...,0.0,0.0,3.0,4.0,1.57,53.98,21.77,1.0,0.0,False


Preprocessing the data

In [4]:
# Check for missing values
print("Missing values per column:")
print(data.isnull().sum())

# Fill missing values with column median
data.fillna(data.median(), inplace=True)

# Verify no missing values remain
print("Missing values after imputation:")
print(data.isnull().sum())

Missing values per column:
State                        0
Sex                          0
GeneralHealth             1198
PhysicalHealthDays       10927
MentalHealthDays          9067
LastCheckupTime           8308
PhysicalActivities        1093
SleepHours                5453
HadStroke                 1557
HadArthritis              2633
HadDiabetes              15252
SmokerStatus             35462
ECigaretteUsage          35660
RaceEthnicityCategory    14057
AgeCategory               9079
HeightInMeters           28652
WeightInKilograms        42078
BMI                      48806
AlcoholDrinkers          46574
HighRiskLastYear         50623
HeartFailure                 0
dtype: int64
Missing values after imputation:
State                    0
Sex                      0
GeneralHealth            0
PhysicalHealthDays       0
MentalHealthDays         0
LastCheckupTime          0
PhysicalActivities       0
SleepHours               0
HadStroke                0
HadArthritis             0
HadDia

In [7]:
# Splitting the dataset into features and target
X = data.drop(columns=['HeartFailure'])
y = data['HeartFailure'].astype(int)

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)


In [8]:
# Initialize and train models
# Model 1: Logistic Regression
logistic_model = LogisticRegression(max_iter=10000, random_state=42)
logistic_model.fit(X_train, y_train)

# Model 2: Random Forest
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Evaluate models
logistic_preds = logistic_model.predict(X_test)
rf_preds = rf_model.predict(X_test)

# Print evaluation metrics for both models
print("Logistic Regression Evaluation:")
print(classification_report(y_test, logistic_preds))
print(f"Accuracy: {accuracy_score(y_test, logistic_preds)}")
print(f"ROC AUC: {roc_auc_score(y_test, logistic_preds)}")

print("\nRandom Forest Evaluation:")
print(classification_report(y_test, rf_preds))
print(f"Accuracy: {accuracy_score(y_test, rf_preds)}")
print(f"ROC AUC: {roc_auc_score(y_test, rf_preds)}")


Logistic Regression Evaluation:
              precision    recall  f1-score   support

           0       0.92      0.99      0.95     81077
           1       0.54      0.09      0.15      7950

    accuracy                           0.91     89027
   macro avg       0.73      0.54      0.55     89027
weighted avg       0.88      0.91      0.88     89027

Accuracy: 0.911757107394386
ROC AUC: 0.5396639819759816

Random Forest Evaluation:
              precision    recall  f1-score   support

           0       0.92      0.99      0.95     81077
           1       0.48      0.06      0.11      7950

    accuracy                           0.91     89027
   macro avg       0.70      0.53      0.53     89027
weighted avg       0.88      0.91      0.88     89027

Accuracy: 0.9102182483965539
ROC AUC: 0.5268498980276767


Above we can see that the recall for "1" which is heart failure, extremely low and therefore is a sub optimal outcome.

### Model Optimization
Iterative changes to improve performance, focusing on reducing false negatives.

In [10]:
# Example optimization: Tuning Random Forest
optimized_rf = RandomForestClassifier(n_estimators=200, max_depth=10, random_state=42)
optimized_rf.fit(X_train, y_train)

# Evaluate optimized model
optimized_rf_preds = optimized_rf.predict(X_test)

print("Optimized Random Forest Evaluation:")
print(classification_report(y_test, optimized_rf_preds))
print(f"Accuracy: {accuracy_score(y_test, optimized_rf_preds)}")
print(f"ROC AUC: {roc_auc_score(y_test, optimized_rf_preds)}")


Optimized Random Forest Evaluation:
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     81077
           1       0.60      0.02      0.05      7950

    accuracy                           0.91     89027
   macro avg       0.76      0.51      0.50     89027
weighted avg       0.89      0.91      0.87     89027

Accuracy: 0.9114538286137913
ROC AUC: 0.5114180517735955


### Model Optimization
Iterative changes to improve performance, focusing on reducing false negatives.\
Note that ROC AUC is the correct metric to evaluate our model’s performance, not R-squared, because the focus is on the Model’s ability to distinguish classes. It's a classificaiton problem.\
In contrast, R-squared focuses on variance explained in the target variable, for regression problems.

Since we are working with a classification problem (predicting heart failure), ROC AUC is the correct metric to evaluate our model’s performance.

In [11]:
# Document optimization iterations in a table
optimization_results = pd.DataFrame({
    'Model': ['Logistic Regression', 'Random Forest', 'Optimized Random Forest'],
    'Accuracy': [accuracy_score(y_test, logistic_preds),
                 accuracy_score(y_test, rf_preds),
                 accuracy_score(y_test, optimized_rf_preds)],
    'ROC AUC': [roc_auc_score(y_test, logistic_preds),
                roc_auc_score(y_test, rf_preds),
                roc_auc_score(y_test, optimized_rf_preds)]
})

optimization_results.to_csv('./data/csv/model_optimization_results.csv', index=False)
optimization_results


Unnamed: 0,Model,Accuracy,ROC AUC
0,Logistic Regression,0.911757,0.539664
1,Random Forest,0.910218,0.52685
2,Optimized Random Forest,0.911454,0.511418


The AUC (Area Under the Curve) value ranges from 0 to 1:
	•	1.0: Perfect model.
	•	0.5: No discrimination (random guessing).
	•	< 0.5: Worse than random guessing.

### Final Model Performance
The model with the best balance of accuracy and reduced false negatives.

To improve model performance, we'll address class imbalance with Resampling, Class weighting, and applying SMOTE to oversample the minority class (Heart Failure).

We will finally perform threshold tuning.

In [12]:
# Addressing Class Imbalance with Resampling and Class Weighting

from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score

# Apply SMOTE to oversample the minority class
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

# Train Random Forest with class weighting
weighted_rf = RandomForestClassifier(
    n_estimators=200, 
    max_depth=10, 
    random_state=42, 
    class_weight='balanced'
)
weighted_rf.fit(X_resampled, y_resampled)

# Evaluate the weighted model
weighted_rf_preds = weighted_rf.predict(X_test)

print("Weighted Random Forest Evaluation:")
print(classification_report(y_test, weighted_rf_preds))
print(f"Accuracy: {accuracy_score(y_test, weighted_rf_preds)}")
print(f"ROC AUC: {roc_auc_score(y_test, weighted_rf_preds)}")


Weighted Random Forest Evaluation:
              precision    recall  f1-score   support

           0       0.93      0.95      0.94     81077
           1       0.37      0.29      0.32      7950

    accuracy                           0.89     89027
   macro avg       0.65      0.62      0.63     89027
weighted avg       0.88      0.89      0.89     89027

Accuracy: 0.8927067069540701
ROC AUC: 0.6193425692153968


In [13]:
# Threshold Tuning

# Get prediction probabilities
rf_probs = weighted_rf.predict_proba(X_test)[:, 1]

# Tune threshold
threshold = 0.3  # Set a lower threshold for classifying as class 1
rf_tuned_preds = (rf_probs >= threshold).astype(int)

print("Threshold-Tuned Random Forest Evaluation:")
print(classification_report(y_test, rf_tuned_preds))
print(f"Accuracy: {accuracy_score(y_test, rf_tuned_preds)}")
print(f"ROC AUC: {roc_auc_score(y_test, rf_tuned_preds)}")

# document optimization results in a table
threshold_tuning_results = pd.DataFrame({
    'Model': ['Weighted Random Forest', 'Threshold-Tuned Random Forest'],
    'Accuracy': [accuracy_score(y_test, weighted_rf_preds),
                 accuracy_score(y_test, rf_tuned_preds)],
    'ROC AUC': [roc_auc_score(y_test, weighted_rf_preds),
                roc_auc_score(y_test, rf_tuned_preds)]
})

threshold_tuning_results.to_csv('./data/csv/threshold_tuning_results.csv', index=False)
threshold_tuning_results


Threshold-Tuned Random Forest Evaluation:
              precision    recall  f1-score   support

           0       0.96      0.77      0.86     81077
           1       0.24      0.71      0.35      7950

    accuracy                           0.77     89027
   macro avg       0.60      0.74      0.61     89027
weighted avg       0.90      0.77      0.81     89027

Accuracy: 0.7676435238747795
ROC AUC: 0.7434347502409193


Unnamed: 0,Model,Accuracy,ROC AUC
0,Weighted Random Forest,0.892707,0.619343
1,Threshold-Tuned Random Forest,0.767644,0.743435


We can see now that the recall has significantly improved for heart failure prediction, although at the cost of precision and accuracy.

In [10]:
# Export Features Used in Training
features_used = pd.DataFrame({
    'Feature': X.columns
})
features_used.to_csv('features_used_in_model.csv', index=False)
print("Exported features used in the model to 'features_used_in_model.csv'")


Exported features used in the model to 'features_used_in_model.csv'


In [11]:
# Export the Model as a PKL File
import joblib

# Save the threshold-tuned Random Forest model
joblib.dump(weighted_rf, 'tuned_model.pkl')
print("Exported the model to 'tuned_model.pkl'")


Exported the model to 'tuned_model.pkl'
