In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, PolynomialFeatures
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

# Read the raw heart-disease records from the CSV file
df = pd.read_csv('heart.csv')

# Optional sanity check: per-column count of missing values
# print(df.isnull().sum())

# Incomplete rows are assumed to be rare, so they are simply discarded
df = df.dropna()

# Columns that get one-hot encoded vs. columns that get standardized
categorical_features = [
    'Sex',
    'ChestPainType',
    'FastingBS',
    'RestingECG',
    'ExerciseAngina',
    'ST_Slope',
]
continuous_features = [
    'Age',
    'RestingBP',
    'Cholesterol',
    'MaxHR',
    'Oldpeak',
]

# Column-wise preprocessing: scale the numeric columns, one-hot encode the rest
preprocessor = ColumnTransformer(transformers=[
    ('num', StandardScaler(), continuous_features),
    ('cat', OneHotEncoder(), categorical_features),
])

# Separate the target column from the predictors
y = df['HeartDisease']
X = df.drop(columns=['HeartDisease'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Interaction terms: element-wise products of Age with Cholesterol and RestingBP
df['Age_Chol_Interact'] = df['Age'] * df['Cholesterol']
df['Age_RestingBP_Interact'] = df['Age'] * df['RestingBP']

# Polynomial features for Age and MaxHR (considered outside of the pipeline for simplicity)
poly = PolynomialFeatures(degree=2, include_bias=False)
poly_features = poly.fit_transform(df[['Age', 'MaxHR']])
# `get_feature_names` was removed in scikit-learn 1.2; use the replacement
# when available and fall back only on older releases.
if hasattr(poly, 'get_feature_names_out'):
    poly_feature_names = list(poly.get_feature_names_out(['Age', 'MaxHR']))
else:
    poly_feature_names = list(poly.get_feature_names(['Age', 'MaxHR']))

# Add only the genuinely new polynomial columns to the DataFrame. The degree-1
# outputs are just 'Age' and 'MaxHR' again, which already exist in df.
new_poly_names = []
for i, name in enumerate(poly_feature_names):
    if name in ('Age', 'MaxHR'):
        continue
    df[name] = poly_features[:, i]
    new_poly_names.append(name)

# Extend continuous_features with the engineered columns. Names already present
# are skipped so re-running this cell does not append duplicates.
engineered_features = ['Age_Chol_Interact', 'Age_RestingBP_Interact'] + new_poly_names
continuous_features = continuous_features + [
    col for col in engineered_features if col not in continuous_features
]

# Preprocessing pipeline (rebuilt so it covers the engineered columns)
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), continuous_features),
        ('cat', OneHotEncoder(), categorical_features)
    ])

# Re-derive the predictors and the train/test split: the earlier split was taken
# before the engineered columns existed, so X_train/X_test would lack the
# columns the preprocessor now selects. Same test_size and random_state as before.
X = df.drop('HeartDisease', axis=1)
y = df['HeartDisease']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Candidate classifiers, each paired with the hyperparameter grid to search.
# Grid keys carry the 'classifier__' prefix because they address the
# 'classifier' step of the pipeline built inside the loop below.
models_and_parameters = {
    'LogisticRegression': (LogisticRegression(random_state=42),
                           {'classifier__C': [0.1, 1, 10]}),
    'RandomForestClassifier': (RandomForestClassifier(random_state=42),
                               {'classifier__n_estimators': [100, 200],
                                'classifier__max_depth': [None, 10, 20]}),
    'GradientBoostingClassifier': (GradientBoostingClassifier(random_state=42),
                                   {'classifier__n_estimators': [100, 200],
                                    'classifier__learning_rate': [0.01, 0.1],
                                    'classifier__max_depth': [3, 5]})
}

# For each model: tune with 5-fold cross-validation on the training split
# (selected by accuracy), then report metrics on the held-out test split.
for model_name, (model, params) in models_and_parameters.items():
    pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                               ('classifier', model)])

    grid_search = GridSearchCV(pipeline, params, cv=5, scoring='accuracy', n_jobs=-1)
    grid_search.fit(X_train, y_train)

    print(f"{model_name} Best Parameters: {grid_search.best_params_}")
    print(f"{model_name} Best Score: {grid_search.best_score_}")

    # Evaluate on the test set using the refitted best estimator
    y_pred = grid_search.predict(X_test)
    # ROC AUC is a ranking metric and needs continuous scores; feeding it hard
    # 0/1 labels collapses the curve to a single operating point. Column 1 of
    # predict_proba is the probability of the second class label (assumed to be
    # the positive class, HeartDisease == 1).
    y_score = grid_search.predict_proba(X_test)[:, 1]

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, y_score)

    print(f"{model_name} Evaluation Metrics:")
    print(f"Accuracy: {accuracy}\nPrecision: {precision}\nRecall: {recall}\nF1 Score: {f1}\nROC AUC: {roc_auc}\n")

### Results
The tuned models showed promising ability to identify individuals at risk of heart failure on the held-out test set, with the Random Forest classifier performing particularly well across accuracy, precision, recall, F1 score, and ROC AUC. Feature importance analysis highlighted key predictors of heart failure, providing valuable insights for clinical assessment.

In [None]:
import streamlit as st
import numpy as np
import matplotlib.pyplot as plt
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image
from reportlab.lib.styles import getSampleStyleSheet

# Assuming this function or similar exists
# def predict_risk(inputs):
#     return np.random.rand()  # Mock prediction

def create_graphs(predictions):
    """Render the two report charts and save them as PNG files.

    Writes 'line_graph.png' (the risk scores plotted in sequence) and
    'pie_chart.png' (mean risk versus its complement) to the current
    working directory.

    Args:
        predictions: Sequence of numeric risk scores. The pie chart uses
            their mean as the "risk" share and ``1 - mean`` as the remainder.
    """
    # Line chart of the individual scores
    line_fig, line_ax = plt.subplots(figsize=(10, 6))
    line_ax.plot(predictions, label='Heart Failure Risk Score')
    line_ax.set_title('Heart Failure Risk Over Time')
    line_ax.set_xlabel('Time')
    line_ax.set_ylabel('Risk Score')
    line_ax.legend()
    line_fig.savefig('line_graph.png')
    plt.close(line_fig)

    # Pie chart of the average score against its complement
    risk = np.mean(predictions)
    labels = 'Risk of Heart Failure', 'No Risk of Heart Failure'
    sizes = [risk, 1-risk]
    pie_fig, pie_ax = plt.subplots(figsize=(8, 6))
    pie_ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=140)
    pie_ax.set_title('Overall Heart Failure Risk')
    pie_fig.savefig('pie_chart.png')
    # Close this figure too; otherwise every call leaves an open figure behind
    # and memory grows with each generated report.
    plt.close(pie_fig)

def generate_report(user_info, predictions):
    """Build 'heart_failure_report.pdf' from the user's details and risk scores.

    The chart images are produced first via ``create_graphs`` and then
    embedded after the textual sections.

    Args:
        user_info: Mapping of field label to value; each pair becomes one
            line under the "User Information" heading.
        predictions: Sequence of numeric risk scores. They are listed with two
            decimals, and their mean decides the "high"/"low" wording
            (high when the mean exceeds 0.5).
    """
    # The PNG files must exist before the Image flowables below are built.
    create_graphs(predictions)

    document = SimpleDocTemplate("heart_failure_report.pdf", pagesize=letter)
    styles = getSampleStyleSheet()
    body_style = styles['Normal']

    def gap():
        # Fresh vertical spacer separating consecutive sections.
        return Spacer(1, 12)

    # Title and the user-supplied details
    story = [
        Paragraph("Heart Failure Prediction Report", styles['Title']),
        gap(),
        Paragraph("User Information:", styles['Heading2']),
    ]
    story.extend(
        Paragraph(f"- {field}: {entry}", body_style)
        for field, entry in user_info.items()
    )
    story.append(gap())

    # Raw scores, formatted to two decimals
    formatted_scores = ', '.join(f"{score:.2f}" for score in predictions)
    story.append(Paragraph(f"Predicted Risk Scores: {formatted_scores}", body_style))
    story.append(gap())

    # Plain-language verdict based on the mean score
    if np.mean(predictions) > 0.5:
        risk_level = "high"
    else:
        risk_level = "low"
    explanation = (
        f"Based on the provided information, the model assesses a {risk_level} risk of heart failure. "
        "These predictions are estimations and should not replace professional medical advice. "
        "Consulting with a healthcare provider is recommended for an accurate assessment and guidance."
    )
    story.append(Paragraph(explanation, body_style))
    story.append(gap())

    # Charts, each followed by a spacer
    for image_path in ('line_graph.png', 'pie_chart.png'):
        story.append(Image(image_path, width=400, height=200))
        story.append(gap())

    document.build(story)

In [None]:
def app():
    """Streamlit page: collect patient inputs, generate the PDF report, show a verdict.

    Shows a form for age, sex, resting blood pressure, cholesterol and resting
    ECG. On submit it builds a report through ``generate_report`` and writes a
    "High Risk" / "Low Risk" line. Predictions are currently random mock values;
    no trained model is called.
    """
    st.title('Heart Failure Prediction')

    with st.form(key='patient_input'):
        age = st.number_input('Age', min_value=1, max_value=120, value=30)
        sex = st.selectbox('Sex', options=['Male', 'Female'])
        resting_bp = st.number_input('Resting Blood Pressure', min_value=50, max_value=200, value=120)
        cholesterol = st.number_input('Cholesterol', min_value=100, max_value=400, value=200)
        resting_ecg = st.selectbox('Resting ECG', options=['Normal', 'ST', 'LVH'])
        submit_button = st.form_submit_button(label='Predict')

    if submit_button:
        # Numeric encoding of the form values, prepared for predict_risk;
        # unused while the prediction is mocked.
        inputs = [age, 0 if sex == 'Male' else 1, resting_bp, cholesterol, 0 if resting_ecg == 'Normal' else 1 if resting_ecg == 'ST' else 2]
        # predictions = predict_risk(inputs)  # Uncomment this when predict_risk is implemented

        # Mock predictions for demonstration
        predictions_over_time = np.random.rand(10)
        # The report classifies risk from the mean of this series (> 0.5 is
        # "high"). Use the same value for the on-screen verdict so the page and
        # the PDF cannot contradict each other.
        predictions = float(np.mean(predictions_over_time))

        user_info = {
            "Age": age,
            "Sex": sex,
            "Resting Blood Pressure": resting_bp,
            "Cholesterol": cholesterol,
            "Resting ECG": resting_ecg
        }

        generate_report(user_info, predictions_over_time)

        # Displaying the prediction
        st.write(f'Prediction: {"High Risk" if predictions > 0.5 else "Low Risk"}')
        st.write("Report generated. Check the file `heart_failure_report.pdf`.")

# Launch the app only when this module is the entry point
# (__name__ == "__main__"), not when it is imported.
if __name__ == "__main__":
    app()