Trying with Logistic Regression

In [5]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd

df = pd.read_csv("framingham.csv").dropna()
X = df.drop(columns=["TenYearCHD"])
y = df["TenYearCHD"]

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"Accuracy: {accuracy:.4f}")
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred))

Accuracy: 0.8309

Classification Report:

              precision    recall  f1-score   support

           0       0.84      0.99      0.91       622
           1       0.57      0.06      0.11       129

    accuracy                           0.83       751
   macro avg       0.70      0.53      0.51       751
weighted avg       0.79      0.83      0.77       751



Making a function that gives the prediction along with the possible causes

In [7]:
def predict_heart_health_and_causes(input_data):
    input_df = pd.DataFrame([input_data])
    input_scaled = scaler.transform(input_df)

    prob = model.predict_proba(input_scaled)[0][1]
    threshold = 0.35
    pred = 1 if prob > threshold else 0

    causes = []
    if input_data.get('cigsPerDay', 0) > 10:
        causes.append('Smoking')
    if input_data.get('totChol', 0) > 240:
        causes.append('High Cholesterol')
    if input_data.get('sysBP', 0) > 140 or input_data.get('diaBP', 0) > 90:
        causes.append('High Blood Pressure')
    if input_data.get('glucose', 0) > 125:
        causes.append('High Glucose / Possible Diabetes')
    if input_data.get('diabetes', 0) == 1:
        causes.append('Diabetes')
    if input_data.get('BPMeds', 0) == 1:
        causes.append('On BP Medication (risk indicator)')

    if len(causes) >= 2:
        pred = 1

    if pred == 0:
        return f"Prediction: Healthy heart. (Risk: {prob:.2f})"
    else:
        return f"Prediction: Unhealthy heart. (Risk: {prob:.2f})\nLikely Causes: {', '.join(causes)}"


Sample input for which the answers are known

In [9]:
example_input = {
    'male': 1,
    'age': 55,
    'currentSmoker': 1,
    'cigsPerDay': 20,
    'BPMeds': 0,
    'prevalentStroke': 0,
    'prevalentHyp': 1,
    'diabetes': 0,
    'totChol': 250,
    'sysBP': 160,
    'diaBP': 100,
    'BMI': 30.0,
    'heartRate': 80,
    'glucose': 140
}

print(predict_heart_health_and_causes(example_input))


Prediction: Unhealthy heart. (Risk: 0.51)
Likely Causes: Smoking, High Cholesterol, High Blood Pressure, High Glucose / Possible Diabetes


The following output is definitely false as it has all the ranges in favour of healthy heart but it gives a false output leadin us to change the model as it is not so accurate

In [11]:
example_input = {
    'male': 1,
    'age': 70,
    'currentSmoker': 1,
    'cigsPerDay':0,
    'BPMeds': 0,
    'prevalentStroke': 0,
    'prevalentHyp': 1,
    'diabetes': 0,
    'totChol': 210,
    'sysBP': 120,
    'diaBP': 80,
    'BMI': 30.0,
    'heartRate': 80,
    'glucose': 101
}

print(predict_heart_health_and_causes(example_input))


Prediction: Unhealthy heart. (Risk: 0.38)
Likely Causes: 


Using RandomForest instead

In [13]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd

d = pd.read_csv("framingham.csv").dropna()

X = d.drop(columns=["TenYearCHD"])
y = d["TenYearCHD"]

X = X.fillna(X.mean())

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42, stratify=y)

rf = RandomForestClassifier(class_weight='balanced', random_state=42)
rf.fit(X_train, y_train)

y_pred = rf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.8415446071904128

Classification Report:
               precision    recall  f1-score   support

           0       0.85      0.99      0.91       636
           1       0.17      0.01      0.02       115

    accuracy                           0.84       751
   macro avg       0.51      0.50      0.47       751
weighted avg       0.74      0.84      0.78       751



Gives more accuracy and precision and correct output

In [32]:
new_data = pd.DataFrame([{
    'male': 1,
    'age': 55,
    'currentSmoker': 1,
    'cigsPerDay': 20,
    'BPMeds': 0,
    'prevalentStroke': 0,
    'prevalentHyp': 1,
    'diabetes': 0,
    'totChol': 250,
    'sysBP': 160,
    'diaBP': 100,
    'BMI': 30.0,
    'heartRate': 80,
    'glucose': 140
}])

new_data = new_data.fillna(X.mean())
new_data_scaled = scaler.transform(new_data)

# Predict
prediction = rf.predict(new_data_scaled)[0]
label = "Risk" if prediction == 1 else "Healthy"

causes = []
row = new_data.iloc[0]

if row['totChol'] > 240: causes.append("high cholesterol")
if row['sysBP'] > 140: causes.append("high systolic BP")
if row['diaBP'] > 90: causes.append("high diastolic BP")
if row['BMI'] > 27: causes.append("high BMI")
if row['glucose'] > 100: causes.append("high glucose")
if row['currentSmoker'] == 1: causes.append("smoking")
if row['prevalentHyp'] == 1: causes.append("hypertension")
if row['diabetes'] == 1: causes.append("diabetes")

print(f"Prediction: {label}")
if prediction == 1:
    print("Likely causes:", ", ".join(causes))
    for cause in causes:
        display_info(cause)
else: 
    print("No major risk factors detected.")
    print("✅ You're doing great! Here are tips to keep your heart healthy:\n")
    for key, value in heart_health_tips.items():
        print(f"-> {key.capitalize()}: {value}\n")


Prediction: Risk
Likely causes: high cholesterol, high systolic BP, high diastolic BP, high BMI, high glucose, smoking, hypertension
No information available for high cholesterol
No information available for high systolic bp
No information available for high diastolic bp
No information available for high bmi

--- High Glucose ---

Precautions:
• Avoid sugar-heavy foods
• Eat low GI foods
• Frequent monitoring

Remedies:
• Cinnamon, fenugreek in diet
• Reduce stress
• Stay hydrated

Medications:
• Metformin
• Insulin
• SGLT2 inhibitors

--- Smoking ---

Precautions:
• Avoid tobacco products
• Stay away from passive smoke
• Educate on smoking risks

Remedies:
• Nicotine patches/gum
• Counseling
• Join support groups

Medications:
• Varenicline
• Bupropion
• Nicotine replacement therapy

--- Hypertension ---

Precautions:
• Adhere to schedule
• Don't self-adjust dose
• Monitor side effects

Remedies:
• Lifestyle improvements to reduce stress

Medications:
• yoga and meditations
• Daily wa

In [34]:
new_data = pd.DataFrame([{
     'male': 1,
    'age': 70,
    'currentSmoker': 1,
    'cigsPerDay':0,
    'BPMeds': 0,
    'prevalentStroke': 0,
    'prevalentHyp': 1,
    'diabetes': 0,
    'totChol': 210,
    'sysBP': 120,
    'diaBP': 80,
    'BMI': 30.0,
    'heartRate': 80,
    'glucose': 101
}])

new_data = new_data.fillna(X.mean())
new_data_scaled = scaler.transform(new_data)

# Predict
prediction = rf.predict(new_data_scaled)[0]
label = "Risk" if prediction == 1 else "Healthy"

causes = []
row = new_data.iloc[0]

if row['totChol'] > 240: causes.append("high cholesterol")
if row['sysBP'] > 140: causes.append("high systolic BP")
if row['diaBP'] > 90: causes.append("high diastolic BP")
if row['BMI'] > 27: causes.append("high BMI")
if row['glucose'] > 100: causes.append("high glucose")
if row['currentSmoker'] == 1: causes.append("smoking")
if row['prevalentHyp'] == 1: causes.append("hypertension")
if row['diabetes'] == 1: causes.append("diabetes")

print(f"Prediction: {label}")
if prediction == 1:
    print("Likely causes:", ", ".join(causes))
    for cause in causes:
        display_info(cause)
else: 
    print("No major risk factors detected.")
    print("✅ You're doing great! Here are tips to keep your heart healthy:\n")
    for key, value in heart_health_tips.items():
        print(f"-> {key.capitalize()}: {value}\n")


Prediction: Healthy
No major risk factors detected.
✅ You're doing great! Here are tips to keep your heart healthy:

-> Diet: Follow a balanced diet rich in fruits, vegetables, whole grains, lean proteins, and healthy fats (like olive oil and nuts). Avoid excess salt, sugar, and processed foods.

-> Exercise: Engage in regular physical activity. Aim for at least 150 minutes of moderate aerobic exercise per week, like brisk walking, swimming, or cycling.

-> Hydration: Drink enough water daily to keep your body and cardiovascular system functioning optimally.

-> Stress: Practice stress management through mindfulness, yoga, meditation, or hobbies. Chronic stress can increase heart risk.

-> Smoking: Avoid tobacco products and second-hand smoke. Even occasional smoking harms blood vessels and increases risk.

-> Sleep: Get 7–9 hours of quality sleep every night. Poor sleep increases the risk of high blood pressure and heart disease.

-> Checkups: Get regular health checkups to monitor bl

In [24]:
import ipywidgets as widgets
from IPython.display import display, clear_output

cause_info = {
    "high blood pressure(sys)": {
        "precautions": ["Limit sodium intake", "Exercise regularly", "Avoid stress", "Monitor blood pressure regularly"],
        "remedies": ["DASH diet", "Deep breathing exercises", "Maintain healthy weight"],
        "medications": ["Amlodipine", "Lisinopril", "Losartan"]
    },
    "high blood pressure(dia)": {
        "precautions": ["Limit sodium intake", "Exercise regularly", "Avoid stress", "Monitor blood pressure regularly"],
        "remedies": ["DASH diet", "Deep breathing exercises", "Maintain healthy weight"],
        "medications": ["Amlodipine", "Lisinopril", "Losartan"]
    },
    "smoking": {
        "precautions": ["Avoid tobacco products", "Stay away from passive smoke", "Educate on smoking risks"],
        "remedies": ["Nicotine patches/gum", "Counseling", "Join support groups"],
        "medications": ["Varenicline", "Bupropion", "Nicotine replacement therapy"]
    },
    "diabetes": {
        "precautions": ["Monitor blood sugar", "Maintain healthy diet", "Exercise consistently"],
        "remedies": ["Reduce refined carbs", "Consume more fiber", "Hydrate well"],
        "medications": ["Metformin", "Insulin", "Glipizide"]
    },
    "cholesterol": {
        "precautions": ["Avoid trans fats", "Exercise regularly", "Limit red meat"],
        "remedies": ["Eat oats and legumes", "Use olive oil", "Increase fiber intake"],
        "medications": ["Atorvastatin", "Rosuvastatin", "Ezetimibe"]
    },
    "obesity": {
        "precautions": ["Avoid sugary foods", "Stay active", "Track calorie intake"],
        "remedies": ["Intermittent fasting", "Meal planning", "Join fitness programs"],
        "medications": ["Orlistat", "Liraglutide", "Semaglutide"]
    },
    "stroke history": {
        "precautions": ["Control BP and cholesterol", "Avoid smoking and alcohol", "Follow-up regularly"],
        "remedies": ["Physiotherapy", "Speech therapy", "Healthy lifestyle"],
        "medications": ["Aspirin", "Clopidogrel", "Statins"]
    },
    "high glucose": {
        "precautions": ["Avoid sugar-heavy foods", "Eat low GI foods", "Frequent monitoring"],
        "remedies": ["Cinnamon, fenugreek in diet", "Reduce stress", "Stay hydrated"],
        "medications": ["Metformin", "Insulin", "SGLT2 inhibitors"]
    },
    "high heart rate": {
        "precautions": ["Avoid caffeine", "Manage anxiety", "Stay hydrated"],
        "remedies": ["Meditation", "Slow deep breathing", "Cold splash to face"],
        "medications": ["Beta blockers", "Calcium channel blockers"]
    },
    "age-related risk": {
        "precautions": ["Annual check-ups", "Exercise regularly", "Balanced diet"],
        "remedies": ["Social engagement", "Mental stimulation", "Mobility exercises"],
        "medications": ["As prescribed by physician"]
    },
    "bp medications": {
        "precautions": ["Adhere to schedule", "Don't self-adjust dose", "Monitor side effects"],
        "remedies": ["Lifestyle improvements to reduce dependence", "Avoid alcohol"],
        "medications": ["Amlodipine", "Losartan", "Hydrochlorothiazide"]
    },
    "hypertension": {
        "precautions": ["Adhere to schedule", "Don't self-adjust dose", "Monitor side effects"],
        "remedies": ["Lifestyle improvements to reduce stress"],
        "medications": ["yoga and meditations","Daily walk"]
    },
    "healthy heart": {
        "precautions": ["Daily walk or exercise", "Low-fat diet", "No tobacco or alcohol", "Regular health screenings"],
        "remedies": ["Meditation", "Adequate sleep", "Stay socially active"],
        "medications": ["None – maintain lifestyle and preventive care"]
    }
}

def display_info(cause):
    cause = cause.lower()
    info = cause_info.get(cause)
    if info:
        print(f"\n--- {cause.title()} ---")
        print("\nPrecautions:")
        for item in info["precautions"]:
            print(f"• {item}")
        print("\nRemedies:")
        for item in info["remedies"]:
            print(f"• {item}")
        print("\nMedications:")
        for item in info["medications"]:
            print(f"• {item}")
    else:
        print(f"No information available for {cause}")


In [30]:
heart_health_tips = {
    "diet": "Follow a balanced diet rich in fruits, vegetables, whole grains, lean proteins, and healthy fats (like olive oil and nuts). Avoid excess salt, sugar, and processed foods.",
    "exercise": "Engage in regular physical activity. Aim for at least 150 minutes of moderate aerobic exercise per week, like brisk walking, swimming, or cycling.",
    "hydration": "Drink enough water daily to keep your body and cardiovascular system functioning optimally.",
    "stress": "Practice stress management through mindfulness, yoga, meditation, or hobbies. Chronic stress can increase heart risk.",
    "smoking": "Avoid tobacco products and second-hand smoke. Even occasional smoking harms blood vessels and increases risk.",
    "sleep": "Get 7–9 hours of quality sleep every night. Poor sleep increases the risk of high blood pressure and heart disease.",
    "checkups": "Get regular health checkups to monitor blood pressure, cholesterol, and blood sugar levels, even if you feel fine.",
    "alcohol": "Limit alcohol consumption. Excessive drinking can raise blood pressure and add empty calories.",
    "weight": "Maintain a healthy weight to reduce stress on your heart. Obesity is linked with hypertension, diabetes, and heart disease.",
    "mindset": "Stay positive. A good emotional state is linked with lower heart disease risk. Laugh more, worry less."
}
