3. Predicting Heart Disease Using Logistic Regression
   Dataset: Heart Disease Dataset
   Preprocessing Steps:
     - Handle missing values (e.g., fill missing values with mean).
     - Encode categorical variables (e.g., one-hot encoding for gender, chest pain type, etc.).
     - Standardize numerical features.
   Task: Implement logistic regression to predict heart disease and evaluate the model using accuracy and ROC-AUC.


In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder

In [5]:
# Load the raw heart-disease table from the CSV path below.
# NOTE(review): this is an absolute path (looks like a Colab mount) — confirm it
# exists in the environment where the notebook is re-run.
HEART_DISEASE_CSV = '/content/heart_disease_data.csv'
df = pd.read_csv(HEART_DISEASE_CSV)

In [6]:
# Fill missing values in numeric columns with that column's mean.
# numeric_only=True keeps df.mean() from raising TypeError on pandas >= 2.0 when
# the frame contains non-numeric columns; such columns are left untouched here.
# Rebinding (instead of inplace=True) keeps the cell a plain expression of its input.
# NOTE(review): the means are computed on the full dataset, before the train/test
# split performed later in the notebook, so test rows influence the imputed values.
df = df.fillna(df.mean(numeric_only=True))

In [7]:
# Split the frame into the label column ('target') and the remaining feature columns.
y = df['target']
x = df.drop(columns='target')

In [8]:
# Partition the feature columns by dtype for the ColumnTransformer:
#   - numeric columns (ints, floats, bools) are standardized,
#   - every other column is one-hot encoded.
# Testing `dtype == 'object'` alone would send `category` and pandas string-dtype
# columns to the numeric list, where StandardScaler cannot handle them.
# NOTE(review): integer-coded categoricals are still treated as numeric by this
# dtype-based split — confirm against the dataset's schema.
categorical_cols = [col for col in x.columns if not pd.api.types.is_numeric_dtype(x[col])]
numerical_cols = [col for col in x.columns if pd.api.types.is_numeric_dtype(x[col])]

In [10]:
# Column-wise preprocessing: standardize the numeric columns and one-hot encode
# the categorical ones.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        # handle_unknown='ignore': a category that appears only in the held-out
        # split is encoded as all zeros instead of raising at transform time.
        ('categs', OneHotEncoder(handle_unknown='ignore'), categorical_cols),
    ]
)

In [12]:
# Chain preprocessing and the classifier so both are fitted together on the
# training data passed to model.fit.
pipeline_steps = [
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression(random_state=42)),
]
model = Pipeline(steps=pipeline_steps)

In [14]:
# Hold out 20% of the rows for evaluation. The split is stratified on y and
# seeded so it is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [15]:
# Fit the whole pipeline (preprocessing + logistic regression) on the training split.
model.fit(X=x_train, y=y_train)

In [17]:
# Score the held-out split: per-class probabilities first, then hard labels.
# Column 1 of predict_proba is kept as the score for ROC-AUC.
# NOTE(review): presumably column 1 is the positive class (target == 1) —
# confirm against model.classes_.
test_probabilities = model.predict_proba(x_test)
y_predProbs = test_probabilities[:, 1]
y_pred = model.predict(x_test)

In [19]:
# Evaluate on the held-out split: accuracy and the confusion matrix use the hard
# labels, ROC-AUC uses the predicted probabilities.
confusionMatrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
roc_auc = roc_auc_score(y_true=y_test, y_score=y_predProbs)

In [21]:
# Report the evaluation metrics, one per line.
print(
    f"Accuracy: {accuracy}",
    f"ROC-AUC: {roc_auc}",
    f"Confusion Matrix: \n{confusionMatrix}",
    sep="\n",
)

Accuracy: 0.8032786885245902
ROC-AUC: 0.8690476190476191
Confusion Matrix: 
[[19  9]
 [ 3 30]]
