In [None]:
#importing libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from tensorflow import keras
from tensorflow.keras import layers


In [None]:
# Install a global filter that suppresses all warnings raised after this cell runs
warnings.filterwarnings(action='ignore')

In [None]:
# Read heart.csv (resolved relative to the working directory) into a DataFrame
df = pd.read_csv(filepath_or_buffer="heart.csv")

In [None]:
# Report how many missing entries each column contains
print(df.isna().sum())

In [None]:
# Convert categorical features into numerical ones (only where necessary).
# Series.map() with a dict turns every value that is not one of the dict's keys into NaN,
# so mapping a column that is already 0/1 (or re-running this cell after the first
# conversion) would wipe the column out. Each column is therefore encoded only while it
# still contains at least one of its string labels.
label_encodings = {
    'sex': {'Female': 0, 'Male': 1},
    'target': {'No': 0, 'Yes': 1},
}
for column, encoding in label_encodings.items():
    if df[column].isin(list(encoding)).any():
        df[column] = df[column].map(encoding)

In [None]:
# Separate the label column (y) from the remaining predictor columns (X)
y = df['target']
X = df.drop(columns='target')

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Feature scaling: the scaler is fitted on the training split only and then
# applied unchanged to both the training and the test split
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Shared helper: fit one model and score it on the held-out data
def train_and_evaluate(model, X_train, y_train, X_test, y_test):
    """Fit `model` on the training data and score its predictions on the test data.

    Args:
        model: Object exposing `fit(X, y)` and `predict(X)`; it is fitted in place.
        X_train: Training features passed to `model.fit`.
        y_train: Training labels passed to `model.fit`.
        X_test: Test features passed to `model.predict`.
        y_test: True test labels the predictions are compared against.

    Returns:
        A `(accuracy, report)` tuple: the value of `accuracy_score` and the
        dictionary produced by `classification_report(..., output_dict=True)`.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    return (
        accuracy_score(y_test, predictions),
        classification_report(y_test, predictions, output_dict=True),
    )

In [None]:
# Display names of the compared models, plus the list that collects one test
# accuracy per model (filled by later cells in this same order)
models = [
    'Logistic Regression',
    'Random Forest',
    'SVC',
    'KNN',
    'Decision Tree',
    'Neural Network',
]
accuracies = list()

In [None]:
# a.-e. Classical scikit-learn models, listed in the same order as the first five
# names in `models`. The randomized estimators (Random Forest, Decision Tree) get a
# fixed random_state so that re-running the notebook reproduces the same comparison.
log_reg = LogisticRegression()                 # a. Logistic Regression
rf = RandomForestClassifier(random_state=42)   # b. Random Forest Classifier
svc = SVC()                                    # c. Support Vector Classifier (SVC)
knn = KNeighborsClassifier()                   # d. K-Nearest Neighbors (KNN)
dt = DecisionTreeClassifier(random_state=42)   # e. Decision Tree Classifier
classifiers = [log_reg, rf, svc, knn, dt]

# Train and score every classifier with the shared helper; keep each report by model name.
reports = {}
classical_accuracies = []
for name, clf in zip(models, classifiers):
    acc, report = train_and_evaluate(clf, X_train_scaled, y_train, X_test_scaled, y_test)
    classical_accuracies.append(acc)
    reports[name] = report

# Write into the leading slots instead of appending: on a fresh `accuracies` list this
# adds the five values, and on a re-run it replaces them in place rather than growing
# the list (which would break the later model/accuracy pairing).
accuracies[:len(classifiers)] = classical_accuracies

In [None]:
# f. Neural Network
# Seed the Python, NumPy and TensorFlow random generators so weight initialisation
# and batch shuffling are repeatable from run to run.
keras.utils.set_random_seed(42)

# Small fully connected binary classifier: two ReLU hidden layers and a sigmoid output.
# The input width is taken from the scaled training matrix; an explicit Input object is
# used instead of the `input_shape` layer argument, as recommended for Sequential models.
nn_model = keras.Sequential([
    keras.Input(shape=(X_train_scaled.shape[1],)),
    layers.Dense(32, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

nn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Keep the returned History object in a variable so the cell does not echo its repr.
nn_history = nn_model.fit(X_train_scaled, y_train, epochs=100, batch_size=10, verbose=0)

In [None]:
# Evaluate Neural Network on the held-out test split
nn_loss, nn_accuracy = nn_model.evaluate(X_test_scaled, y_test, verbose=0)

# The network is the last entry in `models`, so its accuracy belongs in the last slot.
# Assigning to that slot (rather than appending) keeps this cell safe to re-run: a
# second run replaces the value instead of making `accuracies` longer than `models`,
# which would make the DataFrame construction below fail.
nn_slot = len(models) - 1
accuracies[nn_slot:] = [nn_accuracy]

# Create DataFrame for model comparison
comparison_df = pd.DataFrame({'Model': models, 'Accuracy': accuracies})

# Print results
print(comparison_df)

In [None]:
# Bar chart of test accuracy per model, drawn on an explicitly created Axes
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(data=comparison_df, x='Model', y='Accuracy', palette='viridis', ax=ax)
ax.set_title('Model Comparison')
ax.set_ylim(0, 1)  # accuracy is a fraction, so show the full 0-1 range
plt.show()

In [None]:
# Pick the best performing classical model (its confusion matrix is plotted next).
# `classical_models` is ordered exactly like the first five names in `models`, so the
# argmax position indexes both lists directly. The previous lookup subtracted 1 from
# the index and returned the Decision Tree whenever Logistic Regression (index 0) won.
classical_models = [log_reg, rf, svc, knn, dt]
# Only the classical models' accuracies are considered; the neural network's entry
# (last in `accuracies`) is excluded from the search.
best_model_idx = int(np.argmax(accuracies[:len(classical_models)]))
best_model_name = models[best_model_idx]
best_model = classical_models[best_model_idx]

In [None]:
# Confusion matrix of the selected model on the test split, drawn as an annotated heatmap
fig, ax = plt.subplots(figsize=(6, 6))
y_pred_best_model = best_model.predict(X_test_scaled)
cm = confusion_matrix(y_test, y_pred_best_model)
class_labels = ['No Disease', 'Disease']
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    cbar=False,
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set_title(f"Confusion Matrix - {best_model_name}")
ax.set_ylabel("True Label")
ax.set_xlabel("Predicted Label")
plt.show()