In [None]:
# Heart Disease classification using machine learning algorithms like SupportVectorMachines(SVM), K-NearestNeighbors(KNN), Decisiontree, linearregression, randomforest, etc.

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [2]:
# Load data
data = pd.read_csv("heart.csv")  # Replace "heart.csv" with your downloaded file path


# Check column names (optional)
print(data.columns)

categorical_features = [
    col for col in ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal']  # Adjust based on actual names
    if col in data.columns  # Only include columns present in the data
]

for feature in categorical_features:
    data = pd.get_dummies(data, columns=[feature])
    
    
    



Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target'],
      dtype='object')


In [3]:
# Separate features and target variable
X = data.drop("target", axis=1)
y = data["target"]

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [4]:
# Define and evaluate different models
models = []
models.append(("SVM", SVC(kernel="linear")))
models.append(("KNN", KNeighborsClassifier(n_neighbors=5)))
models.append(("Decision Tree", DecisionTreeClassifier(max_depth=3)))
models.append(("Logistic Regression", LogisticRegression()))
models.append(("Random Forest", RandomForestClassifier(n_estimators=100)))

for name, model in models:
    # Train the model
    model.fit(X_train, y_train)

    # Make predictions on testing data
    y_pred = model.predict(X_test)

    # Evaluate model performance
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    print(f"** Model: {name} **")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1:.4f}")
    print("-" * 30)
    
    


# Choose the best model based on your evaluation criteria



** Model: SVM **
Accuracy: 0.9854
Precision: 1.0000
Recall: 0.9709
F1-Score: 0.9852
------------------------------
** Model: KNN **
Accuracy: 0.7707
Precision: 0.7593
Recall: 0.7961
F1-Score: 0.7773
------------------------------
** Model: Decision Tree **
Accuracy: 0.8049
Precision: 0.7890
Recall: 0.8350
F1-Score: 0.8113
------------------------------
** Model: Logistic Regression **
Accuracy: 1.0000
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
------------------------------
** Model: Random Forest **
Accuracy: 1.0000
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
------------------------------


In [5]:
# Sample prediction (assuming X_test has new data)
# Choose the model you want to use for prediction (e.g., best performing model)
chosen_model = models[4][1]  # Assuming the  model (random forest) is chosen 


#Here we are using random forest for prediction based on the accuracy score

# Select a sample from testing data
new_data = X_test[56, :]  # Use NumPy indexing instead of iloc

# Predict using the chosen model
new_data_pred = chosen_model.predict([new_data])



In [6]:
print("\nSample Prediction:")
if new_data_pred[0] == 1:
    print("Predicted: Person has heart disease")
else:
    print("Predicted: Person is less likely to have heart disease")

print("Note: This is just a prediction based on the model. Always consult a medical professional for diagnosis and treatment.")


Sample Prediction:
Predicted: Person is less likely to have heart disease
Note: This is just a prediction based on the model. Always consult a medical professional for diagnosis and treatment.
