In [116]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.linear_model import LogisticRegression as lr
from sklearn.tree import DecisionTreeClassifier as dt
from sklearn.ensemble import RandomForestClassifier as rf
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB as gb
from sklearn.neighbors import KNeighborsClassifier as knn
from xgboost import XGBClassifier as xgb
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): a blanket 'ignore' also hides library deprecation and
# unused-parameter warnings raised by later cells — consider narrowing it.
warnings.filterwarnings('ignore')

In [None]:
# Read the heart CSV into a DataFrame and preview the first rows.
# NOTE(review): "/content/..." is an absolute path — presumably a Colab upload
# location; confirm before running elsewhere.
heart = pd.read_csv(filepath_or_buffer="/content/heart.csv")
heart.head(n=5)

In [None]:
# Report the full table shape, then separate the features (x) from the label (y).
print(heart.shape)
x, y = heart.drop(columns='target'), heart['target']
print(x.shape, y.shape, sep='\n')

In [None]:
# Count missing entries per column; all zeros means there is nothing to impute.
a = heart.isna().sum()
print(a)

## **Data Visualization**

In [None]:
# features
# Wide-form bar plot: one bar per feature column of x.
features = x
fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(data=x, ax=ax)
plt.show()

In [None]:
# label
# Distribution of the target column; ticks are fixed before drawing so the
# x axis shows only the two class values.
fig, ax = plt.subplots(figsize=(3, 4))
ax.set_xticks([0, 1])
ax.set_yticks(range(0, 600, 100))
sns.histplot(data=heart, x='target', color='green', ax=ax)
plt.show()

In [None]:
# Histogram of every feature column, one panel per column.
#
# The grid is sized from the number of feature columns instead of being a
# fixed 4x3: a fixed grid draws only the first twelve columns and silently
# skips the rest. Columns come from `x` (the feature frame) rather than
# `heart`, so the figure does not change when later cells add derived
# columns to `heart`.
feat = list(x.columns)
n_cols = 3
n_rows = max(1, -(-len(feat) // n_cols))  # ceiling division, at least one row
fig, axs = plt.subplots(n_rows, n_cols, figsize=(15, 2.5 * n_rows), squeeze=False)
panels = axs.flatten()

for ax, col in zip(panels, feat):
    ax.set_title(col)
    ax.hist(x=x[col], color='red')
    ax.set_xlabel(col)
    ax.set_ylabel('frequency')

# Hide the unused panels in the last row so no empty axes are shown.
for ax in panels[len(feat):]:
    ax.set_visible(False)

plt.tight_layout()

plt.show()

In [None]:
# Bucket `age` into decade-wide bands and count the rows in each band.
# The edge list is called `age_bins` (not `bin`) so the builtin bin() is not
# shadowed for the rest of the kernel session.
# Ages outside 20-80 fall in no interval, become NaN and are not counted.
age_bins=[20,30,40,50,60,70,80]
lab=['20-30','30-40','40-50','50-60','60-70','70-80']
heart['age_bin']=pd.cut(heart['age'],include_lowest=True,bins=age_bins,labels=lab)
plt.figure(figsize=(10,5))
sns.countplot(x=heart['age_bin'],palette='Set1',width=0.4)
plt.show()


In [None]:
# Row count per value of `sex`.
# NOTE(review): the legend labels are attached to the bars by position, not by
# value — verify that the first bar really is 'male' in this dataset's encoding.
color = ['blue', 'pink']
label = ['male', 'female']
fig, ax = plt.subplots(figsize=(4, 4))
sns.countplot(data=heart, x='sex', palette=color, width=0.4, ax=ax)
ax.legend(title='SEX', labels=label)
plt.show()

In [None]:
# Row count per value of `cp`, one colour per category.
color = ['blue', 'pink', 'green', 'red']
fig, ax = plt.subplots(figsize=(4, 4))
sns.countplot(data=heart, x='cp', palette=color, width=0.4, ax=ax)
plt.show()

In [None]:
# Group `trestbps` into 10-wide bands from 90 to 200 and count rows per band.
bins = list(range(90, 201, 10))
# Each label is "<lower>-<upper>" built from consecutive bin edges.
labs = [f'{lo}-{hi}' for lo, hi in zip(bins[:-1], bins[1:])]
heart['trestbps_level'] = pd.cut(heart['trestbps'], bins=bins, labels=labs, include_lowest=True)
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=heart, x='trestbps_level', palette='Set1', width=0.4, ax=ax)
plt.show()

In [None]:
# Group `chol` into one 0-150 band followed by 50-wide bands up to 500.
bins = [0] + list(range(150, 501, 50))
# Labels start one above the lower edge ("151-200"), except the first band ("0-150").
labels = [f'{lo + 1 if lo else 0}-{hi}' for lo, hi in zip(bins[:-1], bins[1:])]
heart['chol_level'] = pd.cut(heart['chol'], include_lowest=True, bins=bins, labels=labels)
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=heart, x='chol_level', palette='pastel', width=0.4, ax=ax)
plt.show()


In [None]:
# Row count per value of `fbs`.
color = ['yellow', 'orange']
fig, ax = plt.subplots(figsize=(6, 6))
sns.countplot(data=heart, x='fbs', palette=color, width=0.4, ax=ax)
plt.show()


In [None]:
# Row count per value of `restecg`.
color = ['purple', 'green', 'maroon']
fig, ax = plt.subplots(figsize=(6, 6))
sns.countplot(data=heart, x='restecg', palette=color, width=0.4, ax=ax)
plt.show()


In [None]:
# Group `thalach` into 10-wide bands from 70 to 210 and count rows per band.
bins = list(range(70, 211, 10))
# Each label is "<lower>-<upper>" built from consecutive bin edges.
labs = [f'{lo}-{hi}' for lo, hi in zip(bins[:-1], bins[1:])]
heart['thalach_level'] = pd.cut(heart['thalach'], bins=bins, labels=labs, include_lowest=True)
fig, ax = plt.subplots(figsize=(14, 6))
sns.countplot(data=heart, x='thalach_level', palette='viridis', width=0.4, ax=ax)
plt.show()


In [None]:
# Row count per value of `exang`.
color = ['red', 'green']
fig, ax = plt.subplots(figsize=(4, 4))
sns.countplot(data=heart, x='exang', palette=color, width=0.4, ax=ax)
plt.show()


In [None]:
# Row count per distinct `oldpeak` value (wide figure: one bar per value).
fig, ax = plt.subplots(figsize=(15, 6))
sns.countplot(data=heart, x='oldpeak', palette='pastel', width=0.4, ax=ax)
plt.show()


In [None]:
# Row count per value of `slope`.
fig, ax = plt.subplots(figsize=(4, 4))
sns.countplot(data=heart, x='slope', palette='Set1', width=0.4, ax=ax)
plt.show()


In [None]:
# Row count per value of `ca`.
fig, ax = plt.subplots(figsize=(4, 4))
sns.countplot(data=heart, x='ca', palette='Set1', width=0.4, ax=ax)
plt.show()

In [None]:
# Row count per value of `thal`.
fig, ax = plt.subplots(figsize=(4, 4))
sns.countplot(data=heart, x='thal', palette='Set1', width=0.4, ax=ax)
plt.show()

***Splitting the dataset***

In [None]:
# Hold out 40% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.4, random_state=42
)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape, sep='\n')

***Perform Feature Scaling***

In [5]:
# Learn mean/variance on the training split only, then apply the same
# transform to both splits so no test-set statistics leak into training.
scaler = StandardScaler().fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

LOGISTIC REGRESSION

In [None]:
# Logistic regression (C=0.01) fitted on the standardised training features.
model1 = lr(C=0.01, random_state=42).fit(x_train_scaled, y_train)

# Predict both splits up front so train and test accuracy can be compared.
y_pred1 = model1.predict(x_test_scaled)
y_pred_train1 = model1.predict(x_train_scaled)
print('Accuracy on test set:', 100 * accuracy_score(y_test, y_pred1))
print('Accuracy on training set:', 100 * accuracy_score(y_train, y_pred_train1))

# Per-class breakdown on the held-out split.
cm = confusion_matrix(y_test, y_pred1)
print('Confusion Matrix:\n', cm)
clr = classification_report(y_test, y_pred1)
print('Classification Report:\n', clr)

In [None]:
# 5-fold cross-validation of the logistic-regression model on the training split.
# The scores are stored in `cv_scores` so that `model1` keeps referring to the
# fitted estimator; assigning them to `model1` would replace the model with an
# array and make this cell fail when re-run.
cv_scores=cross_val_score(model1,x_train_scaled,y_train,cv=5)
print("Cross-Validation Scores:",cv_scores*100)
print("Mean CV Accuracy:",cv_scores.mean()*100)

RANDOM FOREST

In [None]:
# Random forest (300 trees, depth capped at 7) fitted on the standardised training features.
model2 = rf(n_estimators=300, max_depth=7, random_state=42).fit(x_train_scaled, y_train)

# Predict both splits up front so train and test accuracy can be compared.
y_pred2 = model2.predict(x_test_scaled)
y_pred_train2 = model2.predict(x_train_scaled)
print('Accuracy on test set:', 100 * accuracy_score(y_test, y_pred2))
print('Accuracy on training set: ', 100 * accuracy_score(y_train, y_pred_train2))

# Per-class breakdown on the held-out split.
cm2 = confusion_matrix(y_test, y_pred2)
print('Confusion Matrix:\n', cm2)
clr2 = classification_report(y_test, y_pred2)
print('Classification Report:\n', clr2)


In [None]:
# 5-fold cross-validation of the random forest on the training split.
cv_scores = cross_val_score(estimator=model2, X=x_train_scaled, y=y_train, cv=5)
print("Cross-Validation Scores:", 100 * cv_scores)
print("Mean CV Accuracy:", 100 * cv_scores.mean())

DECISION TREE

In [None]:
# Single decision tree (depth capped at 7) fitted on the standardised training features.
model3 = dt(max_depth=7, random_state=42).fit(x_train_scaled, y_train)

# Predict both splits up front so train and test accuracy can be compared.
y_pred3 = model3.predict(x_test_scaled)
y_pred_train3 = model3.predict(x_train_scaled)
print('Accuracy on test set:', 100 * accuracy_score(y_test, y_pred3))
print('Accuracy on training set:', 100 * accuracy_score(y_train, y_pred_train3))

# Per-class breakdown on the held-out split.
cm3 = confusion_matrix(y_test, y_pred3)
print('Confusion Matrix:\n', cm3)
clr3 = classification_report(y_test, y_pred3)
print('Classification Report:\n', clr3)

In [None]:
# 5-fold cross-validation of the decision tree on the training split.
cv_scores = cross_val_score(estimator=model3, X=x_train_scaled, y=y_train, cv=5)
print("Cross-Validation Scores:", 100 * cv_scores)
print("Mean CV Accuracy:", 100 * cv_scores.mean())

NAIVE BAYES

In [None]:
# Gaussian naive Bayes with default settings, fitted on the standardised training features.
model4 = gb().fit(x_train_scaled, y_train)

# Predict both splits up front so train and test accuracy can be compared.
y_pred4 = model4.predict(x_test_scaled)
y_pred_train4 = model4.predict(x_train_scaled)
print('Accuracy on test set:', 100 * accuracy_score(y_test, y_pred4))
print('Accuracy on training set:', 100 * accuracy_score(y_train, y_pred_train4))

# Per-class breakdown on the held-out split.
cm4 = confusion_matrix(y_test, y_pred4)
print('Confusion Matrix:\n', cm4)
clr4 = classification_report(y_test, y_pred4)
print('Classification Report:\n', clr4)

In [None]:
# 5-fold cross-validation of the naive Bayes model on the training split.
cv_scores = cross_val_score(estimator=model4, X=x_train_scaled, y=y_train, cv=5)
print("Cross-Validation Scores:", 100 * cv_scores)
print("Mean CV Accuracy:", 100 * cv_scores.mean())

K-NEAREST NEIGHBORS

In [None]:
# K-nearest-neighbours classifier (k=7) fitted on the standardised training features.
# Cross-validation is deliberately not run in this cell: its result was never
# used here, and the following cell computes and prints the same scores.
model5=knn(n_neighbors=7)
model5.fit(x_train_scaled,y_train)
y_pred5=model5.predict(x_test_scaled)
print('Accuracy on test set:',accuracy_score(y_test,y_pred5)*100)
y_pred_train5=model5.predict(x_train_scaled)
print('Accuracy on training set:',accuracy_score(y_train,y_pred_train5)*100)
# Per-class breakdown on the held-out split.
cm5=confusion_matrix(y_test,y_pred5)
print('Confusion Matrix:\n',cm5)
clr5=classification_report(y_test,y_pred5)
print('Classification Report:\n',clr5)

In [None]:
# 5-fold cross-validation of the KNN model on the training split.
cv_scores = cross_val_score(estimator=model5, X=x_train_scaled, y=y_train, cv=5)
print("Cross-Validation Scores:", 100 * cv_scores)
print("Mean CV Accuracy:", 100 * cv_scores.mean())

XGBOOST

In [None]:
# Gradient-boosted trees (150 shallow trees, depth 2) fitted on the standardised training features.
# `C` is a scikit-learn linear-model argument, not an XGBoost parameter: XGBoost
# does not use it, so it has been dropped rather than left in as a setting
# that appears to regularise the model but does nothing.
model6=xgb(max_depth=2,n_estimators=150)
model6.fit(x_train_scaled, y_train)
y_pred6 = model6.predict(x_test_scaled)
print('Accuracy on test set:', accuracy_score(y_test, y_pred6) * 100)
y_pred_train6 = model6.predict(x_train_scaled)
print('Accuracy on training set:', accuracy_score(y_train, y_pred_train6) * 100)
# Per-class breakdown on the held-out split.
cm6=confusion_matrix(y_test,y_pred6)
print('Confusion Matrix:\n',cm6)
clr6=classification_report(y_test,y_pred6)
print('Classification Report:\n',clr6)

In [None]:
# 5-fold cross-validation of the XGBoost model on the training split.
cv_scores = cross_val_score(estimator=model6, X=x_train_scaled, y=y_train, cv=5)
print("Cross-Validation Scores:", 100 * cv_scores)
print("Mean CV Accuracy:", 100 * cv_scores.mean())