## Importing the Libraries

In [1]:
import pandas as pd

## Importing the dataset

In [2]:
data = pd.read_csv('heart_disease_data.csv')

## Taking Care of Missing Values

In [3]:
data.isnull().sum()

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64

## Taking care of Duplicate Values

In [4]:
data_dup = data.duplicated().any()

In [5]:
data_dup

True

In [6]:
data = data.drop_duplicates()

In [7]:
data_dup = data.duplicated().any()

In [8]:
data_dup

False

## Data Processing

In [9]:
cate_val=[]
cont_val=[]

for column in data.columns:
    if data[column].nunique() <=10:
        cate_val.append(column)
    else:
        cont_val.append(column)

In [10]:
cate_val

['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal', 'target']

In [11]:
cont_val

['age', 'trestbps', 'chol', 'thalach', 'oldpeak']

## Encoding Categorical Data

In [12]:
cate_val

['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal', 'target']

In [13]:
data['cp'].unique()

array([3, 2, 1, 0], dtype=int64)

In [14]:
cate_val.remove('sex')
cate_val.remove('target')
data = pd.get_dummies(data,columns=cate_val,drop_first=True)

In [15]:
data.head()

Unnamed: 0,age,sex,trestbps,chol,thalach,oldpeak,target,cp_1,cp_2,cp_3,...,exang_1,slope_1,slope_2,ca_1,ca_2,ca_3,ca_4,thal_1,thal_2,thal_3
0,63,1,145,233,150,2.3,1,False,False,True,...,False,False,False,False,False,False,False,True,False,False
1,37,1,130,250,187,3.5,1,False,True,False,...,False,False,False,False,False,False,False,False,True,False
2,41,0,130,204,172,1.4,1,True,False,False,...,False,False,True,False,False,False,False,False,True,False
3,56,1,120,236,178,0.8,1,True,False,False,...,False,False,True,False,False,False,False,False,True,False
4,57,0,120,354,163,0.6,1,False,False,False,...,True,False,True,False,False,False,False,False,True,False


## Feature Scaling

In [16]:
data.head()

Unnamed: 0,age,sex,trestbps,chol,thalach,oldpeak,target,cp_1,cp_2,cp_3,...,exang_1,slope_1,slope_2,ca_1,ca_2,ca_3,ca_4,thal_1,thal_2,thal_3
0,63,1,145,233,150,2.3,1,False,False,True,...,False,False,False,False,False,False,False,True,False,False
1,37,1,130,250,187,3.5,1,False,True,False,...,False,False,False,False,False,False,False,False,True,False
2,41,0,130,204,172,1.4,1,True,False,False,...,False,False,True,False,False,False,False,False,True,False
3,56,1,120,236,178,0.8,1,True,False,False,...,False,False,True,False,False,False,False,False,True,False
4,57,0,120,354,163,0.6,1,False,False,False,...,True,False,True,False,False,False,False,False,True,False


In [17]:
from sklearn.preprocessing import StandardScaler

In [18]:
st = StandardScaler()

In [19]:
st = data[cont_val] = st.fit_transform(data[cont_val])

In [20]:
data.head()

Unnamed: 0,age,sex,trestbps,chol,thalach,oldpeak,target,cp_1,cp_2,cp_3,...,exang_1,slope_1,slope_2,ca_1,ca_2,ca_3,ca_4,thal_1,thal_2,thal_3
0,0.949794,1,0.764066,-0.261285,0.018826,1.084022,1,False,False,True,...,False,False,False,False,False,False,False,True,False,False
1,-1.928548,1,-0.091401,0.067741,1.636979,2.118926,1,False,True,False,...,False,False,False,False,False,False,False,False,True,False
2,-1.485726,0,-0.091401,-0.822564,0.980971,0.307844,1,True,False,False,...,False,False,True,False,False,False,False,False,True,False
3,0.174856,1,-0.661712,-0.203222,1.243374,-0.209608,1,True,False,False,...,False,False,True,False,False,False,False,False,True,False
4,0.285561,0,-0.661712,2.080602,0.587366,-0.382092,1,False,False,False,...,True,False,True,False,False,False,False,False,True,False


## Splitting the Dataset Into the Training Set and Test Set

In [21]:
X = data.drop('target',axis=1)

In [22]:
y = data['target']

In [23]:
from sklearn.model_selection import train_test_split

In [24]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

In [25]:
y_test

180    0
229    0
111    1
247    0
60     1
      ..
250    0
104    1
300    0
194    0
185    0
Name: target, Length: 61, dtype: int64

## Logistic Regression

In [26]:
data.head()

Unnamed: 0,age,sex,trestbps,chol,thalach,oldpeak,target,cp_1,cp_2,cp_3,...,exang_1,slope_1,slope_2,ca_1,ca_2,ca_3,ca_4,thal_1,thal_2,thal_3
0,0.949794,1,0.764066,-0.261285,0.018826,1.084022,1,False,False,True,...,False,False,False,False,False,False,False,True,False,False
1,-1.928548,1,-0.091401,0.067741,1.636979,2.118926,1,False,True,False,...,False,False,False,False,False,False,False,False,True,False
2,-1.485726,0,-0.091401,-0.822564,0.980971,0.307844,1,True,False,False,...,False,False,True,False,False,False,False,False,True,False
3,0.174856,1,-0.661712,-0.203222,1.243374,-0.209608,1,True,False,False,...,False,False,True,False,False,False,False,False,True,False
4,0.285561,0,-0.661712,2.080602,0.587366,-0.382092,1,False,False,False,...,True,False,True,False,False,False,False,False,True,False


In [27]:
from sklearn.linear_model import LogisticRegression

In [28]:
log = LogisticRegression()
log.fit(X_train,y_train)

In [29]:
y_pred1 = log.predict(X_test)

In [30]:
from sklearn.metrics import accuracy_score

In [31]:
accuracy_score(y_test,y_pred1)

0.9016393442622951

## SVC

In [32]:
from sklearn import svm

In [33]:
svm = svm.SVC()

In [34]:
svm.fit(X_train,y_train)

In [35]:
y_pred2 = svm.predict(X_test)

In [36]:
accuracy_score(y_test,y_pred2)

0.8688524590163934

## KNeighbors Classifier

In [37]:
from sklearn.neighbors import KNeighborsClassifier

In [38]:
knn = KNeighborsClassifier()

In [39]:
knn.fit(X_train,y_train)

In [40]:
y_pred3=knn.predict(X_test)

In [41]:
accuracy_score(y_test,y_pred3)

0.8688524590163934

In [42]:
score = []
for k in range(1,40):
    knn=KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train,y_train)
    y_pred=knn.predict(X_test)
    score.append(accuracy_score(y_test,y_pred))

In [43]:
score

[0.8360655737704918,
 0.8524590163934426,
 0.8524590163934426,
 0.8688524590163934,
 0.8688524590163934,
 0.8360655737704918,
 0.8524590163934426,
 0.8360655737704918,
 0.8524590163934426,
 0.8360655737704918,
 0.8524590163934426,
 0.8360655737704918,
 0.8360655737704918,
 0.8524590163934426,
 0.8524590163934426,
 0.8524590163934426,
 0.8360655737704918,
 0.8360655737704918,
 0.8360655737704918,
 0.8360655737704918,
 0.8360655737704918,
 0.8360655737704918,
 0.8360655737704918,
 0.8360655737704918,
 0.8360655737704918,
 0.8524590163934426,
 0.8524590163934426,
 0.8524590163934426,
 0.8688524590163934,
 0.8688524590163934,
 0.8524590163934426,
 0.8688524590163934,
 0.8524590163934426,
 0.8688524590163934,
 0.8360655737704918,
 0.8524590163934426,
 0.8360655737704918,
 0.8524590163934426,
 0.8524590163934426]

In [44]:
knn=KNeighborsClassifier(n_neighbors=2)
knn.fit(X_train,y_train)
y_pred=knn.predict(X_test)
accuracy_score(y_test,y_pred)

0.8524590163934426

## Non-Linear ML Algorithms

In [45]:
data = pd.read_csv('C:/Users/mrsub/Downloads/heart.csv')

FileNotFoundError: [Errno 2] No such file or directory: 'C:/Users/mrsub/Downloads/heart.csv'

In [None]:
data.head()

In [None]:
data = data.drop_duplicates()

In [None]:
data.shape

In [None]:
X = data.drop('target',axis=1)
y=data['target']

In [None]:
X_train,X_test,y_train,y_test= train_test_split(X,y,test_size=0.2,random_state=42)

## Decision Tree Classifier

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
dt = DecisionTreeClassifier()

In [None]:
dt.fit(X_train,y_train)

In [None]:
y_pred4= dt.predict(X_test)

In [None]:
accuracy_score(y_test,y_pred4)

## Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
rf = RandomForestClassifier()

In [None]:
rf.fit(X_train,y_train)

In [None]:
y_pred5= rf.predict(X_test)

In [None]:
accuracy_score(y_test,y_pred5)

## Gradient Boosting Classifier

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

In [None]:
gbc = GradientBoostingClassifier()

In [None]:
gbc.fit(X_train,y_train)

In [None]:
y_pred6 = gbc.predict(X_test)

In [None]:
accuracy_score(y_test,y_pred6)

In [None]:
final_data = pd.DataFrame({'Models':['LR','SVM','KNN','DT','RF','GB'],'ACC':[accuracy_score(y_test,y_pred1),
                                                                            accuracy_score(y_test,y_pred2),
                                                                            accuracy_score(y_test,y_pred3),
                                                                            accuracy_score(y_test,y_pred4),
                                                                            accuracy_score(y_test,y_pred5),
                                                                            accuracy_score(y_test,y_pred6)]})

In [None]:
final_data

In [None]:
import seaborn as sns

In [None]:
sns.barplot(final_data['Models'],final_data['ACC'])

In [None]:
X = data.drop('target',axis=1)
y=data['target']

In [None]:
X.shape

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
rf = RandomForestClassifier()
rf.fit(X,y)

## Prediction on New data

In [None]:
import pandas as pd

In [None]:
new_data = pd.DataFrame({
    'age':52,
    'sex':1,
    'cp':0,
    'trestbps':125,
    'chol':212,
    'fbs':0,
    'restecg':1,
    'thalach':168,
    'exang':0,
    'oldpeak':1.0,
    'slope':2,
    'ca':2,
    'thal':3,
},index=[0])

In [None]:
new_data

In [None]:
p = rf.predict(new_data)
if p[0]==0:
    print("No Disease")
else:
    print("Disease")

## Save Model Design Using Joblib

In [None]:
import joblib

In [None]:
joblib.dump(rf,'model_joblib_heart')

In [None]:
model = joblib.load('model_joblib_heart')

In [None]:
model.predict(new_data)

## GUI

In [None]:
from tkinter import *
import joblib

def validate_entry(entry):
    if entry.get().strip() == "":
        entry.config(bg="red")
    else:
        entry.config(bg="white")

def show_entry_fields():
    p1 = int(entries[0].get())
    p2 = int(entries[1].get())
    p3 = int(entries[2].get())
    p4 = int(entries[3].get())
    p5 = int(entries[4].get())
    p6 = int(entries[5].get())
    p7 = int(entries[6].get())
    p8 = int(entries[7].get())
    p9 = int(entries[8].get())
    p10 = float(entries[9].get())
    p11 = int(entries[10].get())
    p12 = int(entries[11].get())
    p13 = int(entries[12].get())
    
    model = joblib.load('model_joblib_heart')
    result = model.predict([[p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13]])
    
    if result == 0:
        result_label.config(text="No Heart Disease", fg="red", font=("Arial", 12, "bold"))
    else:
        result_label.config(text="Possibility of Heart Disease", fg="red", font=("Arial", 12, "bold"))

master = Tk()
master.title("Heart Disease Prediction System")

def on_configure(event):
    canvas.configure(scrollregion=canvas.bbox('all'))

def make_round(widget):
    widget.config(relief=RAISED, bd=2, highlightthickness=2)

def create_entry(master, text):
    entry = Entry(master, bg="white", bd=2, relief="flat", font=("Arial", 12))
    entry.insert(0, text)
    entry.bind("<FocusIn>", lambda event: entry.delete(0, END))
    entry.bind("<KeyRelease>", lambda event: validate_entry(entry))
    return entry

canvas = Canvas(master)
canvas.pack(side=LEFT, fill=BOTH, expand=TRUE)
scrollbar = Scrollbar(master, command=canvas.yview)
scrollbar.pack(side=RIGHT, fill=Y)
canvas.configure(yscrollcommand=scrollbar.set)
frame = Frame(canvas)
canvas.create_window((0,0), window=frame, anchor='nw')
frame.bind("<Configure>", on_configure)

label = Label(frame, text="Heart Disease Prediction System", font=("Arial", 16, "bold"), fg="red")
label.pack(pady=10)

labels = ["Enter Your Age", "Male Or Female [1/0]", "Enter Value of CP", "Enter Value of trestbps",
          "Enter Value of chol", "Enter Value of fbs", "Enter Value of restecg", "Enter Value of thalach",
          "Enter Value of exang", "Enter Value of oldpeak", "Enter Value of slope", "Enter Value of ca",
          "Enter Value of thal"]
entries = []

for label_text in labels:
    entry = create_entry(frame, label_text)
    entry.pack(fill=X, padx=10, pady=5)
    entries.append(entry)

result_label = Label(frame, font=("Arial", 12))
result_label.pack(pady=10)

predict_button = Button(frame, text='Predict', command=show_entry_fields, bg="blue", fg="white", font=("Arial", 12))
predict_button.pack(ipady=5, padx=20, pady=10, fill=X)
make_round(predict_button)

mainloop()