In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv("/content/heart.csv")
df[:5]

Unnamed: 0,age,sex,cp,trtbps,chol,fbs,restecg,thalachh,exng,oldpeak,slp,caa,thall,output
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [3]:
df.columns

Index(['age', 'sex', 'cp', 'trtbps', 'chol', 'fbs', 'restecg', 'thalachh',
       'exng', 'oldpeak', 'slp', 'caa', 'thall', 'output'],
      dtype='object')

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       303 non-null    int64  
 1   sex       303 non-null    int64  
 2   cp        303 non-null    int64  
 3   trtbps    303 non-null    int64  
 4   chol      303 non-null    int64  
 5   fbs       303 non-null    int64  
 6   restecg   303 non-null    int64  
 7   thalachh  303 non-null    int64  
 8   exng      303 non-null    int64  
 9   oldpeak   303 non-null    float64
 10  slp       303 non-null    int64  
 11  caa       303 non-null    int64  
 12  thall     303 non-null    int64  
 13  output    303 non-null    int64  
dtypes: float64(1), int64(13)
memory usage: 33.3 KB


In [5]:
df.isnull().sum()

age         0
sex         0
cp          0
trtbps      0
chol        0
fbs         0
restecg     0
thalachh    0
exng        0
oldpeak     0
slp         0
caa         0
thall       0
output      0
dtype: int64

In [6]:
df.drop_duplicates(keep='first',inplace=True)

In [7]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
cols_to_normalize = ['age', 'sex', 'cp', 'trtbps', 'chol', 'fbs', 'restecg', 'thalachh',
       'exng', 'oldpeak', 'slp', 'caa', 'thall']
df[cols_to_normalize] = scaler.fit_transform(df[cols_to_normalize])
df[:3]

Unnamed: 0,age,sex,cp,trtbps,chol,fbs,restecg,thalachh,exng,oldpeak,slp,caa,thall,output
0,0.708333,1.0,1.0,0.481132,0.244292,1.0,0.0,0.603053,0.0,0.370968,0.0,0.0,0.333333,1
1,0.166667,1.0,0.666667,0.339623,0.283105,0.0,0.5,0.885496,0.0,0.564516,0.0,0.0,0.666667,1
2,0.25,0.0,0.333333,0.339623,0.178082,0.0,0.0,0.770992,0.0,0.225806,1.0,0.0,0.666667,1


In [8]:
y = df["output"]
X = df.drop("output",axis=1)

X.shape, y.shape

((302, 13), (302,))

In [9]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.15, random_state = 42)

In [10]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128,activation = "relu"),
    tf.keras.layers.Dense(64,activation = "relu"),
    tf.keras.layers.Dense(32,activation = "relu"),
    tf.keras.layers.Dense(16,activation = "relu"),
    tf.keras.layers.Dense(1,activation = "sigmoid")
])

model.compile(loss=tf.keras.losses.BinaryCrossentropy(),
              optimizer = tf.keras.optimizers.Adam(),
              metrics = ["accuracy"])

history = model.fit(X_train,y_train,
          epochs=300,
          validation_data=(X_test,y_test),
          verbose=0)

In [11]:
model.evaluate(X_test,y_test)



[1.5710172653198242, 0.8478260636329651]

In [12]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 128)               1792      
                                                                 
 dense_1 (Dense)             (None, 64)                8256      
                                                                 
 dense_2 (Dense)             (None, 32)                2080      
                                                                 
 dense_3 (Dense)             (None, 16)                528       
                                                                 
 dense_4 (Dense)             (None, 1)                 17        
                                                                 
Total params: 12,673
Trainable params: 12,673
Non-trainable params: 0
_________________________________________________________________


In [13]:
from sklearn import tree
clf = tree.DecisionTreeClassifier()
clf = clf.fit(X_train, y_train)

In [14]:
y_preds = clf.predict(X_test)
y_preds[:5]

array([0, 0, 0, 1, 1])

In [15]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test, y_preds)
cm

array([[15,  3],
       [ 6, 22]])

In [16]:
precision = cm[1,1] / (cm[0,1] + cm[1,1])  # true positives / (false positives + true positives)
recall = cm[1,1] / (cm[1,0] + cm[1,1])  # true positives / (false negatives + true positives)
accuracy = (cm[1,1] + cm[0,0]) / len(y_preds)

print("Precision:", precision)
print("Recall:", recall)
print("Accuracy:",accuracy)

Precision: 0.88
Recall: 0.7857142857142857
Accuracy: 0.8043478260869565


In [17]:
from sklearn import svm
model = svm.SVC()
model.fit(X_train, y_train)
  
predicted = model.predict(X_test)
cm = confusion_matrix(y_test, predicted)

precision = cm[1,1] / (cm[0,1] + cm[1,1])  # true positives / (false positives + true positives)
recall = cm[1,1] / (cm[1,0] + cm[1,1])  # true positives / (false negatives + true positives)
accuracy = (cm[1,1] + cm[0,0]) / len(y_preds)

print("Precision:", precision)
print("Recall:", recall)
print("Accuracy:",accuracy)

Precision: 0.9130434782608695
Recall: 0.75
Accuracy: 0.8043478260869565


In [24]:
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestRegressor

model = RandomForestRegressor(n_estimators = 156, random_state = 42)  
model.fit(X_train, y_train)  
predicted = model.predict(X_test)

print("The accuracy of Random Forest is : ", accuracy_score(y_test, predicted.round())*100, "%")

The accuracy of Random Forest is :  82.6086956521739 %


In [35]:
from sklearn.neighbors import KNeighborsClassifier
model = KNeighborsClassifier(n_neighbors = 128)  
model.fit(X_train, y_train)
predicted = model.predict(X_test)
  
print(confusion_matrix(y_test, predicted))
print("The accuracy of KNN is : ", accuracy_score(y_test, predicted.round())*100, "%")

[[15  3]
 [ 4 24]]
The accuracy of KNN is :  84.78260869565217 %
