In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [3]:
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

# SVM: Support Vector Machine
# SVC: Support Vector Classifier
# SVR: Support Vector Regressor

In [4]:
# Read the heart CSV; the path is relative to the notebook's working directory.
heart_csv = '../dataset/heart.csv'
data = pd.read_csv(heart_csv)

In [5]:
# Peek at five randomly drawn rows (unseeded, so the selection differs per run).
data.sample(n=5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
43,53,0,0,130,264,0,0,143,0,0.4,1,0,2,1
14,58,0,3,150,283,1,0,162,0,1.0,2,0,2,1
251,43,1,0,132,247,1,0,143,1,0.1,1,4,3,0
167,62,0,0,140,268,0,0,160,0,3.6,0,2,2,0
209,59,1,0,140,177,0,1,162,1,0.0,2,1,3,0


In [6]:
# Columns this notebook treats as categorical; they are dropped before the
# correlation matrix is computed in a later cell.
categoric_col = [
    'cp',
    'sex',
    'fbs',
    'restecg',
    'exang',
    'slope',
    'ca',
    'thal',
]

# Summary statistics of the numeric columns.
data.describe()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
count,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0
mean,54.366337,0.683168,0.966997,131.623762,246.264026,0.148515,0.528053,149.646865,0.326733,1.039604,1.39934,0.729373,2.313531,0.544554
std,9.082101,0.466011,1.032052,17.538143,51.830751,0.356198,0.52586,22.905161,0.469794,1.161075,0.616226,1.022606,0.612277,0.498835
min,29.0,0.0,0.0,94.0,126.0,0.0,0.0,71.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,47.5,0.0,0.0,120.0,211.0,0.0,0.0,133.5,0.0,0.0,1.0,0.0,2.0,0.0
50%,55.0,1.0,1.0,130.0,240.0,0.0,1.0,153.0,0.0,0.8,1.0,0.0,2.0,1.0
75%,61.0,1.0,2.0,140.0,274.5,0.0,1.0,166.0,1.0,1.6,2.0,1.0,3.0,1.0
max,77.0,1.0,3.0,200.0,564.0,1.0,2.0,202.0,1.0,6.2,2.0,4.0,3.0,1.0


In [7]:
# Sorted distinct values found in the 'fbs' column.
np.unique(data['fbs'].values)


array([0, 1], dtype=int64)

In [8]:
# Sorted distinct values found in the 'thal' column.
np.unique(data['thal'].values)

array([0, 1, 2, 3], dtype=int64)

In [9]:
# Pairwise correlation of the remaining columns once the categorical ones
# are removed (the target column is kept).
data.drop(columns=categoric_col).corr()

Unnamed: 0,age,trestbps,chol,thalach,oldpeak,target
age,1.0,0.279351,0.213678,-0.398522,0.210013,-0.225439
trestbps,0.279351,1.0,0.123174,-0.046698,0.193216,-0.144931
chol,0.213678,0.123174,1.0,-0.00994,0.053952,-0.085239
thalach,-0.398522,-0.046698,-0.00994,1.0,-0.344187,0.421741
oldpeak,0.210013,0.193216,0.053952,-0.344187,1.0,-0.430696
target,-0.225439,-0.144931,-0.085239,0.421741,-0.430696,1.0


In [10]:
# Count of missing values per column.
data.isnull().sum()

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64

In [11]:
# Drop rows that contain any missing value. Rebinding the name instead of
# mutating in place keeps this cell idempotent and avoids silently changing
# a frame that earlier cells have already displayed.
data = data.dropna()

In [12]:
# (rows, columns) of the frame after the NaN-dropping cell above.
data.shape

(303, 14)

In [13]:
# Remove fully duplicated rows, keeping the first occurrence and the original
# index labels. Rebinding the name instead of using inplace=True keeps this
# cell idempotent and the data lineage explicit.
data = data.drop_duplicates()

In [14]:
# (rows, columns) of the frame after the duplicate-dropping cell above.
data.shape

(302, 14)

In [15]:
# List the column labels; 'target' is split off as the label in the next cell.
data.columns

Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target'],
      dtype='object')

In [16]:
# Feature matrix: every column except the label.
X = data.drop(columns='target')
# Label vector.
y = data['target']

In [17]:
# Hold out 15% of the rows for evaluation; the fixed seed keeps the shuffled
# split identical across runs.
split = train_test_split(
    X,
    y,
    test_size=0.15,
    shuffle=True,
    random_state=666,
)
X_train, X_test, y_train, y_test = split

In [18]:
# Both estimators are randomized, so they are seeded with the same value the
# train/test split uses (666); without a seed the reported metrics change on
# every Restart & Run All.
dtree = DecisionTreeClassifier(random_state=666)
# n_jobs=-1 trains the forest's trees on all available CPU cores.
rand_forest = RandomForestClassifier(n_jobs=-1, random_state=666)

In [19]:
# Train the decision tree on the training split.
dtree.fit(X=X_train, y=y_train)

In [20]:
# Predicted labels for the held-out rows, from the decision tree.
y_pred = dtree.predict(X=X_test)

In [21]:
# Per-class precision / recall / F1 for the current y_pred.
tree_report = classification_report(y_true=y_test, y_pred=y_pred)
print(tree_report)

              precision    recall  f1-score   support

           0       0.73      0.73      0.73        22
           1       0.75      0.75      0.75        24

    accuracy                           0.74        46
   macro avg       0.74      0.74      0.74        46
weighted avg       0.74      0.74      0.74        46



In [22]:
# Train the random forest on the same training split as the tree.
rand_forest.fit(X=X_train, y=y_train)

In [23]:
# Score the random forest on the held-out split (y_pred is overwritten).
y_pred = rand_forest.predict(X=X_test)
forest_report = classification_report(y_true=y_test, y_pred=y_pred)
print(forest_report)

              precision    recall  f1-score   support

           0       0.86      0.82      0.84        22
           1       0.84      0.88      0.86        24

    accuracy                           0.85        46
   macro avg       0.85      0.85      0.85        46
weighted avg       0.85      0.85      0.85        46



In [24]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply that same transform
# to both splits, so no statistics from the test rows leak into training.
# NOTE: X_train / X_test are overwritten with the scaled arrays from here on.
scaler = StandardScaler()
scaler.fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [25]:
# Train a default-parameter support vector classifier on the scaled training
# split; fit() returns the estimator itself, so it can be chained.
support_vector = SVC().fit(X_train, y_train)

# Score it on the scaled held-out split (y_pred is overwritten).
y_pred = support_vector.predict(X_test)
svc_report = classification_report(y_test, y_pred)
print(svc_report)

              precision    recall  f1-score   support

           0       0.85      0.77      0.81        22
           1       0.81      0.88      0.84        24

    accuracy                           0.83        46
   macro avg       0.83      0.82      0.82        46
weighted avg       0.83      0.83      0.83        46

