## Preprocessing

In [2]:
import pandas as pd
import numpy as np

In [4]:
dataset = pd.read_csv('/content/heart.csv')
dataset.describe()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
count,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0
mean,54.366337,0.683168,0.966997,131.623762,246.264026,0.148515,0.528053,149.646865,0.326733,1.039604,1.39934,0.729373,2.313531,0.544554
std,9.082101,0.466011,1.032052,17.538143,51.830751,0.356198,0.52586,22.905161,0.469794,1.161075,0.616226,1.022606,0.612277,0.498835
min,29.0,0.0,0.0,94.0,126.0,0.0,0.0,71.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,47.5,0.0,0.0,120.0,211.0,0.0,0.0,133.5,0.0,0.0,1.0,0.0,2.0,0.0
50%,55.0,1.0,1.0,130.0,240.0,0.0,1.0,153.0,0.0,0.8,1.0,0.0,2.0,1.0
75%,61.0,1.0,2.0,140.0,274.5,0.0,1.0,166.0,1.0,1.6,2.0,1.0,3.0,1.0
max,77.0,1.0,3.0,200.0,564.0,1.0,2.0,202.0,1.0,6.2,2.0,4.0,3.0,1.0


In [6]:
data = dataset.iloc[:, 0:13].values
target = dataset.iloc[:, 13].values
target

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [7]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

In [18]:
onehotencoder = ColumnTransformer(transformers=[("OneHot", OneHotEncoder(), [1,5,6,11,12])],remainder='passthrough')
data = onehotencoder.fit_transform(data)

In [27]:
scaler = StandardScaler()
data = scaler.fit_transform(data)

## Algorithms

*  Decision Tree
*  Random Forest
*  SVM
*  Neural Network

In [9]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV

### Decision Tree

In [30]:
parameters = {'criterion': ['entropy', 'gini'],
              'splitter': ['best', 'random'],
              'min_samples_leaf': [1, 3, 5, 7], 
              'min_samples_split': [2, 3, 4, 5]}
grid_search = GridSearchCV(estimator=DecisionTreeClassifier(), param_grid=parameters)
grid_search.fit(data, target)
best_parameters = grid_search.best_params_
best_result = grid_search.best_score_

In [31]:
best_parameters, best_result

({'criterion': 'entropy',
  'min_samples_leaf': 7,
  'min_samples_split': 2,
  'splitter': 'random'},
 0.8313114754098361)

### Random Forest

In [32]:
parameters = {'criterion': ['entropy', 'gini'],
              'n_estimators': [10, 25, 50],
              'min_samples_leaf': [1, 3, 5], 
              'min_samples_split': [2, 3, 4]}
grid_search = GridSearchCV(estimator=RandomForestClassifier(), param_grid=parameters)
grid_search.fit(data, target)
best_parameters = grid_search.best_params_
best_result = grid_search.best_score_

In [33]:
best_parameters, best_result # 85 without dummy variables and scaled data

({'criterion': 'gini',
  'min_samples_leaf': 3,
  'min_samples_split': 4,
  'n_estimators': 25},
 0.844808743169399)

### SVM

In [34]:
parameters = {'C': [1, 1.5, 2.0], # 83 without dummy variables and scaled data
              'kernel': ['rbf', 'poly', 'sigmoid', 'linear'],
              'degree': [2, 3, 4],
              'tol': [0.001, 0.0001]}
grid_search = GridSearchCV(estimator=SVC(), param_grid=parameters)
grid_search.fit(data, target)
best_parameters = grid_search.best_params_
best_result = grid_search.best_score_
best_parameters, best_result

({'C': 1, 'degree': 2, 'kernel': 'linear', 'tol': 0.001}, 0.8446994535519126)

### Neural Network


In [None]:
parameters = {'activation': ['relu', 'tanh', 'logistic'],
              'solver': ['lbfgs', 'sgd', 'adam']}
grid_search = GridSearchCV(estimator=MLPClassifier(), param_grid=parameters)
grid_search.fit(data, target)
best_parameters = grid_search.best_params_
best_result = grid_search.best_score_
best_parameters, best_result