In [28]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [29]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt 
import warnings
warnings.filterwarnings(action='ignore')

plt.style.use('seaborn')

####  Getting data ready

In [30]:
heart_disease = pd.read_csv('data/heart-disease.csv')
heart_disease.head(5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [31]:
X = heart_disease.drop('target', axis=1)
y = heart_disease['target']


In [32]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [33]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((242, 13), (61, 13), (242,), (61,))

#### Preparing a machine learning model

In [34]:
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier().fit(X_train, y_train)

In [35]:
y_preds = clf.predict(X_test)

####  Evaluating a model's performance 

In [36]:
clf.score(X_train, y_train)

1.0

In [37]:
clf.score(X_test, y_test)

0.8852459016393442

#### Running different estimators

In [38]:
from sklearn.svm import LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

In [44]:
linear_model = LinearSVC()
linear_model.fit(X_train, y_train)
linear = linear_model.score(X_test, y_test)

near_model = KNeighborsClassifier()
near_model.fit(X_train, y_train)
near = near_model.score(X_test, y_test)

svc_model = SVC()
svc_model.fit(X_train, y_train)
svc = svc_model.score(X_test, y_test)

logistic_model = LogisticRegression()
logistic_model.fit(X_train, y_train)
logistic = logistic_model.score(X_test, y_test)

models = {'linear' : linear_model,
          'K nearest' : near_model,
          'SVC' : svc_model,
          'logistic' : logistic_model,
         }

In [50]:
example_result = {}
for model_name, model in models.items():
    model.fit(X_train, y_train)
    example_result[model_name] = model.score(X_test, y_test)
example_result

{'linear': 0.8524590163934426,
 'K nearest': 0.7213114754098361,
 'SVC': 0.6557377049180327,
 'logistic': 0.8688524590163934}

In [55]:
results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    results[name] = model.score(X_test, y_test)
results

{'linear': 0.7540983606557377,
 'K nearest': 0.7213114754098361,
 'SVC': 0.6557377049180327,
 'logistic': 0.8688524590163934}

In [59]:
results.keys()

dict_keys(['linear', 'K nearest', 'SVC', 'logistic'])

In [63]:
result_df = pd.DataFrame(results.values(), index=results.keys(), columns=['Accuracy'])
result_df

Unnamed: 0,Accuracy
linear,0.754098
K nearest,0.721311
SVC,0.655738
logistic,0.868852
