In [35]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.model_selection import train_test_split, GridSearchCV, LeaveOneOut
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

In [47]:
heart_data = pd.read_csv("/home/yuxuan/kaggle/heart_failure_clinical_records_dataset.csv")

X = heart_data.iloc[:, 0:11]
y = heart_data['DEATH_EVENT']
# options = ['linear','rbf','poly']
options = 'rbf'
# for i in options:
selected_feature = ['serum_creatinine','age', 'ejection_fraction','creatinine_phosphokinase']
X_processed = X[selected_feature]
X_train, X_test, y_train, y_test = train_test_split(X_processed, y, test_size=0.2, shuffle=True, random_state=1)

pipe1 = Pipeline([("scalar",MinMaxScaler()),("svm",SVC(random_state=1))])
pipe1.fit(X_train,y_train)
print("Test score: {:.3f}".format(pipe1.score(X_test,y_test)))

Test score: 0.850


In [48]:
pipe2 = Pipeline([("scalar",StandardScaler()),("svm",SVC(random_state=1))])
pipe2.fit(X_train,y_train)
print("Test score: {:.3f}".format(pipe2.score(X_test,y_test)))

Test score: 0.850


In [49]:
pipe3 = Pipeline([("scalar",RobustScaler()),("svm",SVC(random_state=1))])
pipe3.fit(X_train,y_train)
print("Test score: {:.3f}".format(pipe3.score(X_test,y_test)))


Test score: 0.800


## Test the grid search

In [None]:
param_grid = {"svm__C": [0.001,0.01,0.1,1,10,100],
              "svm__gamma":[0.001,0.01,0.1,1,10,100]}
              # "svm__kernel":["linear","rbf"]}
loo = LeaveOneOut()
grid1 = GridSearchCV(pipe1,param_grid, cv = loo,n_jobs=-1)
grid1.fit(X_train,y_train)
print("Best cross validation accuracy: {:.2f}".format(grid1.best_score_))
print("Test set score: {:.2f}".format(grid1.score(X_test,y_test)))
print("Best parameters: {}".format(grid1.best_params_))



In [40]:
grid1 = GridSearchCV(pipe1,param_grid, cv = 10,n_jobs=-1)
grid1.fit(X_train,y_train)
print("Best cross validation accuracy: {:.2f}".format(grid1.best_score_))
print("Test set score: {:.2f}".format(grid1.score(X_test,y_test)))
print("Best parameters: {}".format(grid1.best_params_))



Best cross validation accuracy: 0.74
Test set score: 0.83
Best parameters: {'svm__kernel': 'rbf', 'svm__gamma': 10, 'svm__C': 10}


In [52]:
param_grid = [{'kernel': ['rbf'],
               'C': [0.001,0.01,0.1,1,10,100],
               'gamma':[0.001,0.01,0.1,1,10,100]},
                {'kernel':['linear'],
               'C': [0.001,0.01,0.1,1,10,100]
                }]
print(param_grid)

grid_search = GridSearchCV(SVC(),param_grid,cv =10, n_jobs=-1)
grid_search.fit(X_train,y_train)
print("Best cross validation accuracy: {:.2f}".format(grid_search.best_score_))
print("Test set score: {:.2f}".format(grid_search.score(X_test,y_test)))
print("Best parameters: {}".format(grid_search.best_params_))




[{'C': [0.001, 0.01, 0.1, 1, 10, 100], 'gamma': [0.001, 0.01, 0.1, 1, 10, 100], 'kernel': ['rbf']}, {'C': [0.001, 0.01, 0.1, 1, 10, 100], 'kernel': ['linear']}]
Best cross validation accuracy: 0.74
Test set score: 0.82
Best parameters: {'C': 0.1, 'kernel': 'linear'}
