In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
import warnings
from sklearn import svm
warnings.filterwarnings(action='once') # reducing the amount of warning output
%matplotlib inline 

In [10]:
# import the data:
df = pd.read_csv('heart.csv')
df.columns

Index(['Age', 'Sex', 'ChestPainType', 'RestingBP', 'Cholesterol', 'FastingBS',
       'RestingECG', 'MaxHR', 'ExerciseAngina', 'Oldpeak', 'ST_Slope',
       'HeartDisease'],
      dtype='object')

In [11]:
# doing one hot encoding for categorical features
ohe_sex = pd.get_dummies(df.Sex, prefix='Sex')
ohe_chestpain = pd.get_dummies(df.ChestPainType, prefix='ChestPainType')
ohe_restingecg = pd.get_dummies(df.RestingECG, prefix='RestingECG')
ohe_exerciseangina = pd.get_dummies(df.ExerciseAngina, prefix='ExerciseAngina')
ohe_stslope = pd.get_dummies(df.ST_Slope, prefix='ST_Slope')
print (ohe_chestpain)

     ChestPainType_ASY  ChestPainType_ATA  ChestPainType_NAP  ChestPainType_TA
0                    0                  1                  0                 0
1                    0                  0                  1                 0
2                    0                  1                  0                 0
3                    1                  0                  0                 0
4                    0                  0                  1                 0
..                 ...                ...                ...               ...
913                  0                  0                  0                 1
914                  1                  0                  0                 0
915                  1                  0                  0                 0
916                  0                  1                  0                 0
917                  0                  0                  1                 0

[918 rows x 4 columns]


In [12]:
warnings.filterwarnings(action='once') # reducing the amount of warning output

# creating our variables to store the data
y = df['HeartDisease']
X = df[['Age', 'RestingBP','FastingBS','MaxHR','Oldpeak']]
# adding the encoded features
X['Male'] = ohe_sex['Sex_M']
X['Female'] = ohe_sex['Sex_F']

X['CPT_ASY'] = ohe_chestpain['ChestPainType_ASY']
X['CPT_ATA'] = ohe_chestpain['ChestPainType_ATA']
X['CPT_NAP'] = ohe_chestpain['ChestPainType_NAP']
X['CPT_TA'] = ohe_chestpain['ChestPainType_TA']

X['ECG_LVH'] = ohe_restingecg['RestingECG_LVH']
X['ECG_Normal'] = ohe_restingecg['RestingECG_Normal']
X['ECG_ST'] = ohe_restingecg['RestingECG_ST']

X['EA_N'] = ohe_exerciseangina['ExerciseAngina_N']
X['EA_Y'] = ohe_exerciseangina['ExerciseAngina_Y']

X['ST_Down'] = ohe_stslope['ST_Slope_Down']
X['ST_Flat'] = ohe_stslope['ST_Slope_Flat']
X['ST_Up'] = ohe_stslope['ST_Slope_Up']

# applying z standardization
scaler = preprocessing.StandardScaler().fit(X)

X = scaler.transform(X)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['Male'] = ohe_sex['Sex_M']


In [13]:
print (y)
print (X)

0      0
1      1
2      0
3      1
4      0
      ..
913    1
914    1
915    1
916    1
917    0
Name: HeartDisease, Length: 918, dtype: int64
[[-1.4331398   0.41090889 -0.55134134 ... -0.27144836 -1.00218103
   1.15067399]
 [-0.47848359  1.49175234 -0.55134134 ... -0.27144836  0.99782372
  -0.86905588]
 [-1.75135854 -0.12951283 -0.55134134 ... -0.27144836 -1.00218103
   1.15067399]
 ...
 [ 0.37009972 -0.12951283 -0.55134134 ... -0.27144836  0.99782372
  -0.86905588]
 [ 0.37009972 -0.12951283 -0.55134134 ... -0.27144836  0.99782372
  -0.86905588]
 [-1.64528563  0.30282455 -0.55134134 ... -0.27144836 -1.00218103
   1.15067399]]


In [27]:
# Split the testing and training data
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.33)

In [32]:
cVals = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000]
def svm_linear(c, X_train, Y_train, X_test, Y_test):
    svc_linear = svm.SVC(probability = False, kernel = 'linear', C = c)
    svc_linear.fit(X_train, Y_train)
    
    Yhat_svc_linear_train = svc_linear.predict(X_train)
    
    acc_train = svc_linear.score(X_train, Y_train)
    acc_test = svc_linear.score(X_test, Y_test)
    
    print('C = ', c)
    print('Train Accuracy = {0:f}'.format(acc_train))
    print('Test Accuracy = {0:f}'.format(acc_test))

def svm_rbf(c, X_train, Y_train, X_test, Y_test):
    print('C = ', c)

    svc_rbf = svm.SVC(probability = False, kernel = 'rbf', C = c)
    
    svc_rbf.fit(X_train, Y_train)
    
    acc_train = svc_rbf.score(X_train,Y_train)
    print('Train Accuracy = {0:f}'.format(acc_train))

    acc_test = svc_rbf.score(X_test,Y_test)
    print('Test Accuracy = {0:f}'.format(acc_test))
    
def svm_polynomial(c, X_train, Y_train, X_test, Y_test):
    print('C = ', c)
    
    svc_polynomial = svm.SVC(probability = False, kernel = 'poly', C = c) 
    svc_polynomial.fit(X_train, Y_train)

    Yhat_svc_poly_train = svc_polynomial.predict(X_train)
    acc_train = svc_polynomial.score(X_train, Y_train)
    
    print('Train Accuracy = {0:f}'.format(acc_train))
    
    Yhat_svc_poly_test = svc_polynomial.predict(X_test)
    acc_test = svc_polynomial.score(X_test, Y_test)

    print('Test Accuracy = {0:f}'.format(acc_test))

In [33]:

print ("SVM_linear results: ")
for c in cVals:
    svm_linear(c, X_train, Y_train, X_test, Y_test)
    
# highest test accuracy: 
# C =  0.001
# Train Accuracy = 0.871545
# Test Accuracy = 0.854785

SVM_linear results: 
C =  0.0001
Train Accuracy = 0.554472
Test Accuracy = 0.551155
C =  0.001
Train Accuracy = 0.871545
Test Accuracy = 0.854785
C =  0.01
Train Accuracy = 0.869919
Test Accuracy = 0.851485
C =  0.1
Train Accuracy = 0.869919
Test Accuracy = 0.848185
C =  1
Train Accuracy = 0.873171
Test Accuracy = 0.844884
C =  10
Train Accuracy = 0.873171
Test Accuracy = 0.844884
C =  100
Train Accuracy = 0.873171
Test Accuracy = 0.844884
C =  1000
Train Accuracy = 0.873171
Test Accuracy = 0.844884


In [34]:
print ("SVM_rbf results:")
for c in cVals:
    svm_rbf(c, X_train, Y_train, X_test, Y_test)

# highest test accuracy:
# C =  0.1
# Train Accuracy = 0.868293
# Test Accuracy = 0.851485

SVM_rbf results:
C =  0.0001
Train Accuracy = 0.554472
Test Accuracy = 0.551155
C =  0.001
Train Accuracy = 0.554472
Test Accuracy = 0.551155
C =  0.01
Train Accuracy = 0.554472
Test Accuracy = 0.551155
C =  0.1
Train Accuracy = 0.868293
Test Accuracy = 0.851485
C =  1
Train Accuracy = 0.912195
Test Accuracy = 0.848185
C =  10
Train Accuracy = 0.962602
Test Accuracy = 0.818482
C =  100
Train Accuracy = 0.988618
Test Accuracy = 0.801980
C =  1000
Train Accuracy = 1.000000
Test Accuracy = 0.782178


In [35]:
print ("SVM_polynomial results:")
for c in cVals:
    svm_polynomial(c, X_train, Y_train, X_test, Y_test)

# highest test accuracy:
# C =  1
# Train Accuracy = 0.920325
# Test Accuracy = 0.848185

SVM_polynomial results:
C =  0.0001
Train Accuracy = 0.554472
Test Accuracy = 0.551155
C =  0.001
Train Accuracy = 0.554472
Test Accuracy = 0.551155
C =  0.01
Train Accuracy = 0.559350
Test Accuracy = 0.554455
C =  0.1
Train Accuracy = 0.869919
Test Accuracy = 0.828383
C =  1
Train Accuracy = 0.920325
Test Accuracy = 0.848185
C =  10
Train Accuracy = 0.959350
Test Accuracy = 0.821782
C =  100
Train Accuracy = 0.986992
Test Accuracy = 0.782178
C =  1000
Train Accuracy = 1.000000
Test Accuracy = 0.775578
