In [None]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
# Read the diabetes dataset from a CSV file in the current working directory.
csv_path = 'diabetes-dataset.csv'
dataset = pd.read_csv(csv_path)

# Leave the first rows as the cell's displayed value for a quick sanity check.
dataset.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,2,138,62,35,0,33.6,0.127,47,1
1,0,84,82,31,125,38.2,0.233,23,0
2,0,145,0,0,0,44.2,0.63,31,1
3,0,135,68,42,250,42.3,0.365,24,1
4,1,139,62,41,480,40.7,0.536,21,0


In [3]:
# All column labels of the frame; note that this also contains the target
# column "Outcome", not only the predictor columns.
feature_names = dataset.columns
print(feature_names)

# Separate the target label from the predictor columns.
y = dataset["Outcome"]
x = dataset.drop(columns=["Outcome"])

# Hold out 30% of the rows for testing; the fixed random_state keeps the
# split the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=6,
)

Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],
      dtype='object')


## Experiment # 1
### SVM using kernel as linear, poly, rbf, and sigmoid

In [5]:
# Compare the four kernels while leaving every other SVC setting at its default.
kernels = ['linear','poly', 'rbf', 'sigmoid']

for kernel in kernels:
    # fit() returns the fitted estimator, so construction and training are chained.
    model = SVC(kernel=kernel).fit(X_train, Y_train)
    Y_pred = model.predict(X_test)
    # Accuracy is measured on the held-out test split.
    test_accuracy = accuracy_score(Y_test, Y_pred)
    print('SVM using Kernel as ', kernel, ':', test_accuracy)

SVM using Kernel as  linear : 0.775
SVM using Kernel as  poly : 0.765
SVM using Kernel as  rbf : 0.77
SVM using Kernel as  sigmoid : 0.445


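We can see that kernel=linear gives the highest accuracy (0.775) of the four kernel types. However, kernel=rbf (0.77) and kernel=poly (0.765) are quite close to the linear kernel; only kernel=sigmoid (0.445) is clearly worse.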

## Experiment # 2

### SVM using Kernel as poly with various degrees of polynomials

In [None]:
# Sweep the polynomial degree from 1 through 10 inclusive. range() excludes its
# stop value, so the stop must be 11 for the sweep to reach degree = 10, which
# the results recorded for this experiment include.
for deg in range(1, 11):
    # Only the degree changes between runs; all other SVC settings stay default.
    model = SVC(kernel='poly', degree=deg)
    model.fit(X_train, Y_train)
    Y_pred = model.predict(X_test)
    # Accuracy is measured on the held-out test split.
    print('SVM using Kernel as poly with degree = ',deg,':',accuracy_score(Y_test,Y_pred))

1. SVM using Kernel as poly with degree =  1 : 0.7683333333333333
2. SVM using Kernel as poly with degree =  2 : 0.775
3. SVM using Kernel as poly with degree =  3 : 0.765
4. SVM using Kernel as poly with degree =  4 : 0.7883333333333333
5. SVM using Kernel as poly with degree =  5 : 0.7933333333333333
6. SVM using Kernel as poly with degree =  6 : 0.7883333333333333
7. SVM using Kernel as poly with degree =  7 : 0.7883333333333333
8. SVM using Kernel as poly with degree =  8 : 0.7883333333333333
9. SVM using Kernel as poly with degree =  9 : 0.7916666666666666
10. SVM using Kernel as poly with degree =  10 : 0.785

We can see from the accuracy scores above that the model's accuracy generally improves from degree = 1 to degree = 5 (with a dip at degree = 3), and that no higher degree does any better, so larger polynomial degrees are not worth the extra computation.

Hence, we can specify degree = 5, which gives us the best accuracy score (0.793).


## Experiment # 3

### SVM using different Regularization Parameters with the default kernel

In [6]:
# Try every integer value of the regularization parameter C from 1 to 19,
# leaving the kernel at SVC's default.
for c in range(1, 20):
    # fit() returns the fitted estimator, so construction and training are chained.
    model = SVC(C=c).fit(X_train, Y_train)
    Y_pred = model.predict(X_test)
    # Accuracy is measured on the held-out test split.
    test_accuracy = accuracy_score(Y_test, Y_pred)
    print('SVM using default kernel and Regularization Parameter = ', c, ':', test_accuracy)

SVM using default kernel and Regularization Parameter =  1 : 0.77
SVM using default kernel and Regularization Parameter =  2 : 0.77
SVM using default kernel and Regularization Parameter =  3 : 0.7666666666666667
SVM using default kernel and Regularization Parameter =  4 : 0.775
SVM using default kernel and Regularization Parameter =  5 : 0.775
SVM using default kernel and Regularization Parameter =  6 : 0.7783333333333333
SVM using default kernel and Regularization Parameter =  7 : 0.7783333333333333
SVM using default kernel and Regularization Parameter =  8 : 0.7783333333333333
SVM using default kernel and Regularization Parameter =  9 : 0.7733333333333333
SVM using default kernel and Regularization Parameter =  10 : 0.7766666666666666
SVM using default kernel and Regularization Parameter =  11 : 0.7783333333333333
SVM using default kernel and Regularization Parameter =  12 : 0.7783333333333333
SVM using default kernel and Regularization Parameter =  13 : 0.7783333333333333
SVM using 

We can see from the results above that the best accuracy score is 0.778, and that it recurs for several values of the Regularization Parameter.
Hence, we can specify C = 6, the smallest value that reaches it.

## Experiment # 4

### SVM using the best configuration found from the above analysis and improvement techniques
#### 1. Kernel = 'linear'
#### 2. Degree = 5
#### 3. Regularization Parameter = 6

In [4]:
# Settings selected from the three preceding experiments.
k = 'linear'
d = 5
c = 6

# NOTE(review): per the scikit-learn SVC documentation, `degree` is only used by
# the 'poly' kernel, so with kernel='linear' the value of d should have no
# effect on this model — confirm.
best_params = {'kernel': k, 'degree': d, 'C': c}
model = SVC(**best_params).fit(X_train, Y_train)
Y_pred = model.predict(X_test)
# Accuracy is measured on the held-out test split.
print('SVM using best configuration: ', accuracy_score(Y_test, Y_pred))

SVM using best configuration:  0.78


We can conclude the following from the above experiments:
    1. Using the linear kernel gave us a better accuracy score (0.775) than the other kernel types.
    2. Using a 5th-degree polynomial kernel gave us a better accuracy score (0.793) than the other degrees.
    3. Using 6 as the Regularization Parameter gave us the best accuracy score (0.778) among the values shown; several larger values only match it.
    
Hence, we ran one more experiment with a support vector machine that combines the best value found in each experiment (kernel=linear, degree=5 and C=6). Note that degree is only used by the poly kernel, so this model is effectively a linear-kernel SVM with C=6. Its accuracy (0.78) is better than the best results of the 1st (0.775) and 3rd (0.778) experiments, but the model from the 2nd experiment (poly kernel with degree=5) outperforms all the other support vector machines with the highest accuracy of 0.793.