# Medical Diagnosis with Support Vector Machines

## Task 1: Import Libraries



In [1]:
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

## Task 1: Get Data

In [2]:
column_names = ["pregnancies", "glucose", "bpressure", "skinfold", "insulin", "bmi", "pedigree", "age", "class"]
df=pd.read_csv('data.csv',names=column_names)
df.head()

Unnamed: 0,pregnancies,glucose,bpressure,skinfold,insulin,bmi,pedigree,age,class
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   pregnancies  768 non-null    int64  
 1   glucose      768 non-null    int64  
 2   bpressure    768 non-null    int64  
 3   skinfold     768 non-null    int64  
 4   insulin      768 non-null    int64  
 5   bmi          768 non-null    float64
 6   pedigree     768 non-null    float64
 7   age          768 non-null    int64  
 8   class        768 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 54.1 KB


## Task 1: Extract Features

In [4]:
x=df.iloc[:,:8]

## Task 1: Extract Class Labels

In [5]:
y=df['class']

## Task 2: Split Dataset

In [6]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.25,random_state=101)


## Task 2: Normalize Features

In [7]:
sc=StandardScaler()
sc.fit(x_train)
x_train=sc.transform(x_train)

## Task 3: Training a Support Vector Machine

In [8]:
clf=svm.SVC(kernel='sigmoid')
clf.fit(x_train,y_train)

SVC(kernel='sigmoid')

## Task 3: Decision Boundary

In [9]:
y_pred=clf.predict(x_train)
print(accuracy_score(y_train,y_pred))

0.6736111111111112


## Task 3: SVM Kernels

In [10]:
for k in ('linear','poly','rbf','sigmoid'):
    clf=svm.SVC(kernel=k)
    clf.fit(x_train,y_train)
    y_pred=clf.predict(x_train)
    print(k)
    print(accuracy_score(y_train,y_pred))

linear
0.7743055555555556
poly
0.7986111111111112
rbf
0.8263888888888888
sigmoid
0.6736111111111112


## Task 4: Instantiating the Best Model

In [11]:
clf=svm.SVC(kernel='rbf')
clf.fit(x_train,y_train)

SVC()

## Task 4: Making a single prediction

In [12]:
# "pregnancies", "glucose", "bpressure", 
# "skinfold", "insulin", "bmi", 
# "pedigree", "age", "class"
patient = np.array([[ 1., 200., 75., 40., 0., 45.,1.5, 20. ],])
patient = sc.transform(patient)
pred = clf.predict(patient)
if pred == 1:
    print("Patient has diabetes")
if pred == 0:
    print("Patient does not have diabetes")


Patient has diabetes


## Task 4: Testing Set Prediction

In [13]:
x_test = sc.transform(x_test)
y_pred = clf.predict(x_test)
print(accuracy_score(y_test, y_pred))

0.7864583333333334


## Task 5: Precision and Recall

In [14]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.80      0.89      0.84       124
           1       0.75      0.60      0.67        68

    accuracy                           0.79       192
   macro avg       0.77      0.75      0.75       192
weighted avg       0.78      0.79      0.78       192



In [15]:
from sklearn.neighbors import KNeighborsClassifier

model = KNeighborsClassifier(n_neighbors=9)

# Train the model using the training sets
model.fit(x_train,y_train)


KNeighborsClassifier(n_neighbors=9)

In [16]:
y_pred = model.predict(x_test)
print(accuracy_score(y_test, y_pred))

0.7864583333333334


In [17]:
from sklearn.model_selection import GridSearchCV

In [18]:
param_grid = {'C': [0.1, 1, 10, 100],  
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'kernel': ['linear','poly','rbf']}  
  
grid = GridSearchCV(clf, param_grid) 
  
# fitting the model for grid search 
grid.fit(x_train,y_train) 

# print best parameter after tuning 
print(grid.best_params_) 
grid_predictions = grid.predict(x_test) 
  
# print classification report 
print(classification_report(y_test, grid_predictions)) 

{'C': 0.1, 'gamma': 1, 'kernel': 'linear'}
              precision    recall  f1-score   support

           0       0.79      0.88      0.83       124
           1       0.72      0.57      0.64        68

    accuracy                           0.77       192
   macro avg       0.76      0.73      0.74       192
weighted avg       0.77      0.77      0.76       192

