# Work on Diabetes dataset

In [1]:
import pandas as pd
import numpy as np

In [2]:
data = pd.read_csv("diabetes.csv",index_col = False)

In [4]:
data

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
5,5,116,74,0,0,25.6,0.201,30,0
6,3,78,50,32,88,31.0,0.248,26,1
7,10,115,0,0,0,35.3,0.134,29,0
8,2,197,70,45,543,30.5,0.158,53,1
9,8,125,96,0,0,0.0,0.232,54,1


In [5]:
data.isnull().sum()

Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64

In [8]:
values = data.values
x = values[:,0:8]
y = values[:,8]


In [9]:
x[0]

array([   6.   ,  148.   ,   72.   ,   35.   ,    0.   ,   33.6  ,
          0.627,   50.   ])

In [10]:
y[0]

1.0

## Logistic Regression

In [13]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn import datasets


In [16]:
xtr,xt,ytr,yt=train_test_split(x, y, test_size=0.1, random_state=42)

In [17]:
xtr.shape

(691, 8)

In [18]:
log = LogisticRegression()
log.fit(xtr,ytr)


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [20]:
ytp= log.predict(xt)

In [21]:
print("Mean squared error: %.2f" % mean_squared_error(yt,ytp))

Mean squared error: 0.27


In [25]:
from sklearn.metrics import accuracy_score
print("accuracy_score = " , accuracy_score(yt, ytp)*100 , "%")


accuracy_score =  72.7272727273 %


## Decision Tree

In [36]:
from sklearn import tree

clf = tree.DecisionTreeClassifier()
clf = clf.fit(xtr,ytr)

In [38]:
ytp=clf.predict(xt)

In [39]:
print("Decision tree accuracy_score = " , accuracy_score(yt, ytp)*100 , "%")

Decision tree accuracy_score =  79.2207792208 %


In [44]:
print("Mean squared error: %.2f" % mean_squared_error(yt,ytp))

Mean squared error: 0.30


## Naive Bayes

In [41]:
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()

In [42]:
gnb.fit(xtr,ytr)

GaussianNB(priors=None)

In [43]:
ytp = gnb.predict(xt)

In [45]:
print("Mean squared error: %.2f" % mean_squared_error(yt,ytp))

Mean squared error: 0.30


In [46]:
print("Naive bayes accuracy_score = " , accuracy_score(yt, ytp)*100 , "%")

Naive bayes accuracy_score =  70.1298701299 %


## KNN

In [47]:
from sklearn.neighbors import KNeighborsClassifier
neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(xtr, ytr) 

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=3, p=2,
           weights='uniform')

In [48]:
ytp = neigh.predict(xt)

In [49]:
print("Mean squared error: %.2f" % mean_squared_error(yt,ytp))

Mean squared error: 0.34


In [50]:
print("Naive bayes accuracy_score = " , accuracy_score(yt, ytp)*100 , "%")

Naive bayes accuracy_score =  66.2337662338 %


## Support Vector Machine

In [54]:
from sklearn import svm
clf = svm.SVC(gamma='scale')

In [57]:
import numpy as np
X = np.array(xtr)
y = np.array(ytr)
from sklearn.svm import SVC
clf = SVC(gamma='auto')
clf.fit(X, y) 

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [58]:
ytp = clf.predict(xt)

In [59]:
print("Mean squared error: %.2f" % mean_squared_error(yt,ytp))

Mean squared error: 0.35


In [60]:
print("SVM accuracy_score = " , accuracy_score(yt, ytp)*100 , "%")

SVM accuracy_score =  64.9350649351 %


## Neural Networks

In [72]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization, Activation
from keras.utils import np_utils


In [105]:
m = Sequential()
m.add(Dense(8, input_shape=(8,)))
m.add(BatchNormalization())
m.add(Activation("sigmoid"))
    
m.add(Dense(6))
m.add(BatchNormalization())
m.add(Activation("relu"))
m.add(Dropout(0.4))
    
m.add(Dense(2, activation="relu"))
    
# output layer
m.add(Dense(1, activation='sigmoid'))

m.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_27 (Dense)             (None, 8)                 72        
_________________________________________________________________
batch_normalization_14 (Batc (None, 8)                 32        
_________________________________________________________________
activation_15 (Activation)   (None, 8)                 0         
_________________________________________________________________
dense_28 (Dense)             (None, 6)                 54        
_________________________________________________________________
batch_normalization_15 (Batc (None, 6)                 24        
_________________________________________________________________
activation_16 (Activation)   (None, 6)                 0         
_________________________________________________________________
dropout_10 (Dropout)         (None, 6)                 0         
__________

In [106]:
m.compile(loss='mean_squared_logarithmic_error', optimizer='adam', metrics=['accuracy'])

In [107]:
m.fit(xtr, ytr, nb_epoch=10, batch_size=30)


  """Entry point for launching an IPython kernel.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fe014d7dac8>

In [108]:
ytp = m.predict(xt)

In [109]:
print("Mean squared error: %.2f" % mean_squared_error(yt,ytp))

Mean squared error: 0.21


In [110]:
 ytp
#print("SVM accuracy_score = " , accuracy_score(yt, ytp)*100 , "%")

array([[ 0.39666575],
       [ 0.33090523],
       [ 0.45238453],
       [ 0.41507268],
       [ 0.48247635],
       [ 0.43278593],
       [ 0.07622745],
       [ 0.46485716],
       [ 0.46577397],
       [ 0.47381541],
       [ 0.4303925 ],
       [ 0.48247635],
       [ 0.456092  ],
       [ 0.41416553],
       [ 0.25513977],
       [ 0.456092  ],
       [ 0.14678577],
       [ 0.15907115],
       [ 0.42589343],
       [ 0.45969489],
       [ 0.47286457],
       [ 0.11116166],
       [ 0.38192561],
       [ 0.30352989],
       [ 0.46195954],
       [ 0.48247635],
       [ 0.39204848],
       [ 0.27603295],
       [ 0.20542048],
       [ 0.18332151],
       [ 0.47253576],
       [ 0.47160798],
       [ 0.456092  ],
       [ 0.45135516],
       [ 0.48236096],
       [ 0.456092  ],
       [ 0.37632242],
       [ 0.28737769],
       [ 0.48247635],
       [ 0.46649271],
       [ 0.24008277],
       [ 0.48247635],
       [ 0.4667584 ],
       [ 0.48247635],
       [ 0.09323937],
       [ 0