## Performance Evaluation on Keras
 - automatic verification dataset
 - manual verification dataset.
 - k-fold cross-validation

## Automatic Verification
 - validation_split in `model.fit(X, Y,validation_split=0.33,  nb_epoch = 10, batch_size = 100)`

In [2]:
from keras.models import Sequential
from keras.layers import Dense
import numpy as np
import pandas as pd
# fix random seed for reproducibility
seed = 7
np.random.seed(seed)

data_url = "https://raw.githubusercontent.com/uzay00/KaVe-Egitim/master/DerinOgrenme/Keras-Giris/pima-indians-diabetes.csv" 
data = pd.read_csv(data_url, skiprows=9, header = None)

# split into input (X) and output (Y) variables
X = data.iloc[:,0:8].values
Y = data.iloc[:,8].values

# create model
model = Sequential()
model.add(Dense(12, input_dim=8, init= 'uniform' , activation= 'relu'))
model.add(Dense(8, init= 'uniform' , activation= 'relu' ))
model.add(Dense(1, init= 'uniform' , activation= 'sigmoid'))

# Compile model
model.compile(loss= 'binary_crossentropy' , optimizer= 'adam' , metrics=['accuracy'])

# Fit Model
model.fit(X, Y,validation_split=0.33,  nb_epoch = 10, batch_size = 100)  ## validation_split !!

# evaluate the model
scores = model.evaluate(X, Y)
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

Using TensorFlow backend.


Instructions for updating:
keep_dims is deprecated, use keepdims instead




Train on 514 samples, validate on 254 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
acc: 65.10%


## Manuel Validation
 - use `from sklearn.model_selection import train_test_split`
 - `model.fit(X_train, y_train, validation_data=(X_test,y_test), nb_epoch=150, batch_size=10)`



In [4]:
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split

import numpy as np
import pandas as pd
# fix random seed for reproducibility
seed = 7
np.random.seed(seed)

data_url = "https://raw.githubusercontent.com/uzay00/KaVe-Egitim/master/DerinOgrenme/Keras-Giris/pima-indians-diabetes.csv" 
data = pd.read_csv(data_url, skiprows=9, header = None)

# split into input (X) and output (Y) variables
X = data.iloc[:,0:8].values
Y = data.iloc[:,8].values

# split into 67% for train and 33% for test
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.33, random_state=seed)

# create model
model = Sequential()
model.add(Dense(12, input_dim=8, init= 'uniform' , activation= 'relu'))
model.add(Dense(8, init= 'uniform' , activation= 'relu' ))
model.add(Dense(1, init= 'uniform' , activation= 'sigmoid'))

# Compile model
model.compile(loss= 'binary_crossentropy' , optimizer= 'adam' , metrics=['accuracy'])

# Fit Model
model.fit(X_train, y_train, validation_data=(X_test,y_test), nb_epoch=10, batch_size=10)

# evaluate the model
scores = model.evaluate(X, Y)
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))



Train on 514 samples, validate on 254 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
acc: 65.36%


## Manual k-Fold Cross-Validation
To get less biased estimate of the performance of your model.
 - k subsets and takes turns training models on all subsets except one which is held out


In [5]:
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import StratifiedKFold

import numpy as np
import pandas as pd
# fix random seed for reproducibility
seed = 7
np.random.seed(seed)

data_url = "https://raw.githubusercontent.com/uzay00/KaVe-Egitim/master/DerinOgrenme/Keras-Giris/pima-indians-diabetes.csv" 
data = pd.read_csv(data_url, skiprows=9, header = None)

# split into input (X) and output (Y) variables
X = data.iloc[:,0:8].values
Y = data.iloc[:,8].values

# define 10-fold cross validation test harness
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
cvscores = []
for train, test in kfold.split(X, Y):
    # create model
    model = Sequential()
    model.add(Dense(12, input_dim=8, init= 'uniform' , activation= 'relu'))
    model.add(Dense(8, init= 'uniform' , activation= 'relu' ))
    model.add(Dense(1, init= 'uniform' , activation= 'sigmoid'))

    # Compile model
    model.compile(loss= 'binary_crossentropy' , optimizer= 'adam' , metrics=['accuracy'])

    # Fit Model
    model.fit(X[train], Y[train], nb_epoch=10, batch_size=10, verbose = 0)

    # evaluate the model
    scores = model.evaluate(X[test], Y[test], verbose=0)
    print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
    cvscores.append(scores[1] * 100)
    
print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))



acc: 64.94%
acc: 70.13%
acc: 66.23%
acc: 77.92%
acc: 64.94%
acc: 66.23%
acc: 62.34%
acc: 58.44%
acc: 68.42%
acc: 65.79%
66.54% (+/- 4.85%)
