<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Predictive-neural-network-using-Keras" data-toc-modified-id="Predictive-neural-network-using-Keras-1">Predictive neural network using Keras</a></span><ul class="toc-item"><li><span><a href="#Run-a-multilayer-perceptron-with-two-hidden-layers" data-toc-modified-id="Run-a-multilayer-perceptron-with-two-hidden-layers-1.1">Run a multilayer perceptron with two hidden layers</a></span></li><li><span><a href="#selecting-the-number-of-hidden-units-using-GridSearchCV-and-evaluation-on-a-test-set." data-toc-modified-id="selecting-the-number-of-hidden-units-using-GridSearchCV-and-evaluation-on-a-test-set.-1.2">selecting the number of hidden units using GridSearchCV and evaluation on a test-set.</a></span></li><li><span><a href="#Describe-the-differences-in-the-predictive-accuracy-of-models-with-different-numbers-of-hidden-units." data-toc-modified-id="Describe-the-differences-in-the-predictive-accuracy-of-models-with-different-numbers-of-hidden-units.-1.3">Describe the differences in the predictive accuracy of models with different numbers of hidden units.</a></span></li><li><span><a href="#Describe-the-predictive-strength-of-your-best-model." data-toc-modified-id="Describe-the-predictive-strength-of-your-best-model.-1.4">Describe the predictive strength of your best model.</a></span></li></ul></li></ul></div>

# Predictive neural network using Keras

In [1]:
import pandas as pd
# target = InMichelin, whether or not a restaurant is in the Michelin guide
data = pd.read_csv("http://vincentarelbundock.github.io/Rdatasets/csv/datasets/iris.csv" , encoding="latin_1")


#update data to set up for train test split
data = data.iloc[:,1:]
y = data['Species']
X = data.loc[:, data.columns != 'Species']

display(data.head())

Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


## Run a multilayer perceptron with two hidden layers

In [2]:
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder

# encode class values as integers
encoder = LabelEncoder()
encoder.fit(y)
encoded_Y = encoder.transform(y)
# convert integers to dummy variables (i.e. one hot encoded)
dummy_y = np_utils.to_categorical(encoded_Y)

Using TensorFlow backend.
  return f(*args, **kwds)


In [3]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, dummy_y, random_state=42)

print(X_train.shape)
print(y_train.shape)

(112, 4)
(112, 3)


In [4]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD

In [4]:
model = Sequential()

model.add(Dense(32, activation='relu', input_dim=4))
model.add(Dense(units=32, activation='relu'))
model.add(Dense(3, activation='softmax'))

sgd = SGD(lr=0.01)  # define a learning rate for optimization

model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              metrics=['accuracy'])

model.fit(X_train, y_train,
          epochs=20,
          batch_size=128)

score = model.evaluate(X_test, y_test, batch_size=128) # extract loss and accuracy from test data evaluation
print(score)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[0.9344544410705566, 0.6842105388641357]


## selecting the number of hidden units using GridSearchCV and evaluation on a test-set. 

In [29]:

from sklearn.model_selection import GridSearchCV

nodes = [4, 8, 16, 32, 64, 128] 
lrs = [0.0001, 0.001, 0.01] # learning rate, default = 0.001


In [5]:
# define baseline model
def create_model(nodes=8, lr=0.001):
    model = Sequential()
    model.add(Dense(nodes, activation='relu', input_dim=4))
    model.add(Dense(nodes, activation='relu'))
    model.add(Dense(3, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer= SGD(lr=lr), metrics=['accuracy'])
    return model

model = KerasClassifier(build_fn=create_model, 
                        epochs=15, batch_size=128, verbose=0)


In [31]:
param_grid = dict(nodes=nodes, lr=lrs)

grid = GridSearchCV(estimator=model, 
                    param_grid=param_grid, 
                    n_jobs=1)

grid_result = grid.fit(X_train, y_train)

print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))


Best: 0.616071 using {'lr': 0.01, 'nodes': 32}


## Describe the differences in the predictive accuracy of models with different numbers of hidden units.  

In [32]:
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

0.339286 (0.048580) with: {'lr': 0.0001, 'nodes': 4}
0.339286 (0.037347) with: {'lr': 0.0001, 'nodes': 8}
0.348214 (0.022401) with: {'lr': 0.0001, 'nodes': 16}
0.294643 (0.055563) with: {'lr': 0.0001, 'nodes': 32}
0.330357 (0.086570) with: {'lr': 0.0001, 'nodes': 64}
0.294643 (0.055563) with: {'lr': 0.0001, 'nodes': 128}
0.339286 (0.016837) with: {'lr': 0.001, 'nodes': 4}
0.455357 (0.136572) with: {'lr': 0.001, 'nodes': 8}
0.330357 (0.211005) with: {'lr': 0.001, 'nodes': 16}
0.375000 (0.022469) with: {'lr': 0.001, 'nodes': 32}
0.348214 (0.291176) with: {'lr': 0.001, 'nodes': 64}
0.562500 (0.106583) with: {'lr': 0.001, 'nodes': 128}
0.357143 (0.072262) with: {'lr': 0.01, 'nodes': 4}
0.535714 (0.142600) with: {'lr': 0.01, 'nodes': 8}
0.339286 (0.027679) with: {'lr': 0.01, 'nodes': 16}
0.616071 (0.211581) with: {'lr': 0.01, 'nodes': 32}
0.616071 (0.130321) with: {'lr': 0.01, 'nodes': 64}
0.437500 (0.137746) with: {'lr': 0.01, 'nodes': 128}


## Describe the predictive strength of your best model. 

In [35]:
bestmodel = Sequential()
bestmodel.add(Dense(32, activation='relu', input_dim=4))
bestmodel.add(Dense(32, activation='relu'))
bestmodel.add(Dense(3, activation='softmax'))
bestmodel.compile(loss='categorical_crossentropy',
                 optimizer= SGD(lr=0.01), metrics=['accuracy'])

bestmodel.fit(X_train, y_train,
          epochs=20,
          batch_size=128)

score = bestmodel.evaluate(X_test, y_test, batch_size=128) # extract loss and accuracy from test data evaluation
print(score)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[0.7597740292549133, 0.7631579041481018]


In [10]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

def best_model():
    model = Sequential()
    model.add(Dense(8, activation='relu', input_dim=4))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(3, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer= SGD(lr=0.01), metrics=['accuracy'])
    return model

estimator = KerasClassifier(build_fn=best_model, epochs=15, batch_size=128, verbose=0)


kfold = KFold(n_splits=10, shuffle=True, random_state=10)
results = cross_val_score(estimator, X, dummy_y, cv=kfold)

print("Cross_validation accuracy: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))


Cross_validation accuracy: 48.00% (21.87%)


**Through cross validation, the best model can predict about half correct**