In [1]:
# Install tensorflow and keras libraries first.  Code in command prompt:
##     conda install -c conda-forge tensorflow, keras

import numpy as np
from keras.models import Sequential

# The core data structure of Keras is a model, a way to organize layers.

model = Sequential() # Define the architecture of you model using Sequential.  



  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [4]:
#Build layers with Dense, followed by Activation()...

from keras.models import Sequential
from keras.layers import Dense, Activation

# one hidden layer with 32 nodes
# Activation is set to relu
# one output layer with 10 categories.  
# softmax function used to calculate 0 to 1 probabilities for each of 10 categories

model = Sequential([
    Dense(32, input_shape=(784,)),
    Activation('relu'),
    Dense(10),
    Activation('softmax'),
])

model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_5 (Dense)              (None, 32)                25120     
_________________________________________________________________
activation_5 (Activation)    (None, 32)                0         
_________________________________________________________________
dense_6 (Dense)              (None, 10)                330       
_________________________________________________________________
activation_6 (Activation)    (None, 10)                0         
Total params: 25,450
Trainable params: 25,450
Non-trainable params: 0
_________________________________________________________________


In [6]:
# model with two hidden layers

model = Sequential([
    Dense(32, input_shape=(784,)),
    Activation('relu'),
    Dense(32),
    Activation('relu'),
    Dense(10),
    Activation('softmax'),
])


model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_10 (Dense)             (None, 32)                25120     
_________________________________________________________________
activation_10 (Activation)   (None, 32)                0         
_________________________________________________________________
dense_11 (Dense)             (None, 32)                1056      
_________________________________________________________________
activation_11 (Activation)   (None, 32)                0         
_________________________________________________________________
dense_12 (Dense)             (None, 10)                330       
_________________________________________________________________
activation_12 (Activation)   (None, 10)                0         
Total params: 26,506
Trainable params: 26,506
Non-trainable params: 0
_________________________________________________________________


In [7]:
#Or build a model in steps using .add():

from keras.layers import Dense

model = Sequential() 
model.add(Dense(units=64, activation='relu', input_dim=100))
model.add(Dense(units=64, activation='relu'))
model.add(Dense(units=10, activation='softmax'))

model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_13 (Dense)             (None, 64)                6464      
_________________________________________________________________
dense_14 (Dense)             (None, 64)                4160      
_________________________________________________________________
dense_15 (Dense)             (None, 10)                650       
Total params: 11,274
Trainable params: 11,274
Non-trainable params: 0
_________________________________________________________________


In [8]:
# Once your model looks good, configure its learning process with .compile():

model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])



**loss can be set to:**
    - 'categorical_crossentropy' for multiple categories
    - 'binary_crossentropy' for binary categories
    - 'mse' for regression, which calculates the mse

**optimizer can be set to 'sgd' for stochastic gradient descent or a variety of other techniques.** 

## Training a keras model

Keras models are trained on Numpy arrays of input data and labels. For training a model, you will typically use the  fit function.

In [34]:
# For a single-input model with 2 classes (binary classification):

model = Sequential()
model.add(Dense(32, activation='relu', input_dim=100))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='sgd',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Generate dummy data
import numpy as np
data = np.random.random((1000, 100)) # X data
labels = np.random.randint(2, size=(1000, 1)) # y data

# Train the model, iterating on the data in batches of 32 samples
model.fit(data, labels, validation_split=0.20, epochs=100, batch_size=32)

#Note that you can also use train_test_split() with , validation_data=(X_test,y_test) argument from Keras in same manner.
##Split data first and then simply train on training data and add test data to this argument.


Train on 800 samples, validate on 200 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100


Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x1befde829e8>

In [35]:
# for multiple categories you  need to one hot encode y using to_categorical()

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD

# Generate dummy data
import numpy as np
x_train = np.random.random((1000, 20))
y_train = keras.utils.to_categorical(np.random.randint(10, size=(1000, 1)), num_classes=10)
x_test = np.random.random((100, 20))
y_test = keras.utils.to_categorical(np.random.randint(10, size=(100, 1)), num_classes=10)

model = Sequential()
# Dense(64) is a fully-connected layer with 64 hidden units.
# in the first layer, you must specify the expected input data shape:
# here, 20-dimensional vectors.
model.add(Dense(32, activation='relu', input_dim=20))
model.add(Dense(10, activation='softmax'))

sgd = SGD(lr=0.01)  # define a learning rate for optimization

model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              metrics=['accuracy'])

model.fit(x_train, y_train,
          epochs=20,
          batch_size=128)
score = model.evaluate(x_test, y_test, batch_size=128) # extract loss and accuracy from test data evaluation
print(score)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[2.3609211444854736, 0.10000000149011612]


In [22]:
import numpy as np

# Prediction from keras classification model
print(x_test.shape)

# for predicted probabilities and labels
ypreds = model.predict_classes(x_test)
print(ypreds)

# for predicted probabilities
ypreds = model.predict_proba(x_test)
print(ypreds) #gives prediction of each category, largest is selected for predict_classes()


(100, 20)
[7 6 8 7 5 1 7 6 6 1 6 1 6 4 1 7 4 8 7 1 7 1 4 6 1 1 1 8 1 1 7 1 4 7 1 7 1
 6 8 7 0 1 1 8 1 8 7 1 7 1 7 5 1 3 1 6 7 1 8 1 6 7 7 7 1 8 6 4 5 7 4 8 4 0
 5 1 1 8 6 4 4 8 1 1 8 7 4 6 0 7 1 1 7 6 1 1 6 1 7 1]
[[0.07374199 0.12213745 0.07685741 0.10939735 0.08792505 0.09080634
  0.10589595 0.12896806 0.11801413 0.08625622]
 [0.09849285 0.11407992 0.09522335 0.090311   0.09342678 0.09299271
  0.11455514 0.11118037 0.10531382 0.08442403]
 [0.09952311 0.10705225 0.07021143 0.10744977 0.11171241 0.10613842
  0.10430415 0.07836949 0.11576303 0.09947591]
 [0.07571634 0.10092724 0.05257684 0.10454383 0.10507186 0.11376609
  0.12094142 0.1378554  0.10137254 0.08722848]
 [0.10157811 0.11081247 0.07302648 0.09588359 0.10053616 0.12354809
  0.10258005 0.07706122 0.11739568 0.09757815]
 [0.07336328 0.14790872 0.08815695 0.09500729 0.09210476 0.07790933
  0.11436201 0.11360762 0.1370281  0.0605519 ]
 [0.08506425 0.10187812 0.07374813 0.09658505 0.11567329 0.09795389
  0.10892223 0.13741407 0.10

In [23]:
# Prediction from keras regression model

# for predicted probabilities and labels
ypreds = model.predict(x_test)



## Evaluate and predict keras model with sklearn wrapper

In [24]:
# Use scikit-learn to grid search the batch size and epochs
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
import numpy
import pandas as pd
# Use KerasRegressor for regression model tuning

# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)

# load pimas diabetes dataset
dataset = pd.read_csv("https://vincentarelbundock.github.io/Rdatasets/csv/MASS/Pima.te.csv", delimiter=",")

X = dataset.iloc[:,1:6]
Y = dataset.iloc[:,7]
print(X.shape)

(332, 5)


In [30]:
# simple example

# Function to create model, required for KerasClassifier
def create_model():
	# create model
	model = Sequential()
	model.add(Dense(12, input_dim=5, activation='relu'))
	model.add(Dense(8, activation='relu'))
	model.add(Dense(1, activation='sigmoid'))
	# Compile model
	model.compile(loss='binary_crossentropy', optimizer='sgd', metrics=['accuracy'])
	return model

model = KerasClassifier(build_fn=create_model, epochs=100, verbose=0) # epochs arg is built in to Scikit learn's... 
                                                                      # KerasClassifier

# Building a simple search grid that adjusts epochs
param_grid = dict(epochs=[10,20,30])
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result = grid.fit(X, Y)


In [32]:
# grid_result.cv_results_ for full results file
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))


Best: 0.123494 using {'epochs': 10}


## Tuning different parameters

In [33]:

# Create function that builds model
# Function to create model, required for KerasClassifier

#In order to tune parameters native to keras, add them as arguments to your create_model function

def create_model(learn_rate=0.01):
	# create model
	model = Sequential()
	model.add(Dense(12, input_dim=5, activation='relu'))
	model.add(Dense(1, activation='sigmoid'))
	# Compile model
	optimizer = SGD(lr=learn_rate)
	model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
	return model

#call model function in KerasClassifier
model = KerasClassifier(build_fn=create_model, epochs=20, verbose=0)

# define the grid search parameters
learn_rate = [0.001, 0.01]

param_grid = dict(learn_rate=learn_rate) # set dictionary using function this time

#Using n_jobs=-1 to parallelize across available processors to speed it up
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result = grid.fit(X, Y)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.123494 using {'learn_rate': 0.001}
0.123494 (0.004543) with: {'learn_rate': 0.001}
0.123494 (0.004543) with: {'learn_rate': 0.01}


## Now you try.  Can you fit a neural network model to the Iris dataset?  Run models that change the structure of the network (i.e.-hidden layers and activations).  Try to improve your validation accuracy as much as possible.

Data can be imported via the following link:

http://vincentarelbundock.github.io/Rdatasets/csv/datasets/iris.csv

In [42]:

import pandas as pd
# target = InMichelin, whether or not a restaurant is in the Michelin guide
data = pd.read_csv("http://vincentarelbundock.github.io/Rdatasets/csv/datasets/iris.csv" , encoding="latin_1")


#update data to set up for train test split
data = data.iloc[:,1:]
y = data['Species']
X = data.loc[:, data.columns != 'Species']

display(data.head())
display(X.head())
display(y[0::10])

Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


0          setosa
10         setosa
20         setosa
30         setosa
40         setosa
50     versicolor
60     versicolor
70     versicolor
80     versicolor
90     versicolor
100     virginica
110     virginica
120     virginica
130     virginica
140     virginica
Name: Species, dtype: object