In [1]:
# Keras import
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Activation, BatchNormalization
from keras.wrappers.scikit_learn import KerasClassifier

# Sklearn import
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

import time

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Number of target classes; used later for the one-hot label width and the
# size of the softmax output layer.
num_classes = 10

# Fetch the MNIST train/test splits as (images, integer labels) pairs.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Report the raw array shapes (and the container type) before preprocessing.
print(
    "X_train shape: {}".format(X_train.shape),
    "X_train type: {}".format(type(X_train)),
    "y_train shape: {}".format(y_train.shape),
    "X_test shape: {}".format(X_test.shape),
    "y_test shape: {}".format(y_test.shape),
    sep="\n"
)

X_train shape: (60000, 28, 28)
X_train type: <class 'numpy.ndarray'>
y_train shape: (60000,)
X_test shape: (10000, 28, 28)
y_test shape: (10000,)


In [3]:
# Flatten every image into one feature vector, cast to float32 and divide by
# 255. The sample count is read from the array itself and the feature width is
# inferred with -1, so the cell is no longer tied to the literal
# 60000 / 10000 x 784 layout.
# NOTE: X_train / X_test are rebound in place of the raw arrays, so running
# this cell twice in one kernel session divides by 255 twice. Re-run the
# loading cell first if it has to be executed again.
X_train = X_train.reshape(X_train.shape[0], -1).astype('float32') / 255
X_test = X_test.reshape(X_test.shape[0], -1).astype('float32') / 255

# Confirm the flattened shapes.
print("X_train shape: {}".format(X_train.shape))
print("X_test shape: {}".format(X_test.shape))

X_train shape: (60000, 784)
X_test shape: (10000, 784)


In [4]:
# One-hot encode the integer labels into vectors of length num_classes so they
# line up with the softmax output of the models below.
y_train = keras.utils.to_categorical(y_train, num_classes=num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes=num_classes)

# Show the encoded shapes and the first training label as a sanity check.
print(
    "y_train shape: {}".format(y_train.shape),
    "y_test shape: {}".format(y_test.shape),
    y_train[0],
    sep="\n"
)

y_train shape: (60000, 10)
y_test shape: (10000, 10)
[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]


In [5]:
# Two-layer network: two 64-unit ReLU hidden layers feeding a softmax output
# with one unit per class.
model = Sequential([
    Dense(64, activation='relu', input_shape=(784,)),
    Dense(64, activation='relu'),
    Dense(num_classes, activation='softmax'),
])

# Print the architecture, then attach loss, optimizer and metric for training.
model.summary()
model.compile(
    optimizer='adam',
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy']
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 64)                50240     
_________________________________________________________________
dense_2 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_3 (Dense)              (None, 10)                650       
Total params: 55,050
Trainable params: 55,050
Non-trainable params: 0
_________________________________________________________________
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [6]:
# Train the current model for 10 epochs in mini-batches of 100 and print the
# elapsed wall-clock time in seconds (the label says "End Time", the value is
# a duration).
start_time = time.time()
model.fit(x=X_train, y=y_train, batch_size=100, epochs=10, verbose=1)
print("End Time: ", time.time() - start_time)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
End Time:  29.673668146133423


In [7]:
#### without batch normalization
# Reference network for the batch-normalization comparison: each ReLU is
# written as a separate Activation layer after its Dense layer.
model = Sequential([
    # input layer
    Dense(64, input_shape=(784,)),
    Activation('relu'),
    # hidden layer
    Dense(64),
    Activation('relu'),
    # output layer
    Dense(num_classes, activation='softmax'),
])

# Print the architecture, then configure training.
model.summary()
model.compile(
    optimizer='adam',
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy']
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 64)                50240     
_________________________________________________________________
activation_1 (Activation)    (None, 64)                0         
_________________________________________________________________
dense_5 (Dense)              (None, 64)                4160      
_________________________________________________________________
activation_2 (Activation)    (None, 64)                0         
_________________________________________________________________
dense_6 (Dense)              (None, 10)                650       
Total params: 55,050
Trainable params: 55,050
Non-trainable params: 0
_________________________________________________________________


In [8]:
# Time a 10-epoch training run (batch size 100) of the model built in the
# previous cell; the printed number is the elapsed time in seconds.
start_time = time.time()
model.fit(X_train, y_train, epochs=10, batch_size=100, verbose=1)
print("End Time: ", time.time() - start_time)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
End Time:  27.784837245941162


In [9]:
#### with batch normalization
# Each Dense layer of the hidden stack is followed by BatchNormalization and
# only then by the ReLU activation.
model = Sequential([
    # input layer
    Dense(64, input_shape=(784,)),
    BatchNormalization(),
    Activation('relu'),
    # hidden layer
    Dense(64),
    BatchNormalization(),
    Activation('relu'),
    # output layer
    Dense(num_classes, activation='softmax'),
])

# Print the architecture, then configure training.
model.summary()
model.compile(
    optimizer='adam',
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy']
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_7 (Dense)              (None, 64)                50240     
_________________________________________________________________
batch_normalization_1 (Batch (None, 64)                256       
_________________________________________________________________
activation_3 (Activation)    (None, 64)                0         
_________________________________________________________________
dense_8 (Dense)              (None, 64)                4160      
_________________________________________________________________
batch_normalization_2 (Batch (None, 64)                256       
_________________________________________________________________
activation_4 (Activation)    (None, 64)                0         
_________________________________________________________________
dense_9 (Dense)              (None, 10)                650       
Total para

In [10]:
# Train the batch-normalized model with the same settings as the other runs
# (10 epochs, batch size 100) and print the elapsed seconds for comparison.
start_time = time.time()
model.fit(
    X_train, y_train,
    batch_size=100,
    epochs=10,
    verbose=1
)
print("End Time: ", time.time() - start_time)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
End Time:  40.358068227767944


In [11]:
#### initializer
def create_model(init='zeros', show_summary=True):
    """Build and compile the batch-normalized classifier for one initializer.

    Args:
        init: Kernel initializer passed to both 64-unit hidden Dense layers.
            The output layer is created without ``kernel_initializer`` and so
            keeps the library default.
        show_summary: When True (the default, matching the original
            behaviour) the layer summary is printed on every call. Pass False
            to keep the output short when the function is invoked many times,
            for example as the build function of a grid search.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss,
        the Adam optimizer and the accuracy metric. The input width is 784
        and the output width is the notebook-level ``num_classes``.
    """
    model = Sequential()

    # input layer
    model.add(Dense(64, kernel_initializer=init, input_shape=(784,)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))

    # hidden layer
    model.add(Dense(64, kernel_initializer=init))
    model.add(BatchNormalization())
    model.add(Activation('relu'))

    # output layer (not affected by `init`)
    model.add(Dense(num_classes, activation='softmax'))

    # The summary is optional so repeated builds do not flood the output.
    if show_summary:
        model.summary()

    model.compile(
        loss=keras.losses.categorical_crossentropy,
        optimizer='adam',
        metrics=['accuracy']
    )

    return model

In [14]:
# Search space for the grid search: candidate values for the `init` argument
# of create_model.
param_grid = dict(
    init=['zeros', 'ones', 'glorot_uniform', 'normal', 'uniform']
)

In [15]:
# Compare the kernel initializers in param_grid with a 2-fold grid search over
# create_model, then print the best setting, the per-setting mean/std test
# score and the elapsed wall-clock time in seconds.
# The timer must be stored in `start_time`, the name read by the final print;
# otherwise the reported duration is measured from whichever earlier cell last
# assigned `start_time`.
start_time = time.time()
model = KerasClassifier(build_fn=create_model, epochs=10, verbose=1)

grid = GridSearchCV(estimator=model, cv=2, param_grid=param_grid)
grid_result = grid.fit(X_train, y_train)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']

# One line per candidate: mean score (std across folds) and its parameters.
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
print("End Time: ", time.time() - start_time)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_10 (Dense)             (None, 64)                50240     
_________________________________________________________________
batch_normalization_3 (Batch (None, 64)                256       
_________________________________________________________________
activation_5 (Activation)    (None, 64)                0         
_________________________________________________________________
dense_11 (Dense)             (None, 64)                4160      
_________________________________________________________________
batch_normalization_4 (Batch (None, 64)                256       
_________________________________________________________________
activation_6 (Activation)    (None, 64)                0         
_________________________________________________________________
dense_12 (Dense)             (None, 10)                650       
Total para

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_22 (Dense)             (None, 64)                50240     
_________________________________________________________________
batch_normalization_11 (Batc (None, 64)                256       
_________________________________________________________________
activation_13 (Activation)   (None, 64)                0         
_________________________________________________________________
dense_23 (Dense)             (None, 64)                4160      
_________________________________________________________________
batch_normalization_12 (Batc (None, 64)                256       
_________________________________________________________________
activation_14 (Activation)   (None, 64)                0         
_______________________________

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_31 (Dense)             (None, 64)                50240     
_________________________________________________________________
batch_normalization_17 (Batc (None, 64)                256       
_________________________________________________________________
activation_19 (Activation)   (None, 64)                0         
_________________________________________________________________
dense_32 (Dense)             (None, 64)                4160      
_________________________________________________________________
batch_normalization_18 (Batc (None, 64)                256       
_________________________________________________________________
activation_20 (Activation)   (None, 64)                0         
_________________________________________________________________
dense_33 (Dense)             (None, 10)                650       
Total para

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Best: 0.970000 using {'init': 'uniform'}
0.112367 (0.001733) with: {'init': 'zeros'}
0.764167 (0.005267) with: {'init': 'ones'}
0.968100 (0.000567) with: {'init': 'glorot_uniform'}
0.969633 (0.000933) with: {'init': 'normal'}
0.970000 (0.001267) with: {'init': 'uniform'}
End Time:  686.130140542984
