In [1]:
from __future__ import print_function

import numpy as np

from keras.datasets import mnist

from keras.models import Sequential

from keras.layers.core import Dense, Activation

from keras.optimizers import SGD

from keras.utils import np_utils

np.random.seed(1671)  # seed NumPy's global RNG for reproducibility

# --- Network and training hyper-parameters ---
NB_EPOCH = 10            # number of passes over the training data
BATCH_SIZE = 16          # samples per gradient update (the setting varied between runs)
VERBOSE = 0              # passed to fit/evaluate; 0 keeps Keras silent
NB_CLASSES = 10          # number of outputs = number of digits
OPTIMIZER = SGD()        # SGD with default arguments
N_HIDDEN = 128           # units in each hidden Dense layer
VALIDATION_SPLIT = 0.2   # how much TRAIN is reserved for VALIDATION
RESHAPED = 784           # 28 x 28 pixels flattened into a single vector

# --- Data: shuffled and split between train and test sets ---
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten every 28x28 image into a RESHAPED-long float32 row.
# The row count is inferred with -1 instead of hard-coding 60000 / 10000,
# so the cell keeps working if the number of samples changes.
X_train = X_train.reshape(-1, RESHAPED).astype('float32')
X_test = X_test.reshape(-1, RESHAPED).astype('float32')

# Normalize pixel intensities from [0, 255] to [0, 1].
X_train /= 255
X_test /= 255

print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# Convert integer class vectors to one-hot (binary) class matrices.
Y_train = np_utils.to_categorical(y_train, NB_CLASSES)
Y_test = np_utils.to_categorical(y_test, NB_CLASSES)

# --- Model ---
# Two hidden Dense layers of N_HIDDEN units each (ReLU),
# followed by an NB_CLASSES-way output layer with softmax.
model1 = Sequential()
model1.add(Dense(N_HIDDEN, input_shape=(RESHAPED,)))
model1.add(Activation('relu'))
model1.add(Dense(N_HIDDEN))
model1.add(Activation('relu'))
model1.add(Dense(NB_CLASSES))
model1.add(Activation('softmax'))
model1.summary()

model1.compile(
    loss='categorical_crossentropy',
    optimizer=OPTIMIZER,
    metrics=['accuracy'],
)

# --- Training: the returned History object is kept for later inspection ---
history1 = model1.fit(
    X_train, Y_train,
    batch_size=BATCH_SIZE,
    epochs=NB_EPOCH,
    verbose=VERBOSE,
    validation_split=VALIDATION_SPLIT,
)

# --- Evaluation on the held-out test set ---
# score[0] is the loss; score[1] is the 'accuracy' metric requested in compile().
score = model1.evaluate(X_test, Y_test, verbose=VERBOSE)
print("Test score:", score[0])
print('Test accuracy:', score[1])

Using TensorFlow backend.


60000 train samples
10000 test samples
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 128)               100480    
_________________________________________________________________
activation_1 (Activation)    (None, 128)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 128)               16512     
_________________________________________________________________
activation_2 (Activation)    (None, 128)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 10)                1290      
_________________________________________________________________
activation_3 (Activation)    (None, 10)                0         
Total params: 118,282
Trainable params: 118,282
Non-trainable params: 0
_________

In [2]:
from __future__ import print_function

import numpy as np

from keras.datasets import mnist

from keras.models import Sequential

from keras.layers.core import Dense, Activation

from keras.optimizers import SGD

from keras.utils import np_utils

np.random.seed(1671)  # seed NumPy's global RNG for reproducibility

# --- Network and training hyper-parameters ---
NB_EPOCH = 10            # number of passes over the training data
BATCH_SIZE = 32          # samples per gradient update (the setting varied between runs)
VERBOSE = 0              # passed to fit/evaluate; 0 keeps Keras silent
NB_CLASSES = 10          # number of outputs = number of digits
OPTIMIZER = SGD()        # SGD with default arguments
N_HIDDEN = 128           # units in each hidden Dense layer
VALIDATION_SPLIT = 0.2   # how much TRAIN is reserved for VALIDATION
RESHAPED = 784           # 28 x 28 pixels flattened into a single vector

# --- Data: shuffled and split between train and test sets ---
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten every 28x28 image into a RESHAPED-long float32 row.
# The row count is inferred with -1 instead of hard-coding 60000 / 10000,
# so the cell keeps working if the number of samples changes.
X_train = X_train.reshape(-1, RESHAPED).astype('float32')
X_test = X_test.reshape(-1, RESHAPED).astype('float32')

# Normalize pixel intensities from [0, 255] to [0, 1].
X_train /= 255
X_test /= 255

print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# Convert integer class vectors to one-hot (binary) class matrices.
Y_train = np_utils.to_categorical(y_train, NB_CLASSES)
Y_test = np_utils.to_categorical(y_test, NB_CLASSES)

# --- Model ---
# Two hidden Dense layers of N_HIDDEN units each (ReLU),
# followed by an NB_CLASSES-way output layer with softmax.
model2 = Sequential()
model2.add(Dense(N_HIDDEN, input_shape=(RESHAPED,)))
model2.add(Activation('relu'))
model2.add(Dense(N_HIDDEN))
model2.add(Activation('relu'))
model2.add(Dense(NB_CLASSES))
model2.add(Activation('softmax'))
model2.summary()

model2.compile(
    loss='categorical_crossentropy',
    optimizer=OPTIMIZER,
    metrics=['accuracy'],
)

# --- Training: the returned History object is kept for later inspection ---
history2 = model2.fit(
    X_train, Y_train,
    batch_size=BATCH_SIZE,
    epochs=NB_EPOCH,
    verbose=VERBOSE,
    validation_split=VALIDATION_SPLIT,
)

# --- Evaluation on the held-out test set ---
# score[0] is the loss; score[1] is the 'accuracy' metric requested in compile().
score = model2.evaluate(X_test, Y_test, verbose=VERBOSE)
print("Test score:", score[0])
print('Test accuracy:', score[1])

60000 train samples
10000 test samples
Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 128)               100480    
_________________________________________________________________
activation_4 (Activation)    (None, 128)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 128)               16512     
_________________________________________________________________
activation_5 (Activation)    (None, 128)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 10)                1290      
_________________________________________________________________
activation_6 (Activation)    (None, 10)                0         
Total params: 118,282
Trainable params: 118,282
Non-trainable params: 0
_________

In [3]:
from __future__ import print_function

import numpy as np

from keras.datasets import mnist

from keras.models import Sequential

from keras.layers.core import Dense, Activation

from keras.optimizers import SGD

from keras.utils import np_utils

np.random.seed(1671)  # seed NumPy's global RNG for reproducibility

# --- Network and training hyper-parameters ---
# NOTE(review): this run uses 20 epochs, whereas the batch-size 16 and 32 runs
# use 10, so it differs from them in more than batch size alone.
NB_EPOCH = 20            # number of passes over the training data
BATCH_SIZE = 256         # samples per gradient update (the setting varied between runs)
VERBOSE = 0              # passed to fit/evaluate; 0 keeps Keras silent
NB_CLASSES = 10          # number of outputs = number of digits
OPTIMIZER = SGD()        # SGD with default arguments
N_HIDDEN = 128           # units in each hidden Dense layer
VALIDATION_SPLIT = 0.2   # how much TRAIN is reserved for VALIDATION
RESHAPED = 784           # 28 x 28 pixels flattened into a single vector

# --- Data: shuffled and split between train and test sets ---
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten every 28x28 image into a RESHAPED-long float32 row.
# The row count is inferred with -1 instead of hard-coding 60000 / 10000,
# so the cell keeps working if the number of samples changes.
X_train = X_train.reshape(-1, RESHAPED).astype('float32')
X_test = X_test.reshape(-1, RESHAPED).astype('float32')

# Normalize pixel intensities from [0, 255] to [0, 1].
X_train /= 255
X_test /= 255

print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# Convert integer class vectors to one-hot (binary) class matrices.
Y_train = np_utils.to_categorical(y_train, NB_CLASSES)
Y_test = np_utils.to_categorical(y_test, NB_CLASSES)

# --- Model ---
# Two hidden Dense layers of N_HIDDEN units each (ReLU),
# followed by an NB_CLASSES-way output layer with softmax.
model3 = Sequential()
model3.add(Dense(N_HIDDEN, input_shape=(RESHAPED,)))
model3.add(Activation('relu'))
model3.add(Dense(N_HIDDEN))
model3.add(Activation('relu'))
model3.add(Dense(NB_CLASSES))
model3.add(Activation('softmax'))
model3.summary()

model3.compile(
    loss='categorical_crossentropy',
    optimizer=OPTIMIZER,
    metrics=['accuracy'],
)

# --- Training: the returned History object is kept for later inspection ---
history3 = model3.fit(
    X_train, Y_train,
    batch_size=BATCH_SIZE,
    epochs=NB_EPOCH,
    verbose=VERBOSE,
    validation_split=VALIDATION_SPLIT,
)

# --- Evaluation on the held-out test set ---
# score[0] is the loss; score[1] is the 'accuracy' metric requested in compile().
score = model3.evaluate(X_test, Y_test, verbose=VERBOSE)
print("Test score:", score[0])
print('Test accuracy:', score[1])

60000 train samples
10000 test samples
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_7 (Dense)              (None, 128)               100480    
_________________________________________________________________
activation_7 (Activation)    (None, 128)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 128)               16512     
_________________________________________________________________
activation_8 (Activation)    (None, 128)               0         
_________________________________________________________________
dense_9 (Dense)              (None, 10)                1290      
_________________________________________________________________
activation_9 (Activation)    (None, 10)                0         
Total params: 118,282
Trainable params: 118,282
Non-trainable params: 0
_________

## Analysis:

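The **batch_size** differs across the three models: **model1** uses a **batch_size** of **16**, **model2** uses a **batch_size** of **32**, and **model3** uses a **batch_size** of **256**.
Note that **model3** was also trained for **20** epochs rather than the **10** used for **model1** and **model2**, so its run differs from the other two in more than batch size.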

The test accuracy drops by about 0.7 percentage points from **model1** to **model2**, and by about 4.9 percentage points from **model1** to **model3**:

**model1** Test accuracy: **0.9761999845504761**\
**model2** Test accuracy: **0.9693999886512756**\
**model3** Test accuracy: **0.9275000095367432**

**model1** provided the best results of the three models. The differences between **model1**, **model2**, and **model3** are modest, although no significance test was run on these single runs, so statistical significance cannot be claimed either way.

When the batch size was increased for **model2** and **model3**, the accuracy fell only slightly, but it was still a drop in accuracy. Increasing the batch size increased the generalization error on the test set, which is why **model3** has a lower test accuracy than **model1** or **model2**. A larger batch also means fewer weight updates per epoch under plain SGD, which likely contributes to the gap. In these tests the smaller batch sizes worked best.