In [13]:
from __future__ import print_function
import numpy as np
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Activation, Input
from keras.optimizers import SGD
from keras.utils import to_categorical
from keras.utils import set_random_seed
# Reproducibility: seeding NumPy alone does not seed the Python and backend
# random generators, so set_random_seed is called as well to cover all three.
# NOTE(review): bit-exact repeatability may still depend on hardware/backend
# determinism settings -- confirm on the target machine.
SEED = 1671
np.random.seed(SEED)
set_random_seed(SEED)

# network and training
NB_EPOCH = 20            # passes over the training data
BATCH_SIZE = 128         # samples per gradient update
VERBOSE = 1              # forwarded as `verbose=` to fit() and evaluate()
NB_CLASSES = 10          # number of outputs = number of digits
OPTIMIZER = SGD()        # optimizer with its default settings
N_HIDDEN = 128           # units in each hidden Dense layer
VALIDATION_SPLIT = 0.2   # how much TRAIN is reserved for VALIDATION



In [14]:
# Load MNIST; the data comes shuffled and already split into train and test sets.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten every image into a RESHAPED-long vector, cast to float32 and
# normalize by dividing by 255, all in one expression per split.
RESHAPED = 784
X_train = X_train.reshape(60000, RESHAPED).astype('float32') / 255
X_test = X_test.reshape(10000, RESHAPED).astype('float32') / 255

# Report how many samples each split contains.
print(len(X_train), 'train samples')
print(len(X_test), 'test samples')

# Convert the class vectors into binary class matrices with NB_CLASSES columns.
y_train = to_categorical(y_train, num_classes=NB_CLASSES)
y_test = to_categorical(y_test, num_classes=NB_CLASSES)


60000 train samples
10000 test samples


In [15]:
# Baseline network:
#   - two hidden Dense layers of N_HIDDEN units, each followed by ReLU
#   - NB_CLASSES outputs
#   - final stage is softmax
model = Sequential()
model.add(Input(shape=(RESHAPED,)))
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))
model.summary()

# Train with a fresh copy of the configured optimizer instead of the shared
# OPTIMIZER object, so that this cell can be re-run against a newly built model.
# NOTE(review): Keras 3 is expected to reject an optimizer instance that was
# already used to train a different model -- confirm with the installed version.
optimizer = type(OPTIMIZER).from_config(OPTIMIZER.get_config())

model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

# The last VALIDATION_SPLIT fraction of the training data is held out for
# validation; `history` keeps the per-epoch metrics returned by fit().
history = model.fit(X_train, y_train,
                    batch_size=BATCH_SIZE, epochs=NB_EPOCH,
                    verbose=VERBOSE, validation_split=VALIDATION_SPLIT)


Epoch 1/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.4391 - loss: 1.8713 - val_accuracy: 0.8312 - val_loss: 0.7646
Epoch 2/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8328 - loss: 0.6840 - val_accuracy: 0.8813 - val_loss: 0.4618
Epoch 3/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8731 - loss: 0.4728 - val_accuracy: 0.8987 - val_loss: 0.3731
Epoch 4/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8913 - loss: 0.3879 - val_accuracy: 0.9067 - val_loss: 0.3325
Epoch 5/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9038 - loss: 0.3459 - val_accuracy: 0.9136 - val_loss: 0.3081
Epoch 6/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9068 - loss: 0.3243 - val_accuracy: 0.9174 - val_loss: 0.2905
Epoch 7/20
[1m375/375[0m 

In [16]:
# Evaluate the current model on the test set. `score` is indexed as
# [loss, accuracy], matching the loss and the single metric passed to compile().
score = model.evaluate(X_test, y_test, verbose=VERBOSE)
test_loss = score[0]
test_accuracy = score[1]
print("Test score:", test_loss)
print('Test accuracy:', test_accuracy)


[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 404us/step - accuracy: 0.9382 - loss: 0.2175
Test score: 0.1875109076499939
Test accuracy: 0.9463000297546387


In [21]:
# Experiment 1: Increase Number of Epochs to 30
# The epoch count is kept in an experiment-local name so the shared NB_EPOCH
# constant is not overwritten; re-running other cells keeps their own setting.
EXP1_NB_EPOCH = 30

# Same architecture as the baseline, rebuilt from scratch so training starts
# from freshly initialized weights.
model = Sequential()
model.add(Input(shape=(RESHAPED,)))
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))
model.summary()

# Create a new optimizer instance
optimizer = SGD()

model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

history = model.fit(X_train, y_train,
                    batch_size=BATCH_SIZE, epochs=EXP1_NB_EPOCH,
                    verbose=VERBOSE, validation_split=VALIDATION_SPLIT)

# score[0] is the test loss, score[1] the test accuracy.
score = model.evaluate(X_test, y_test, verbose=VERBOSE)
print("Test score:", score[0])
print('Test accuracy:', score[1])


Epoch 1/30
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.4644 - loss: 1.8483 - val_accuracy: 0.8442 - val_loss: 0.7005
Epoch 2/30
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8422 - loss: 0.6415 - val_accuracy: 0.8851 - val_loss: 0.4492
Epoch 3/30
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8792 - loss: 0.4500 - val_accuracy: 0.8975 - val_loss: 0.3738
Epoch 4/30
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8937 - loss: 0.3861 - val_accuracy: 0.9064 - val_loss: 0.3364
Epoch 5/30
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9034 - loss: 0.3475 - val_accuracy: 0.9112 - val_loss: 0.3138
Epoch 6/30
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9084 - loss: 0.3236 - val_accuracy: 0.9168 - val_loss: 0.2970
Epoch 7/30
[1m375/375[0m 

In [22]:
# Experiment 2: Decrease Number of Epochs to 10
# The epoch count is kept in an experiment-local name so the shared NB_EPOCH
# constant is not overwritten; re-running other cells keeps their own setting.
EXP2_NB_EPOCH = 10

# Same architecture as the baseline, rebuilt from scratch so training starts
# from freshly initialized weights.
model = Sequential()
model.add(Input(shape=(RESHAPED,)))
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))
model.summary()

# Create a new optimizer instance
optimizer = SGD()

model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

history = model.fit(X_train, y_train,
                    batch_size=BATCH_SIZE, epochs=EXP2_NB_EPOCH,
                    verbose=VERBOSE, validation_split=VALIDATION_SPLIT)

# score[0] is the test loss, score[1] the test accuracy.
score = model.evaluate(X_test, y_test, verbose=VERBOSE)
print("Test score:", score[0])
print('Test accuracy:', score[1])


Epoch 1/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.4772 - loss: 1.8354 - val_accuracy: 0.8483 - val_loss: 0.6855
Epoch 2/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8463 - loss: 0.6240 - val_accuracy: 0.8869 - val_loss: 0.4331
Epoch 3/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8786 - loss: 0.4418 - val_accuracy: 0.9003 - val_loss: 0.3645
Epoch 4/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8946 - loss: 0.3740 - val_accuracy: 0.9064 - val_loss: 0.3298
Epoch 5/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9000 - loss: 0.3473 - val_accuracy: 0.9113 - val_loss: 0.3087
Epoch 6/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9072 - loss: 0.3282 - val_accuracy: 0.9137 - val_loss: 0.2924
Epoch 7/10
[1m375/375[0m 

In [23]:
# Experiment 3: Increase Batch Size to 256
# Both settings live in experiment-local names: the run does not depend on
# whatever value an earlier experiment left in NB_EPOCH, and the shared
# BATCH_SIZE constant is not overwritten for cells that are re-run afterwards.
EXP3_NB_EPOCH = 20      # the original (baseline) number of epochs
EXP3_BATCH_SIZE = 256

# Same architecture as the baseline, rebuilt from scratch so training starts
# from freshly initialized weights.
model = Sequential()
model.add(Input(shape=(RESHAPED,)))
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))
model.summary()

# Create a new optimizer instance
optimizer = SGD()

model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

history = model.fit(X_train, y_train,
                    batch_size=EXP3_BATCH_SIZE, epochs=EXP3_NB_EPOCH,
                    verbose=VERBOSE, validation_split=VALIDATION_SPLIT)

# score[0] is the test loss, score[1] the test accuracy.
score = model.evaluate(X_test, y_test, verbose=VERBOSE)
print("Test score:", score[0])
print('Test accuracy:', score[1])


Epoch 1/20
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.2994 - loss: 2.1313 - val_accuracy: 0.7135 - val_loss: 1.3956
Epoch 2/20
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7372 - loss: 1.2127 - val_accuracy: 0.8246 - val_loss: 0.7703
Epoch 3/20
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8237 - loss: 0.7396 - val_accuracy: 0.8616 - val_loss: 0.5627
Epoch 4/20
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8553 - loss: 0.5698 - val_accuracy: 0.8802 - val_loss: 0.4691
Epoch 5/20
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8734 - loss: 0.4842 - val_accuracy: 0.8897 - val_loss: 0.4171
Epoch 6/20
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8840 - loss: 0.4337 - val_accuracy: 0.8954 - val_loss: 0.3838
Epoch 7/20
[1m188/188[0m 

## Analysis of Parameter Changes

### Baseline Model (20 Epochs, Batch Size 128)
- **Test accuracy:** 94.63%

### Experiment 1: Increase Number of Epochs to 30
- **Test accuracy:** 95.41%
- **Observation:** Increasing the number of epochs improved the test accuracy because the model had more opportunities to learn from the training data. This additional training time allowed the model to better fit the data, resulting in higher accuracy.

### Experiment 2: Decrease Number of Epochs to 10
- **Test accuracy:** 92.75%
- **Observation:** Decreasing the number of epochs resulted in lower test accuracy, indicating that the model did not have enough time to learn effectively. With fewer epochs, the model may not have fully captured the underlying patterns in the data, leading to underfitting.

### Experiment 3: Increase Batch Size to 256
- **Test accuracy:** 92.68%
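- **Observation:** Increasing the batch size lowered the test accuracy. With 48,000 training samples (60,000 minus the 20% validation split), a batch size of 256 gives 188 weight updates per epoch instead of 375, so over the same 20 epochs and with the same learning rate the model takes only about half as many SGD steps and ends up less converged. Larger batches produce less noisy, more stable gradient estimates, but they typically need more epochs or a larger learning rate to reach the accuracy obtained with smaller batches.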
