In [1]:
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import numpy as np
import time
from keras.utils import np_utils
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Dataset constants used for the split and the one-hot encoding below.
NUM_CLASSES = 10   # digits 0-9
DEV_SIZE = 10000   # number of trailing training samples held out as the dev set

mnist = keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Hold out the last DEV_SIZE training samples as the dev (validation) split.
# The dev slices are taken BEFORE x_train / y_train are truncated, so the two
# splits cannot overlap.
x_dev = x_train[-DEV_SIZE:]
y_dev = y_train[-DEV_SIZE:]
x_train = x_train[:-DEV_SIZE]
y_train = y_train[:-DEV_SIZE]

# Sanity checks: every split must keep images and labels aligned.
assert len(x_train) == len(y_train), "train images/labels are misaligned"
assert len(x_dev) == len(y_dev) == DEV_SIZE, "dev split has unexpected size"
assert len(x_test) == len(y_test), "test images/labels are misaligned"

# One-hot encode the integer class labels (required by 'categorical_crossentropy').
# `keras` here is `tensorflow.keras`, i.e. the same Keras implementation that
# builds and trains the models in the cells below.
y_train = keras.utils.to_categorical(y_train, NUM_CLASSES)
y_test = keras.utils.to_categorical(y_test, NUM_CLASSES)
y_dev = keras.utils.to_categorical(y_dev, NUM_CLASSES)

# Scale pixel intensities by 1/255 (assumes 8-bit pixel values, which maps them to [0, 1]).
x_train = x_train / 255.0
x_dev = x_dev / 255.0
x_test = x_test / 255.0

In [None]:
# Experiment 1
# Regularization: L2 penalty (1e-4) on both hidden layers. Optimizer: Adam. Batch size 128.

batchSize = 128
epoch = 50

# Architecture: Flatten(28x28) -> Dense 512 relu -> Dense 512 relu -> Dense 10 softmax
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
for hidden_idx in range(2):
    # Each hidden layer gets its own L2 regularizer instance.
    model.add(
        tf.keras.layers.Dense(
            512,
            activation=tf.nn.relu,
            kernel_regularizer=tf.keras.regularizers.l2(0.0001),
        )
    )
model.add(tf.keras.layers.Dense(10, activation=tf.nn.softmax))

model.compile(
    loss='categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['accuracy'],
)

# Wall-clock timing wraps only the fit() call.
start = time.time()
model_fit = model.fit(
    x_train,
    y_train,
    validation_data=(x_dev, y_dev),
    epochs=epoch,
    batch_size=batchSize,
    verbose=1,
)
print('training time:', time.time() - start)

# Report the metrics recorded for the final epoch.
history = model_fit.history
final_metrics = (
    ('train loss:', 'loss'),
    ('train accuracy:', 'accuracy'),
    ('dev loss:', 'val_loss'),
    ('dev accuracy:', 'val_accuracy'),
)
for label, key in final_metrics:
    print(label, history[key][-1])

# Side-by-side dev curves: loss on the left, accuracy on the right.
panels = (
    ('val_loss', 'Val Loss', 'loss'),
    ('val_accuracy', 'Val Accuracy', 'acc'),
)
for position, (key, title, y_label) in enumerate(panels, 1):
    plt.subplot(1, 2, position)
    plt.plot(history[key])
    plt.title(title)
    plt.xlabel('epoch')
    plt.ylabel(y_label)

plt.show()

# Final held-out evaluation.
test_loss, test_acc = model.evaluate(x_test, y_test)
print('test loss:', test_loss)
print('test accuracy:', test_acc)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50

In [None]:
# Experiment 2
# Regularization: Dropout(0.2) after each hidden layer. Optimizer: Adam. Batch size 128.

batchSize = 128
epoch = 50

# Architecture:
#   Flatten(28x28) -> [Dense 512 relu -> Dropout 0.2] x 2 -> Dense 10 softmax
layer_stack = [tf.keras.layers.Flatten(input_shape=(28, 28))]
for hidden_idx in range(2):
    layer_stack.append(tf.keras.layers.Dense(512, activation=tf.nn.relu))
    layer_stack.append(tf.keras.layers.Dropout(0.2))
layer_stack.append(tf.keras.layers.Dense(10, activation=tf.nn.softmax))

model = tf.keras.models.Sequential(layer_stack)

model.compile(
    metrics=['accuracy'],
    loss='categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(),
)

# Time only the training call.
start = time.time()
model_fit = model.fit(
    x_train,
    y_train,
    batch_size=batchSize,
    epochs=epoch,
    validation_data=(x_dev, y_dev),
    verbose=1,
)
elapsed = time.time() - start
print('training time: ', elapsed)

# Last-epoch metrics for the train and dev splits.
history = model_fit.history
print('train loss: ', history['loss'][-1])
print('train accuracy: ', history['accuracy'][-1])
print('dev loss: ', history['val_loss'][-1])
print('dev accuracy: ', history['val_accuracy'][-1])

# Dev loss (left panel) and dev accuracy (right panel) per epoch.
curve_specs = {
    1: ('val_loss', 'Val Loss', 'loss'),
    2: ('val_accuracy', 'Val Accuracy', 'acc'),
}
for position, (key, title, y_label) in curve_specs.items():
    plt.subplot(1, 2, position)
    plt.plot(history[key])
    plt.title(title)
    plt.xlabel('epoch')
    plt.ylabel(y_label)

plt.show()

# Evaluate the final-epoch weights on the test split.
test_loss, test_acc = model.evaluate(x_test, y_test)
print('test loss:', test_loss)
print('test accuracy:', test_acc)

In [None]:
# 3
# Early stopping (patience=5) on dev loss, Adam optimizer, batch size 128

batchSize = 128
epoch = 50

# The callbacks are taken from tf.keras, the same Keras implementation that
# builds the model below. Passing callbacks from the standalone `keras` package
# to a tf.keras model is a known source of runtime errors on some TF 2.x
# releases, so both sides are kept on tf.keras here.
#
# early_stopping:   stop once val_loss has not improved for 5 consecutive epochs.
# model_checkpoint: overwrite 'best_model.h5' whenever val_loss reaches a new minimum.
# NOTE: the other early-stopping experiment in this notebook writes to the same
# 'best_model.h5' path, so the file always holds the checkpoint of whichever of
# the two cells ran most recently.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5)
model_checkpoint = tf.keras.callbacks.ModelCheckpoint('best_model.h5', monitor='val_loss', mode='min', verbose=1, save_best_only=True)

# DEFINE MODEL (no explicit regularization layers; early stopping is the only regularizer)
# 512 relu
# 512 relu
# 10 softmax
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),

    tf.keras.layers.Dense(512, activation=tf.nn.relu),
    tf.keras.layers.Dense(512, activation=tf.nn.relu),

    tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])

# COMPILE
model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

start = time.time()

# TRAIN
# `epoch` is only an upper bound: early_stopping may end training sooner.
model_fit = model.fit(x_train, y_train,
                      batch_size=batchSize, epochs=epoch, verbose=1,
                      validation_data=(x_dev, y_dev), callbacks=[early_stopping, model_checkpoint])

print('training time: ', time.time() - start)

# These are the metrics of the LAST epoch that ran, which is not necessarily the
# epoch stored in the checkpoint (that one is the epoch with the lowest val_loss).
print('train loss: ', model_fit.history['loss'][-1])
print('train accuracy: ', model_fit.history['accuracy'][-1])

print('dev loss: ', model_fit.history['val_loss'][-1])
print('dev accuracy: ', model_fit.history['val_accuracy'][-1])

# Dev loss (left) and dev accuracy (right) per epoch.
plt.subplot(1, 2, 1)
plt.plot(model_fit.history['val_loss'])
plt.title('Val Loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.subplot(1, 2, 2)
plt.plot(model_fit.history['val_accuracy'])
plt.title('Val Accuracy')
plt.xlabel('epoch')
plt.ylabel('acc')

plt.show()

# Evaluate the checkpointed (lowest val_loss) model rather than the final-epoch weights.
best_model = keras.models.load_model('best_model.h5')
best_test_loss, best_test_acc = best_model.evaluate(x_test, y_test)
print('best test model loss: ', best_test_loss)
print('best test model accuracy: ', best_test_acc)

In [None]:
# Experiment 4
# Regularization: L2 penalty (1e-4) plus batch normalization. Optimizer: Adam. Batch size 128.

batchSize = 128
epoch = 50

# Architecture:
#   Flatten(28x28) -> [Dense 512 relu (L2) -> BatchNormalization] x 2 -> Dense 10 softmax
# BatchNormalization is placed after each ReLU-activated hidden layer.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
for hidden_idx in range(2):
    model.add(
        tf.keras.layers.Dense(
            512,
            kernel_regularizer=tf.keras.regularizers.l2(0.0001),
            activation=tf.nn.relu,
        )
    )
    model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(10, activation=tf.nn.softmax))

model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['accuracy'],
    loss='categorical_crossentropy',
)

# Measure wall-clock time of the training run.
start = time.time()
model_fit = model.fit(
    x_train,
    y_train,
    epochs=epoch,
    batch_size=batchSize,
    validation_data=(x_dev, y_dev),
    verbose=1,
)
print('training time: ', time.time() - start)

# Final-epoch train / dev metrics.
history = model_fit.history
summary_rows = [
    ('train loss: ', 'loss'),
    ('train accuracy: ', 'accuracy'),
    ('dev loss: ', 'val_loss'),
    ('dev accuracy: ', 'val_accuracy'),
]
for label, key in summary_rows:
    print(label, history[key][-1])

# Two panels: dev loss, then dev accuracy.
plot_specs = [
    ('val_loss', 'Val Loss', 'loss'),
    ('val_accuracy', 'Val Accuracy', 'acc'),
]
for position, (key, title, y_label) in enumerate(plot_specs, 1):
    plt.subplot(1, 2, position)
    plt.plot(history[key])
    plt.title(title)
    plt.xlabel('epoch')
    plt.ylabel(y_label)

plt.show()

# Test-split evaluation of the final-epoch weights.
test_loss, test_acc = model.evaluate(x_test, y_test)
print('test loss:', test_loss)
print('test accuracy:', test_acc)

In [None]:
# Experiment 5
# Regularization: L2 penalty (1e-4) on both hidden layers. Optimizer: Adam. Batch size 256.

batchSize = 256
epoch = 50

# Architecture: Flatten(28x28) -> Dense 512 relu (L2) -> Dense 512 relu (L2) -> Dense 10 softmax
input_layer = tf.keras.layers.Flatten(input_shape=(28, 28))
hidden_layers = [
    tf.keras.layers.Dense(
        512,
        kernel_regularizer=tf.keras.regularizers.l2(0.0001),
        activation=tf.nn.relu,
    )
    for hidden_idx in range(2)
]
output_layer = tf.keras.layers.Dense(10, activation=tf.nn.softmax)

model = tf.keras.models.Sequential([input_layer, *hidden_layers, output_layer])

model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Only the fit() call is timed.
start = time.time()
model_fit = model.fit(
    x_train,
    y_train,
    validation_data=(x_dev, y_dev),
    batch_size=batchSize,
    epochs=epoch,
    verbose=1,
)
print('training time:', time.time() - start)

# Metrics from the last completed epoch.
history = model_fit.history
print('train loss:', history['loss'][-1])
print('train accuracy:', history['accuracy'][-1])
print('dev loss:', history['val_loss'][-1])
print('dev accuracy:', history['val_accuracy'][-1])

# Plot the per-epoch dev loss and dev accuracy next to each other.
for position, key, title, y_label in (
    (1, 'val_loss', 'Val Loss', 'loss'),
    (2, 'val_accuracy', 'Val Accuracy', 'acc'),
):
    plt.subplot(1, 2, position)
    plt.plot(history[key])
    plt.title(title)
    plt.xlabel('epoch')
    plt.ylabel(y_label)

plt.show()

# Held-out test performance.
test_loss, test_acc = model.evaluate(x_test, y_test)
print('test loss:', test_loss)
print('test accuracy:', test_acc)

In [None]:
# Experiment 6
# Regularization: Dropout(0.2) after each hidden layer. Optimizer: Adam. Batch size 256.

batchSize = 256
epoch = 50

# Architecture:
#   Flatten(28x28) -> [Dense 512 relu -> Dropout 0.2] x 2 -> Dense 10 softmax
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
for hidden_idx in range(2):
    model.add(tf.keras.layers.Dense(512, activation=tf.nn.relu))
    model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(10, activation=tf.nn.softmax))

model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=tf.keras.optimizers.Adam(),
)

# Start the clock immediately before training.
start = time.time()
model_fit = model.fit(
    x_train,
    y_train,
    verbose=1,
    epochs=epoch,
    batch_size=batchSize,
    validation_data=(x_dev, y_dev),
)
print('training time: ', time.time() - start)

# Final-epoch values for each tracked metric.
history = model_fit.history
for label, key in (
    ('train loss: ', 'loss'),
    ('train accuracy: ', 'accuracy'),
    ('dev loss: ', 'val_loss'),
    ('dev accuracy: ', 'val_accuracy'),
):
    print(label, history[key][-1])

# Left panel: dev loss. Right panel: dev accuracy.
panel_specs = (
    ('val_loss', 'Val Loss', 'loss'),
    ('val_accuracy', 'Val Accuracy', 'acc'),
)
for position, (key, title, y_label) in enumerate(panel_specs, 1):
    plt.subplot(1, 2, position)
    plt.plot(history[key])
    plt.title(title)
    plt.xlabel('epoch')
    plt.ylabel(y_label)

plt.show()

# Score the trained model on the test split.
test_loss, test_acc = model.evaluate(x_test, y_test)
print('test loss:', test_loss)
print('test accuracy:', test_acc)

In [None]:
# 7
# Early stopping (patience=5) on dev loss, Adam optimizer, batch size 256

batchSize = 256
epoch = 50

# The callbacks are taken from tf.keras, the same Keras implementation that
# builds the model below. Passing callbacks from the standalone `keras` package
# to a tf.keras model is a known source of runtime errors on some TF 2.x
# releases, so both sides are kept on tf.keras here.
#
# early_stopping:   stop once val_loss has not improved for 5 consecutive epochs.
# model_checkpoint: overwrite 'best_model.h5' whenever val_loss reaches a new minimum.
# NOTE: the other early-stopping experiment in this notebook writes to the same
# 'best_model.h5' path, so the file always holds the checkpoint of whichever of
# the two cells ran most recently.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5)
model_checkpoint = tf.keras.callbacks.ModelCheckpoint('best_model.h5', monitor='val_loss', mode='min', verbose=1, save_best_only=True)

# DEFINE MODEL (no explicit regularization layers; early stopping is the only regularizer)
# 512 relu
# 512 relu
# 10 softmax
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),

    tf.keras.layers.Dense(512, activation=tf.nn.relu),
    tf.keras.layers.Dense(512, activation=tf.nn.relu),

    tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])

# COMPILE
model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

start = time.time()

# TRAIN
# `epoch` is only an upper bound: early_stopping may end training sooner.
model_fit = model.fit(x_train, y_train,
                      batch_size=batchSize, epochs=epoch, verbose=1,
                      validation_data=(x_dev, y_dev), callbacks=[early_stopping, model_checkpoint])

print('training time: ', time.time() - start)

# These are the metrics of the LAST epoch that ran, which is not necessarily the
# epoch stored in the checkpoint (that one is the epoch with the lowest val_loss).
print('train loss: ', model_fit.history['loss'][-1])
print('train accuracy: ', model_fit.history['accuracy'][-1])

print('dev loss: ', model_fit.history['val_loss'][-1])
print('dev accuracy: ', model_fit.history['val_accuracy'][-1])

# Dev loss (left) and dev accuracy (right) per epoch.
plt.subplot(1, 2, 1)
plt.plot(model_fit.history['val_loss'])
plt.title('Val Loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.subplot(1, 2, 2)
plt.plot(model_fit.history['val_accuracy'])
plt.title('Val Accuracy')
plt.xlabel('epoch')
plt.ylabel('acc')

plt.show()

# Evaluate the checkpointed (lowest val_loss) model rather than the final-epoch weights.
best_model = keras.models.load_model('best_model.h5')
best_test_loss, best_test_acc = best_model.evaluate(x_test, y_test)
print('best test model loss: ', best_test_loss)
print('best test model accuracy: ', best_test_acc)

In [None]:
# Experiment 8
# Regularization: L2 penalty (1e-4) plus batch normalization. Optimizer: Adam. Batch size 256.

batchSize = 256
epoch = 50

# Architecture:
#   Flatten(28x28) -> [Dense 512 relu (L2) -> BatchNormalization] x 2 -> Dense 10 softmax
# BatchNormalization is placed after each ReLU-activated hidden layer.
layer_stack = [tf.keras.layers.Flatten(input_shape=(28, 28))]
for hidden_idx in range(2):
    layer_stack.append(
        tf.keras.layers.Dense(
            512,
            activation=tf.nn.relu,
            kernel_regularizer=tf.keras.regularizers.l2(0.0001),
        )
    )
    layer_stack.append(tf.keras.layers.BatchNormalization())
layer_stack.append(tf.keras.layers.Dense(10, activation=tf.nn.softmax))

model = tf.keras.models.Sequential(layer_stack)

model.compile(
    metrics=['accuracy'],
    optimizer=tf.keras.optimizers.Adam(),
    loss='categorical_crossentropy',
)

# Time the training run.
start = time.time()
model_fit = model.fit(
    x_train,
    y_train,
    validation_data=(x_dev, y_dev),
    verbose=1,
    batch_size=batchSize,
    epochs=epoch,
)
elapsed = time.time() - start
print('training time: ', elapsed)

# Metrics recorded for the last epoch.
history = model_fit.history
print('train loss: ', history['loss'][-1])
print('train accuracy: ', history['accuracy'][-1])
print('dev loss: ', history['val_loss'][-1])
print('dev accuracy: ', history['val_accuracy'][-1])

# Dev-set learning curves: loss (panel 1) and accuracy (panel 2).
for position, key, title, y_label in [
    (1, 'val_loss', 'Val Loss', 'loss'),
    (2, 'val_accuracy', 'Val Accuracy', 'acc'),
]:
    plt.subplot(1, 2, position)
    plt.plot(history[key])
    plt.title(title)
    plt.xlabel('epoch')
    plt.ylabel(y_label)

plt.show()

# Test-split evaluation of the final-epoch weights.
test_loss, test_acc = model.evaluate(x_test, y_test)
print('test loss:', test_loss)
print('test accuracy:', test_acc)