In [1]:
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt

In [2]:
mnist = keras.datasets.mnist
(X_train_full, y_train_full),(X_test,y_test) = mnist.load_data()

## Normalizing data

In [3]:
X_valid, X_train = X_train_full[:5000] / 255, X_train_full[5000:]/255
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]
X_test = X_test/255

In [4]:
class_names = ["0","1","2","3","4","5","6","7","8","9"]

In [5]:
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28,28]),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(300, activation = "relu"),
    keras.layers.Dense(100, activation = "relu"),
    keras.layers.Dense(10, activation = "softmax")])

  super().__init__(**kwargs)


## Changing optimizers

In [6]:
# https://www.tensorflow.org/api_docs/python/tf/keras/optimizers

model.compile(loss="categorical_crossentropy", optimizer="sgd", metrics=["accuracy"])

opt=tf.keras.optimizers.SGD(learning_rate=0.0001, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=opt)

model.compile(loss="categorical_crossentropy", optimizer=tf.keras.optimizers.Adam(clipvalue=0.5))

## Learning rate scheduling

In [7]:
# https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/schedules
    
## Power scheduling
opt = tf.keras.optimizers.SGD(learning_rate=0.01, decay=1e-4)
model.compile(loss="categorical_crossentropy", optimizer=opt)


## Exponential scheduling
def exponential_decay(epoch):
    return 0.01 * 0.1**(epoch/20)

lr_scheduler = keras.callbacks.LearningRateScheduler(exponential_decay)

# OR

initial_learning_rate = 0.1
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(initial_learning_rate, decay_steps=100000, decay_rate=0.96)

opt = tf.keras.optimizers.SGD(learning_rate=lr_schedule)
model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

                                                          
## Piecewise constant scheduling
def piecewise_constant_scheduling():
    if epoch < 5:
        return 0.01
    elif epoch <15:
        return 0.001
    else:
        return 0.0001
    
lr_scheduler = keras.callbacks.LearningRateScheduler(piecewise_constant_scheduling)

# OR

step = tf.Variable(0, trainable=False)
boundaries = [100000, 110000]
values = [1.0, 0.5, 0.1]
learning_rate_fn = keras.optimizers.schedules.PiecewiseConstantDecay(boundaries, values)
learning_rate = learning_rate_fn(step)

lr_scheduler = keras.callbacks.LearningRateScheduler(learning_rate)


## Performance scheduling
# https://keras.io/api/callbacks/reduce_lr_on_plateau/
lr_scheduler = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.001)


history = model.fit(X_train, y_train, epochs=30, batch_size=32, callbacks=[lr_scheduler])





Epoch 1/30
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.8938 - loss: 0.3386 - learning_rate: 0.0999
Epoch 2/30
[1m  57/1719[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3s[0m 2ms/step - accuracy: 0.9719 - loss: 0.0940

  callback.on_epoch_end(epoch, logs)


[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9680 - loss: 0.1008 - learning_rate: 0.0999
Epoch 3/30
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9805 - loss: 0.0629 - learning_rate: 0.0998
Epoch 4/30
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9837 - loss: 0.0514 - learning_rate: 0.0997
Epoch 5/30
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9857 - loss: 0.0422 - learning_rate: 0.0996
Epoch 6/30
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9883 - loss: 0.0351 - learning_rate: 0.0996
Epoch 7/30
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9897 - loss: 0.0298 - learning_rate: 0.0995
Epoch 8/30
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9914 - loss: 0.0247 - learning_ra

## Using mini-batches

In [8]:
history = model.fit(X_train, y_train, epochs=30, batch_size=32)

Epoch 1/30
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9976 - loss: 0.0065
Epoch 2/30
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9988 - loss: 0.0038
Epoch 3/30
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9984 - loss: 0.0057
Epoch 4/30
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9982 - loss: 0.0064
Epoch 5/30
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9980 - loss: 0.0062
Epoch 6/30
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9984 - loss: 0.0042
Epoch 7/30
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9980 - loss: 0.0058
Epoch 8/30
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9990 - loss: 0.0036
Epoch 9/30
[1m1719/1719