In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers

In [None]:
# ---- Training configuration ------------------------------------------------
batch_size = 64
# NOTE: the buffer is far smaller than the 60000-sample training set, so the
# shuffle below only mixes examples locally within the stream.
shuffle_buffer_size = 100
num_classes = 10
epochs = 6

# ---- Image geometry ----------------------------------------------------------
img_rows, img_cols = 28, 28
input_shape = (img_rows, img_cols, 1)

# ---- Load the predefined train / test split ----------------------------------
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Append a trailing channel axis, cast to float32 and rescale by 1/255.
x_train = x_train.reshape(len(x_train), img_rows, img_cols, 1).astype('float32') / 255
x_test = x_test.reshape(len(x_test), img_rows, img_cols, 1).astype('float32') / 255

print('x_train shape:', x_train.shape)
print(len(x_train), 'train samples')
print(len(x_test), 'test samples')

# ---- One-hot encode the integer class labels ---------------------------------
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes)

# ---- Batched tf.data pipelines (only the training stream is shuffled) --------
train_ds = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(shuffle_buffer_size)
    .batch(batch_size)
)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


In [None]:
# Small fully-connected classifier: flatten -> 128 ReLU units -> dropout -> logits.
model = Sequential([
  # Declare the input with the same (img_rows, img_cols, 1) shape the data was
  # reshaped to, instead of a separate hard-coded (28, 28).
  layers.Input(shape=input_shape),
  layers.Flatten(),
  layers.Dense(128, activation='relu'),
  layers.Dropout(0.2),
  # No activation: the model outputs one raw logit per class.
  layers.Dense(num_classes)
])


In [None]:
def mse_grad(model, inputs, targets):
    """Differentiate the mean-squared-error loss w.r.t. the model's weights.

    Args:
        model: Keras model; run as ``model(inputs, training=True)``.
        inputs: Batch of inputs fed to the model.
        targets: Target values compared against the model output.

    Returns:
        The result of ``tape.gradient`` for ``model.trainable_variables``
        (one entry per trainable variable, in the same order).
    """
    with tf.GradientTape() as tape:
        # Forward pass in training mode so it is recorded on the tape.
        predictions = model(inputs, training=True)
        # The loss tensor is handed to the tape as returned; no extra
        # reduction is applied here.
        loss_value = tf.keras.losses.mean_squared_error(y_true=targets, y_pred=predictions)
    weights = model.trainable_variables
    return tape.gradient(loss_value, weights)

## HJB-AdaGrad Optimization

In [None]:
def _update_accuracy(metric_obj, y_true, y_pred):
    """Feed one batch to `metric_obj`, adapting inputs for plain `Accuracy`."""
    # `tf.keras.metrics.Accuracy` tests element-wise equality, which is
    # meaningless for one-hot labels vs. per-class scores; compare class
    # indices instead. Other metrics receive the tensors unchanged.
    if isinstance(metric_obj, tf.keras.metrics.Accuracy) and len(y_pred.shape) > 1:
        if len(y_true.shape) == len(y_pred.shape):
            y_true = tf.argmax(y_true, axis=-1)
        y_pred = tf.argmax(y_pred, axis=-1)
    metric_obj.update_state(y_true, y_pred)


def hjb_optimize(model, train_ds, test_ds, r=100., epochs=6, metric=tf.keras.metrics.Accuracy):
    """Train `model` with rescaled gradients and report train/test accuracy.

    For every training batch the gradients returned by `mse_grad` are
    multiplied by ``sqrt(2 * loss) / (global_grad_norm * sqrt(r))`` and then
    passed to ``model.optimizer.apply_gradients``. The model must already be
    compiled, because ``model.loss`` and ``model.optimizer`` are read directly.

    Args:
        model: Compiled Keras model, called as ``model(x, training=...)``.
        train_ds: Iterable of ``(x, y)`` batches used for training.
        test_ds: Iterable of ``(x, y)`` batches used for the final evaluation.
        r: Positive scalar; the gradients are divided by ``sqrt(r)``.
        epochs: Number of passes over `train_ds`.
        metric: Zero-argument callable that returns a fresh Keras metric.

    Returns:
        ``(train_loss_results, train_accuracy_results)``: two lists with one
        Python float per epoch.

    Raises:
        ValueError: If `r` is not strictly positive.
    """
    sqrt_r = float(r) ** 0.5 if r > 0 else None
    if sqrt_r is None:
        raise ValueError("r must be strictly positive, got {!r}".format(r))

    train_loss_results = []
    train_accuracy_results = []

    for epoch in range(epochs):
        epoch_loss_avg = tf.keras.metrics.Mean()
        epoch_accuracy = metric()

        for x, y in train_ds:
            grads = mse_grad(model, x, y)
            # NOTE(review): the gradients come from the MSE loss in `mse_grad`,
            # while the scaling below uses `model.loss` — confirm the two are
            # meant to differ.
            loss_value = model.loss(y_true=y, y_pred=model(x, training=True))

            # Joint L2 norm over all gradient tensors.
            grad_norm_value = tf.cast(tf.linalg.global_norm(grads), loss_value.dtype)
            # divide_no_nan yields a zero step (instead of NaN) when the
            # gradient norm is exactly zero.
            scale = tf.math.divide_no_nan(tf.sqrt(2 * loss_value), grad_norm_value * sqrt_r)

            # Tensors are immutable, so the rescaled gradients must be collected
            # in a new list; in-place operators on the loop variable would only
            # rebind it and leave `grads` untouched.
            scaled_grads = [g * tf.cast(scale, g.dtype) for g in grads]
            model.optimizer.apply_gradients(zip(scaled_grads, model.trainable_variables))

            # Track progress
            epoch_loss_avg.update_state(loss_value)
            _update_accuracy(epoch_accuracy, y, model(x, training=True))

        # End epoch
        epoch_loss = float(epoch_loss_avg.result())
        epoch_acc = float(epoch_accuracy.result())
        train_loss_results.append(epoch_loss)
        train_accuracy_results.append(epoch_acc)

        print("Epoch {:02d}: Loss: {:.3f}, Accuracy: {:.3%}".format(epoch, epoch_loss, epoch_acc))

    test_accuracy = metric()

    for x, y in test_ds:
        _update_accuracy(test_accuracy, y, model(x, training=False))

    print("Test set accuracy: {:.3%}".format(float(test_accuracy.result())))

    return train_loss_results, train_accuracy_results

In [None]:
# The last Dense layer has no activation, so the model outputs raw logits;
# from_logits=True makes the cross-entropy apply the softmax itself.
model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adagrad(),
    metrics=['accuracy'],
)
# Labels are one-hot encoded, so accuracy is measured with the argmax-based
# CategoricalAccuracy rather than the element-wise Accuracy metric.
hjb_optimize(model, train_ds, test_ds, r=100., epochs=epochs, metric=tf.keras.metrics.CategoricalAccuracy)