In [1]:
# Testing TensorFlow 2.0 APIs with the built-in Fashion-MNIST dataset.
# refer to: https://zhuanlan.zhihu.com/p/70232196

import numpy as np
import tensorflow as tf


# Fetch the Fashion-MNIST arrays through the Keras dataset helper
# (the files are downloaded on first use).
fashion_mnist = tf.keras.datasets.fashion_mnist
train_split, test_split = fashion_mnist.load_data()
train_images, train_labels = train_split
test_images, test_labels = test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [2]:
# NOTE: this cell overwrites train_images / test_images, so re-running it without
# first re-running the loading cell would divide the pixel values by 255 again.

# add a trailing channel axis so the image arrays are 4-D: (N, 28, 28, 1)
train_images = train_images.reshape(-1, 28, 28, 1)
test_images = test_images.reshape(-1, 28, 28, 1)

# rescale the pixel values by 1/255 (a float32 divisor keeps the result in float32)
pixel_max = np.float32(255)
train_images = train_images / pixel_max
test_images = test_images / pixel_max

# labels as 64-bit integers
train_labels = train_labels.astype(np.int64)
test_labels = test_labels.astype(np.int64)

# wrap the arrays in tf.data pipelines; only the training split is shuffled
train_ds = tf.data.Dataset.from_tensor_slices((train_images, train_labels))
train_ds = train_ds.shuffle(10000).batch(32)
test_ds = tf.data.Dataset.from_tensor_slices((test_images, test_labels))
test_ds = test_ds.batch(32)


# model
class FMModel(tf.keras.Sequential):
    """Small convolutional classifier built as a Keras Sequential stack.

    Two Conv2D (3x3, relu) + MaxPool2D stages with 32 and then 64 filters are
    followed by Flatten, a 64-unit relu Dense layer and a 10-unit Dense head.
    The head has no activation, so the model outputs raw logits.
    """

    def __init__(self):
        layers = tf.keras.layers
        stack = [
            layers.Conv2D(32, 3, activation='relu'),
            layers.MaxPool2D(),
            layers.Conv2D(64, 3, activation='relu'),
            layers.MaxPool2D(),
            layers.Flatten(),
            layers.Dense(64, activation='relu'),
            layers.Dense(10, activation=None),
        ]
        super().__init__(stack)

model = FMModel()


# learning-rate schedule:
#   decayed_learning_rate = initial_learning_rate * decay_rate ^ (global_step / decay_steps)
# with staircase=True the exponent is floored, so the rate drops in discrete steps
# (here the first drop happens only after 100000 optimizer steps).
initial_learning_rate = 1e-4
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=initial_learning_rate,
    decay_steps=100000,
    decay_rate=0.96,
    staircase=True,
)

# optimizer
# RMSprop scales each update by a discounted moving average of squared gradients;
# it can be seen as a refinement of tf.keras.optimizers.Adagrad.
optimizer = tf.keras.optimizers.RMSprop(learning_rate=lr_schedule)


# checkpoint: tracks a step counter, the optimizer and the model;
# the manager keeps at most the 3 most recent checkpoints under ./tf_ckpts
checkpoint = tf.train.Checkpoint(
    step=tf.Variable(0),
    optimizer=optimizer,
    model=model,
)
manager = tf.train.CheckpointManager(checkpoint, directory='./tf_ckpts', max_to_keep=3)


# loss function (the model emits logits, hence from_logits=True)
# sparse categorical cross entropy: https://tensorflow.google.cn/api_docs/python/tf/nn/sparse_softmax_cross_entropy_with_logits?hl=en
# what "logits" means: https://stackoverflow.com/questions/34240703/what-is-logits-softmax-and-softmax-cross-entropy-with-logits?noredirect=1&lq=1%5D
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)


# metrics: a running mean of the loss plus sparse categorical accuracy, per split
train_loss_metric = tf.keras.metrics.Mean(name='train_loss')
train_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

test_loss_metric = tf.keras.metrics.Mean(name='test_loss')
test_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

In [3]:
# train loop

@tf.function
def train_step(inputs, targets):
    """Run one optimisation step on a single batch and update the training metrics.

    Args:
        inputs: batch of images fed to the global `model`.
        targets: integer class labels matching `inputs`.

    Side effects:
        Applies one gradient update through the global `optimizer` and feeds
        the batch loss and predictions into `train_loss_metric` and
        `train_acc_metric`.
    """
    with tf.GradientTape() as tape:
        logits = model(inputs, training=True)
        # cross-entropy on the logits plus any extra losses exposed via model.losses
        total_loss = loss_object(targets, logits) + sum(model.losses)
    # the variable list is read after the forward pass, once the model has been called
    weights = model.trainable_variables
    grads = tape.gradient(total_loss, weights)
    optimizer.apply_gradients(zip(grads, weights))
    train_loss_metric(total_loss)
    train_acc_metric(targets, logits)


epochs = 10
for epoch in range(epochs):
    print(f'Start of epoch {epoch}')
    # one pass over the batched training dataset
    for step, (inputs, targets) in enumerate(train_ds):
        train_step(inputs, targets)
        checkpoint.step.assign_add(1)
        # everything below runs only on every 20th step of the epoch
        if step % 20 != 0:
            continue
        # save a checkpoint, report the metrics gathered since the last report,
        # then clear them so the next report covers a fresh window
        manager.save()
        print(f'Epoch: {epoch}, Step: {step}, Train Loss: {train_loss_metric.result().numpy()}, '
              f'Train Accuracy: {train_acc_metric.result().numpy()}')
        for metric in (train_loss_metric, train_acc_metric):
            metric.reset_states()

Start of epoch 0
Epoch: 0, Step: 0, Train Loss: 2.3140251636505127, Train Accuracy: 0.125
Epoch: 0, Step: 20, Train Loss: 2.2335948944091797, Train Accuracy: 0.2109375
Epoch: 0, Step: 40, Train Loss: 2.112999439239502, Train Accuracy: 0.41093748807907104
Epoch: 0, Step: 60, Train Loss: 1.966436743736267, Train Accuracy: 0.504687488079071
Epoch: 0, Step: 80, Train Loss: 1.8170671463012695, Train Accuracy: 0.48750001192092896
Epoch: 0, Step: 100, Train Loss: 1.634521484375, Train Accuracy: 0.578125
Epoch: 0, Step: 120, Train Loss: 1.5044658184051514, Train Accuracy: 0.551562488079071
Epoch: 0, Step: 140, Train Loss: 1.3394396305084229, Train Accuracy: 0.612500011920929
Epoch: 0, Step: 160, Train Loss: 1.199575662612915, Train Accuracy: 0.6546875238418579
Epoch: 0, Step: 180, Train Loss: 1.123679518699646, Train Accuracy: 0.675000011920929
Epoch: 0, Step: 200, Train Loss: 1.0456328392028809, Train Accuracy: 0.651562511920929
Epoch: 0, Step: 220, Train Loss: 1.0284055471420288, Train Accur

In [4]:
# do test

@tf.function
def test_step(inputs, targets):
    """Evaluate one batch and accumulate the test loss and accuracy metrics.

    Args:
        inputs: batch of images fed to the global `model`.
        targets: integer class labels matching `inputs`.

    Side effects:
        Feeds the batch loss into `test_loss_metric` and the predictions into
        `test_acc_metric`; no gradients are computed and no weights change.
    """
    # Evaluation must call the model in inference mode. With training=True any
    # layer that behaves differently during training (e.g. dropout, batch
    # normalisation) would distort the reported test metrics.
    predictions = model(inputs, training=False)
    loss = loss_object(targets, predictions)
    test_loss_metric(loss)
    test_acc_metric(targets, predictions)


# run the evaluation step over every batch of the test dataset
for inputs, targets in test_ds:
    test_step(inputs, targets)

# report the aggregated metrics, then clear them so re-running this cell starts from zero
print(f'Test Loss: {test_loss_metric.result().numpy()} Test Accuracy: {test_acc_metric.result().numpy()}')
for metric in (test_loss_metric, test_acc_metric):
    metric.reset_states()

Test Loss: 0.32966485619544983 Test Accuracy: 0.8827000260353088
