In [1]:
import os

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential, datasets, layers, metrics, optimizers

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

In [2]:
(x, y), (x_test, y_test) = datasets.fashion_mnist.load_data()
x.shape, y.shape, x_test.shape, y_test.shape

((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))

In [3]:
batchsz = 128

In [4]:
def preprocess(x, y):
    x = tf.cast(x, dtype=tf.float32) / 255.0
    y = tf.cast(y, dtype=tf.int32)
    return x, y

In [10]:
db = tf.data.Dataset.from_tensor_slices((x, y))
db = db.map(preprocess).shuffle(10000).batch(batchsz)

db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.map(preprocess).shuffle(10000).batch(batchsz)

In [11]:
model = Sequential(
    [
        layers.Dense(256, activation=tf.nn.relu),
        layers.Dense(128, activation=tf.nn.relu),
        layers.Dense(64, activation=tf.nn.relu),
        layers.Dense(32, activation=tf.nn.relu),
        layers.Dense(10, activation=tf.nn.relu),
    ]
)
model.build(input_shape=[None, 28 * 28])

In [12]:
optimizer = optimizers.Adam(learning_rate=1e-3)

In [22]:
def train_epoch(epoch: int):
    loss_sum = 0
    x_count = 0
    for step, (x, y) in enumerate(db):
        x = tf.reshape(x, [-1, 28 * 28])
        x_count += x.shape[0] # display element
        y_onehot = tf.one_hot(y, depth=10)
        with tf.GradientTape() as tape:
            logits = model(x)
            # loss_mse = tf.reduce_mean(tf.losses.MSE(y_onehot, logits))
            loss_ce = tf.losses.categorical_crossentropy(y_onehot, logits, from_logits=True)
            loss_sum += float(tf.reduce_sum(loss_ce)) # display element
            loss_ce = tf.reduce_mean(loss_ce)
        grads = tape.gradient(loss_ce, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        loss = float(loss_ce)
        # if step % 100 == 0:
        #     print(f"[{epoch}]({step})> loss_ce: {float(loss_ce)}.")

    loss = loss_sum / x_count
    ## test
    total_corrent = 0
    total_num = 0
    for x, y in db_test:
        x = tf.reshape(x, [-1, 28 * 28])
        logits = model(x)  # [b, 10]
        prob = tf.nn.softmax(logits, axis=1)
        pred = tf.argmax(prob, axis=1)  # the index of max value in prob's axis=1 => [b]
        pred = tf.cast(pred, dtype=tf.int32)
        corrent = tf.equal(pred, y)  # [b], True: equal, False: not equal
        corrent = tf.reduce_sum(tf.cast(corrent, dtype=tf.int32))

        total_corrent += int(corrent)
        total_num += x.shape[0]

    acc = round(total_corrent / total_num, 4)
    
    print(f"[{epoch}]> test acc: {acc}, x_count: {x_count}, loss: {loss}")


def train(epochs: int):
    for epoch in range(epochs):
        train_epoch(epoch)

In [23]:
train(5)

[0](0)> loss_ce: 0.4720242917537689.
[0](100)> loss_ce: 0.3571893572807312.
[0](200)> loss_ce: 0.496612012386322.
[0](300)> loss_ce: 0.46405869722366333.
[0](400)> loss_ce: 0.33381739258766174.
[0]> test acc: 0.8814, x_count: 60000, loss: 0.3920767405192057
[1](0)> loss_ce: 0.4182037115097046.
[1](100)> loss_ce: 0.3833233416080475.
[1](200)> loss_ce: 0.2686905860900879.
[1](300)> loss_ce: 0.4610295295715332.
[1](400)> loss_ce: 0.44221892952919006.
[1]> test acc: 0.871, x_count: 60000, loss: 0.38620709657669067
[2](0)> loss_ce: 0.4430846571922302.
[2](100)> loss_ce: 0.386136919260025.
[2](200)> loss_ce: 0.46806713938713074.
[2](300)> loss_ce: 0.3248709738254547.
[2](400)> loss_ce: 0.39974498748779297.
[2]> test acc: 0.8709, x_count: 60000, loss: 0.37882201042175295
[3](0)> loss_ce: 0.27965718507766724.
[3](100)> loss_ce: 0.2871922552585602.
[3](200)> loss_ce: 0.3779420554637909.
[3](300)> loss_ce: 0.39333266019821167.
[3](400)> loss_ce: 0.4212396740913391.
[3]> test acc: 0.8903, x_count