In [1]:
import os

# Configure TensorFlow's native (C++) log filtering *before* importing it:
# messages emitted while the library loads are only filtered if the variable
# is already present in the environment. "2" hides INFO and WARNING output.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, layers, optimizers

In [2]:
# Load the MNIST train and test splits, each as an (images, labels) pair.
train_split, test_split = datasets.mnist.load_data()
x, y = train_split
x_test, y_test = test_split

In [3]:
def _as_tensors(images, labels):
    """Return (float32 images divided by 255, uint8 labels) as tensors."""
    scaled = tf.convert_to_tensor(images, dtype=tf.float32) / 255
    return scaled, tf.convert_to_tensor(labels, dtype=tf.uint8)


# Labels are kept as integer class ids here; train() one-hot encodes them
# batch by batch.
# NOTE: x and x_test are rebound from themselves, so re-running this cell
# without reloading the data divides the pixel values by 255 a second time.
x, y = _as_tensors(x, y)
x_test, y_test = _as_tensors(x_test, y_test)

x.shape, y.shape, x_test.shape, y_test.shape

(TensorShape([60000, 28, 28]),
 TensorShape([60000]),
 TensorShape([10000, 28, 28]),
 TensorShape([10000]))

In [4]:
def _batched(features, labels, batch_size=200):
    """Pair features with labels element-wise and group them into batches."""
    pairs = tf.data.Dataset.from_tensor_slices((features, labels))
    return pairs.batch(batch_size)


# No shuffling is applied: batches follow the order of the source tensors.
train_dataset = _batched(x, y)
test_dataset = _batched(x_test, y_test)

In [5]:
def _init_param(*shape):
    """Create a variable initialised from a truncated normal with stddev 0.1."""
    return tf.Variable(tf.random.truncated_normal(list(shape), stddev=0.1))


# Fully connected 784 -> 256 -> 128 -> 10 network: one weight matrix and one
# bias vector per layer. No random seed is set, so values differ between runs.
W1, B1 = _init_param(784, 256), _init_param(256)
W2, B2 = _init_param(256, 128), _init_param(128)
W3, B3 = _init_param(128, 10), _init_param(10)

# Step size for gradient descent; update_variable() reads it on every call.
lr = 1e-3

In [50]:
def model(x):
    """Forward pass of the three-layer perceptron.

    Args:
        x: batch of flattened inputs that can be matrix-multiplied with W1
            (callers in this notebook reshape images to (-1, 784)).

    Returns:
        The softmax, taken along axis 1, of the last layer's output.
    """
    hidden_1 = tf.nn.relu(x @ W1 + B1)
    hidden_2 = tf.nn.relu(hidden_1 @ W2 + B2)
    logits = hidden_2 @ W3 + B3
    return tf.nn.softmax(logits, axis=1)

def update_variable(v: tf.Variable, grad):
    """Apply one plain gradient-descent step to `v` in place.

    Args:
        v: variable to update.
        grad: gradient of the loss with respect to `v`.

    The step is `lr * grad`, where `lr` is the notebook-level learning rate
    looked up at call time, so rebinding `lr` affects subsequent updates.
    """
    step = lr * grad
    v.assign_sub(step)
    
def update_variables(grads):
    """Apply a gradient-descent step to every model parameter.

    Args:
        grads: indexable sequence whose first six entries are the gradients
            for W1, B1, W2, B2, W3 and B3, in that order.
    """
    parameters = (W1, B1, W2, B2, W3, B3)
    for position, parameter in enumerate(parameters):
        update_variable(parameter, grads[position])
    
def train(epoch):
    """Run one pass over `train_dataset` using manual gradient descent.

    Args:
        epoch: index of the current epoch; not used inside the function.

    Returns:
        The loss of the last batch processed (mean squared error between the
        model's softmax output and the one-hot labels), as a Python float.
    """
    for images, labels in train_dataset:
        flat_images = tf.reshape(images, [-1, 28 * 28])
        targets = tf.one_hot(labels, depth=10)

        # Record the forward pass so the loss can be differentiated with
        # respect to the six parameters.
        with tf.GradientTape() as tape:
            probs = model(flat_images)
            # Mean over every element of the squared-error tensor.
            batch_loss = tf.reduce_mean(tf.square(probs - targets))

        grads = tape.gradient(batch_loss, [W1, B1, W2, B2, W3, B3])
        update_variables(grads)
        last_loss = float(batch_loss)
    return last_loss

def test(epoch, loss):
    """Measure accuracy on `test_dataset` and print a one-line epoch summary.

    Args:
        epoch: epoch index; only used in the printed line.
        loss: training loss to report; only used in the printed line.

    Prints the epoch, the given loss and the fraction of correctly classified
    test samples. Accuracy is reported as NaN if the dataset yields no samples.
    """
    total_correct, total_num = 0, 0
    for images, labels in test_dataset:
        flat_images = tf.reshape(images, [-1, 28 * 28])
        probs = model(flat_images)

        # argmax returns int64; cast to uint8 so it can be compared with the
        # uint8 labels.
        pred = tf.cast(tf.argmax(probs, axis=1), dtype=tf.uint8)

        # Count hits in int32: reduce_sum keeps its input dtype, so a uint8
        # sum would wrap around for batches with more than 255 correct samples.
        hits = tf.cast(tf.equal(pred, labels), dtype=tf.int32)
        total_correct += int(tf.reduce_sum(hits))
        total_num += flat_images.shape[0]

    # Avoid ZeroDivisionError when the dataset is empty.
    acc = total_correct / total_num if total_num else float("nan")
    print(f'epoch: {epoch}, loss: {loss}, acc: {acc}.')

def train_epoch(epoch_num):
    """Train for `epoch_num` epochs, evaluating on the test set after each one.

    Args:
        epoch_num: number of epochs to run; zero runs nothing.
    """
    # Iterate over the requested number of epochs rather than a fixed count.
    for epoch in range(epoch_num):
        loss = train(epoch)
        test(epoch, loss)

In [57]:
# Raise the step size from its initial 1e-3; update_variable() reads `lr` at
# call time, so the new value applies to this run. The parameters are not
# re-initialised, so training continues from their current values.
lr = 0.1
train_epoch(epoch_num=5)

epoch: 0, loss: 0.0016941054491326213, acc: 0.9598.
epoch: 1, loss: 0.0016858691815286875, acc: 0.9599.
epoch: 2, loss: 0.0016782759921625257, acc: 0.9599.
epoch: 3, loss: 0.0016716597601771355, acc: 0.96.
epoch: 4, loss: 0.0016648503951728344, acc: 0.96.


In [56]:
# Train for another five epochs with a step size of 0.1. The parameters are
# module-level variables updated in place, so this run starts from whatever
# values earlier runs left behind.
lr = 0.1
train_epoch(epoch_num=5)

epoch: 0, loss: 0.001736431964673102, acc: 0.9582.
epoch: 1, loss: 0.0017281122272834182, acc: 0.9587.
epoch: 2, loss: 0.0017191499937325716, acc: 0.9592.
epoch: 3, loss: 0.001710074138827622, acc: 0.9594.
epoch: 4, loss: 0.0017022374086081982, acc: 0.9597.


In [49]:
# Class probabilities for the first two test images, each flattened to 784
# features.
first_two = tf.reshape(x_test[:2], (-1, 28 * 28))
model(first_two)

<tf.Tensor: shape=(2, 10), dtype=float32, numpy=
array([[6.3098872e-05, 1.7306203e-08, 7.2310380e-05, 1.8900177e-04,
        7.8045503e-07, 2.9244162e-05, 3.8498063e-09, 9.9942529e-01,
        5.2804476e-06, 2.1501494e-04],
       [2.3252047e-03, 3.0753104e-06, 9.7294647e-01, 7.0973532e-03,
        1.0673008e-09, 1.4866168e-02, 1.1816265e-03, 5.6261067e-09,
        1.5802127e-03, 1.4692765e-09]], dtype=float32)>

In [63]:
# Keep the model's output for the first two test images for the next steps.
first_two = tf.reshape(x_test[:2], (-1, 28 * 28))
out = model(first_two)

In [64]:
# Reduce each row of `out` to the index of its largest entry.
# NOTE: `out` is rebound from itself, so this cell is meant to run once after
# the cell that computes `out`; re-running it alone reduces the result again.
out = tf.math.argmax(input=out, axis=1)

In [65]:
# Display the current value of `out` (the argmax indices once the previous
# cell has run).
out

<tf.Tensor: shape=(2,), dtype=int64, numpy=array([7, 2], dtype=int64)>