# 实验目标：自定义Train Loop
使用自定义训练循环训练模型来处理Fashion MNIST数据集：
   - a.显示每个轮次、迭代、平均训练损失和每个轮次的平均精度（在每次迭代中更新），以及每个轮次结束时的验证损失和精度。
   - b.尝试对上面的层和下面的层使用具有不同学习率的不同优化器。

# 一、数据集准备

## 数据集分离(Split)

In [1]:
import tensorflow as tf

(X_train, y_train), (X_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
print(f"训练集样本的形状为：{X_train.shape}")
print(f"训练集样本标签的形状为：{y_train.shape}")
print(f"测试集样本的形状为：{X_test.shape}")
print(f"测试集标签的形状为：{y_test.shape}")

训练集样本的形状为：(60000, 28, 28)
训练集样本标签的形状为：(60000,)
测试集样本的形状为：(10000, 28, 28)
测试集标签的形状为：(10000,)


## 归一化数据(Normalization)

In [2]:
X_train, X_test = X_train / 255., X_test/ 255.

In [3]:
import numpy as np
np.ptp(X_train), np.ptp(X_test)

(1.0, 1.0)

## 采样（Sampling）
抽取一定数量的数据集作为模型每个批次的数据

In [4]:
def sampling(X, y, batch):
    """

    :param X: 输入的训练集/测试集样本
    :param y: 训练集/测试集标签
    :param batch: 批次大小
    :return: 随机采样的样本
    """
    indices = np.random.randint(X.shape[0], size=batch)
    X_batch, y_batch = X[indices], y[indices]
    return X_batch, y_batch

# 二、TensorFlow中自定义训练循环

搭建一个宽深神经网络模型，如下图所示，并且输入宽和输入深使用不同的优化器和学习率

<center>
<img src="./images/neural_network/p1.png" style="zoom: 50%">
</center>

In [5]:
# 定义一个函数，以显示训练状态
def print_status_bar(step, total, loss, metrics=None):
    metrics = " - ".join([f"{m.name}: {m.result():.4f}" for m in [loss] + (metrics or [])])
    end = "" if step < total else "\n"

    print(f"\r{step}/{total} - " + metrics, end=end)

In [6]:
class WideDeepModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.layer = [
            tf.keras.layers.Dense(300, activation="relu", kernel_initializer="he_normal"),
            tf.keras.layers.Dense(100, activation="relu", kernel_initializer="he_normal"),
            tf.keras.layers.Dense(10, activation="softmax"),
        ]
        self.flat = tf.keras.layers.Flatten()

    def call(self, inputs, training=True, mask=None):
        z = self.flat(inputs)
        for layer in self.layer:
            z = layer(z)

        return  z


In [7]:
model = WideDeepModel()

In [8]:
# 一些超参数
n_epochs = 5
batch_size = 32
n_steps = len(X_train) // batch_size

# 指定训练的优化器、损失和指标
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=0.001, decay_steps=1000, decay_rate=0.9)
optimizer1 = tf.keras.optimizers.SGD(learning_rate=lr_schedule)
optimizer2 = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

loss = tf.keras.losses.SparseCategoricalCrossentropy()
metrics = [tf.keras.metrics.SparseCategoricalAccuracy()]

# 用于计算平均损失
mean_loss = tf.keras.metrics.Mean()

# 自定义训练循环，类似于TensorFlow自带的fit
for epoch in range(1, n_epochs + 1):            # 每个轮次
    print(f"Epoch {epoch}/{n_epochs}")

    # 训练阶段
    print("训练中...")
    for step in range(1, n_steps + 1):         # 每个批次
        X_batch, y_batch = sampling(X_train, y_train, batch_size)

        with tf.GradientTape(persistent=True) as tape:
            y_pred = model(X_batch, training=True)
            losses = tf.reduce_mean(loss(y_batch, y_pred))

        gradients1 = tape.gradient(losses, model.layer[0].trainable_variables)
        gradients2 = tape.gradient(losses, model.layer[1].trainable_variables)
        del tape

        optimizer1.apply_gradients(zip(gradients1, model.layer[0].trainable_variables))
        optimizer2.apply_gradients(zip(gradients2, model.layer[1].trainable_variables))

        mean_loss(losses)
        for metric in metrics:
            metric(y_batch, y_pred)

        print_status_bar(step, n_steps, mean_loss, metrics)

    # 验证阶段
    print("验证中....")
    val_steps = len(X_test) // batch_size

    for step in range(1, val_steps + 1):
        X_val_batch, y_val_batch = sampling(X_test, y_test, batch_size)

        y_val_pred = model(X_val_batch, training=False)             # training=False
        val_losses = tf.reduce_mean(loss(y_val_batch, y_val_pred))

        mean_loss(val_losses)
        for val_metric in metrics:
            val_metric(y_val_batch, y_val_pred)
    print(f"验证集损失为：{mean_loss.result():.4f}，准确率为:{metrics[0].result():.4f}")

    # 下个轮次清空状态
    for metric in [mean_loss] + metrics:
        metric.reset_states()


Epoch 1/5
训练中...
1875/1875 - mean: 0.5665 - sparse_categorical_accuracy: 0.8103
验证中....
验证集损失为：0.5566，准确率为:0.8120
Epoch 2/5
训练中...
1875/1875 - mean: 0.4286 - sparse_categorical_accuracy: 0.8493
验证中....
验证集损失为：0.4335，准确率为:0.8471
Epoch 3/5
训练中...
1875/1875 - mean: 0.3977 - sparse_categorical_accuracy: 0.8587
验证中....
验证集损失为：0.4035，准确率为:0.8566
Epoch 4/5
训练中...
1875/1875 - mean: 0.3844 - sparse_categorical_accuracy: 0.8669
验证中....
验证集损失为：0.3885，准确率为:0.8650
Epoch 5/5
训练中...
1875/1875 - mean: 0.3682 - sparse_categorical_accuracy: 0.8707
验证中....
验证集损失为：0.3761，准确率为:0.8679
