# MLP 模型

## 1. 数据获取及预处理

In [1]:
import tensorflow as tf
import numpy as np

class MNISTLoader:
    """Load MNIST through ``tf.keras.datasets`` and serve random mini-batches.

    After construction the instance exposes:

    * ``train_data`` / ``test_data``: float32 image arrays, divided by 255
      and with one trailing axis of size 1 appended.
    * ``train_label`` / ``test_label``: int32 label arrays.
    * ``num_train_data`` / ``num_test_data``: first-axis length of each split.
    """

    def __init__(self):
        super().__init__()
        mnist = tf.keras.datasets.mnist
        (train_data, train_label), (test_data, test_label) = mnist.load_data()

        # Both splits go through the same helper so they cannot diverge;
        # previously a misspelled attribute name left the training labels
        # uncast while the test labels were converted to int32.
        self.train_data, self.train_label = self._preprocess(train_data, train_label)
        self.test_data, self.test_label = self._preprocess(test_data, test_label)

        self.num_train_data = self.train_data.shape[0]
        self.num_test_data = self.test_data.shape[0]

    @staticmethod
    def _preprocess(images, labels):
        """Return ``(images, labels)`` converted for model consumption.

        Images are cast to float32, divided by 255 (mapping 8-bit pixel
        values into [0, 1]) and given a trailing axis of size 1. Labels are
        cast to int32.
        """
        scaled = images.astype(np.float32) / 255.
        return np.expand_dims(scaled, axis=-1), labels.astype(np.int32)

    def get_batch(self, batch_size: int, dataset: str = "train"):
        """Return a random ``(data, label)`` batch of ``batch_size`` samples.

        Indices are drawn independently with ``np.random.randint``, so the
        same sample may appear more than once in a batch.

        Args:
            batch_size: Number of samples to draw.
            dataset: ``"train"`` selects the training split; any other value
                selects the test split.

        Returns:
            A tuple ``(data, label)`` indexed by the same random indices.
        """
        if dataset == "train":
            index = np.random.randint(0, self.num_train_data, batch_size)
            return self.train_data[index, :], self.train_label[index]
        index = np.random.randint(0, self.num_test_data, batch_size)
        return self.test_data[index, :], self.test_label[index]

## 2. 构建模型

In [2]:
class MLP(tf.keras.Model):
    """Multilayer perceptron: flatten, a 120-unit ReLU layer, a 10-unit layer, softmax."""

    def __init__(self):
        super().__init__()
        # Layers are created in the order they are applied in ``call``.
        self.flatten = tf.keras.layers.Flatten()
        self.dense_1 = tf.keras.layers.Dense(units=120, activation=tf.nn.relu)
        self.dense_2 = tf.keras.layers.Dense(units=10)

    def call(self, inputs: tf.Tensor):
        """Run the forward pass and return softmax probabilities."""
        hidden = self.dense_1(self.flatten(inputs))
        logits = self.dense_2(hidden)
        # dense_2 has no activation, so softmax is applied explicitly here.
        return tf.nn.softmax(logits)

## 3. 模型的训练

In [3]:
from tqdm.notebook import tqdm
# Training hyperparameters and the objects used by the training loop.
num_epochs = 1
batch_size = 50
learning_rate = 1e-3
model = MLP()
data_loader = MNISTLoader()
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

# Total number of optimizer steps: full batches per epoch times epochs.
num_batches = int(data_loader.num_train_data // batch_size * num_epochs)

with tqdm(total=num_batches) as bar:
    for batch_index in range(num_batches):
        x, y = data_loader.get_batch(batch_size)
        # Record the forward pass so the loss can be differentiated.
        with tf.GradientTape() as tape:
            y_hat = model(x)
            loss = tf.keras.losses.sparse_categorical_crossentropy(y_true=y, y_pred=y_hat)
            # The per-sample losses are summed (not averaged) over the batch.
            loss = tf.reduce_sum(loss)
        # Differentiate only with respect to trainable weights: any
        # non-trainable variable would produce a None gradient that the
        # optimizer cannot apply.
        trainable_variables = model.trainable_variables
        gradients = tape.gradient(loss, trainable_variables)
        optimizer.apply_gradients(grads_and_vars=zip(gradients, trainable_variables))
        # Report progress once the step has actually been applied.
        bar.set_description(f"loss: {loss}")
        bar.update()

HBox(children=(FloatProgress(value=0.0, max=1200.0), HTML(value='')))




## 4. 模型的评估

In [7]:
# Running accuracy, accumulated batch by batch over the test split.
sparse_categorical_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()

# Ceiling division so a trailing partial batch is counted as well.
num_batches = -(-data_loader.num_test_data // batch_size)

with tqdm(total=num_batches) as bar:
    # Walk the test split sequentially so every sample is scored exactly
    # once; get_batch() draws random indices and may repeat or skip samples.
    for start_index in range(0, data_loader.num_test_data, batch_size):
        end_index = start_index + batch_size
        x = data_loader.test_data[start_index:end_index]
        y = data_loader.test_label[start_index:end_index]
        # verbose=0 keeps predict() from printing its own per-call progress
        # output underneath the tqdm bar.
        y_pred = model.predict(x, verbose=0)
        sparse_categorical_accuracy.update_state(y_true=y, y_pred=y_pred)
        bar.set_description(f"acc: {sparse_categorical_accuracy.result()}")
        bar.update()

HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))


