In [None]:
import torch
from torch import nn, optim
import mltools
import dataloder as dl

In [None]:
class TransformerMNISTModel(nn.Module):
    """Transformer-encoder classifier.

    Runs the input sequence through a 6-layer Transformer encoder
    (d_model=28, 4 attention heads, batch-first layout) and maps the
    encoding of the first time step to 10 output logits.
    """

    def __init__(self, *args, **kwargs):
        """Build the encoder stack and the linear classification head."""
        super().__init__(*args, **kwargs)
        # Encoder stack: six copies of the same encoder layer
        self.hidden_layer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=28, nhead=4, batch_first=True),
            num_layers=6,
        )
        # Classification head: 28 features -> 10 logits
        self.output_layer = nn.Linear(28, 10)

    def forward(self, x):
        """Return logits of shape (batch, 10) for x of shape (batch, steps, 28)."""
        encoded = self.hidden_layer(x)  # (batch, steps, 28)
        first_step = encoded[:, 0, :]  # only the first time step feeds the head
        return self.output_layer(first_step)

In [None]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on machines without CUDA
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
train_iter, val_iter, test_iter = dl.mnist(batch_size=1000)  # training, validation and test iterators
model = TransformerMNISTModel()  # model to train
model.to(device)  # move the parameters to the selected device
loss = nn.CrossEntropyLoss()  # loss function
optimizer = optim.SGD(model.parameters(), lr=1e-1)  # optimizer
ml = mltools.MachineLearning("TransformerMNIST")
ml.add_model(model)  # register the model with the project helper
epoch, timer, recorder = ml.batch_create()  # helpers used by the training loop below

In [None]:
# Train the model
num_epochs = epoch(100)
animator = ml.create_animator(xlabel="epoch", xlim=[0, epoch.totol_epoch + 1], ylim=-0.1, legend=["train loss", "val loss", "val acc"])  # create the animator
for current_epoch in range(1, num_epochs + 1):
    timer.start()

    # Training pass
    metric_train = mltools.Accumulator(2)  # accumulator: (train_loss, train_size)
    model.train()  # training mode
    for x, y in train_iter:
        x = x.to(device)  # move x to the device
        x = x.permute(0, 2, 1, 3)  # swap dims 1 and 2 — assumes x arrives as (batch, channels, height, width), giving (batch, height, channels, width)
        x = x.reshape(x.shape[0], x.shape[1], -1)  # merge the last two dims: (batch, height, channels * width)
        y = y.to(device)  # move y to the device
        y_train = model(x)  # forward pass
        train_loss = loss(y_train, y)  # training loss

        # Gradient update
        optimizer.zero_grad()
        train_loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1, norm_type=2)  # clip the global L2 gradient norm to 1
        optimizer.step()

        # Weight the batch loss by the batch size so the epoch value below is a per-sample average
        metric_train.add(train_loss * y.numel(), y.numel())
    recorder[0].append(metric_train[0] / metric_train[1])  # train loss for this epoch

    # Validation pass
    metric_val = mltools.Accumulator(3)  # accumulator: (val_loss, val_acc, val_size)
    model.eval()  # evaluation mode
    with torch.no_grad():
        for x, y in val_iter:
            x = x.to(device)  # move x to the device
            x = x.permute(0, 2, 1, 3)  # same layout change as in the training pass
            x = x.reshape(x.shape[0], x.shape[1], -1)  # (batch, height, channels * width)
            y = y.to(device)  # move y to the device
            y_val = model(x)  # forward pass
            val_loss = loss(y_val, y)  # validation loss
            val_pred = y_val.argmax(dim=1)  # predicted class per sample
            val_acc = (val_pred == y).sum()  # number of correct predictions in this batch
            metric_val.add(val_loss * y.numel(), val_acc, y.numel())
    recorder[1].append(metric_val[0] / metric_val[2])  # val loss for this epoch
    recorder[2].append(metric_val[1] / metric_val[2])  # val accuracy for this epoch

    timer.stop()

    # Report this epoch
    ml.logger.info(f"train loss {recorder[0][-1]:.3f}, val loss {recorder[1][-1]:.3f}, val acc {recorder[2][-1]:.3f}")
    ml.print_training_time_massage(timer, num_epochs, current_epoch)
    ml.logger.info(f"trained on {str(device)}")
    animator.show(recorder.data)

# Final report after the loop. This was previously the `else` branch of the `for`;
# since the loop has no `break`, that branch always ran, so plain post-loop code is equivalent.
# NOTE(review): if num_epochs is 0 the loop never runs and current_epoch is unbound here —
# confirm epoch() never returns 0.
ml.logger.info(f"train loss {recorder[0][-1]:.3f}, val loss {recorder[1][-1]:.3f}, val acc {recorder[2][-1]:.3f}")
ml.print_training_time_massage(timer, num_epochs, current_epoch)
ml.logger.info(f"trained on {str(device)}")
animator.show(recorder.data)
ml.save()

In [None]:
# Evaluate the model on the test set
metric = mltools.Accumulator(2)  # accumulator: (test_acc, test_size)
model.eval()
with torch.no_grad():
    for x, y in test_iter:
        # Move the batch to the device
        x, y = x.to(device), y.to(device)
        # Swap dims 1 and 2, then merge the last two dims so each sample is a 2-D sequence
        x = x.transpose(1, 2)
        x = x.flatten(start_dim=2)
        y_test = model(x)  # forward pass
        test_pred = y_test.argmax(dim=1)  # predicted class per sample
        test_acc = test_pred.eq(y).sum()  # number of correct predictions in this batch
        metric.add(test_acc, y.numel())
test_accuracy_message = f"test acc {metric[0] / metric[1]:.3f}"  # correct predictions / samples seen
ml.logger.info(test_accuracy_message)

In [None]:
# Predict on a few test samples
model.eval()
x, y = next(iter(test_iter))  # take one batch from the test iterator
x = x[:10].to(device)  # keep only the first 10 samples
x = x.permute(0, 2, 1, 3)  # swap dims 1 and 2 — assumes x arrives as (batch, channels, height, width)
x = x.reshape(x.shape[0], x.shape[1], -1)  # merge the last two dims: (batch, height, channels * width)
y = y[:10].to(device)
with torch.no_grad():  # inference only: skip autograd tracking, as the validation and test loops do
    y_pred = model(x)  # forward pass
y_pred = y_pred.argmax(dim=1)  # predicted class per sample
labels = [f"real:{y[index]}\npred:{y_pred[index]}" for index in range(y.numel())]
# NOTE(review): x is already 3-D here, so squeeze(1) only has an effect if dim 1 has size 1 — looks like a no-op; confirm
mltools.images(x.squeeze(1), labels, shape=(2, 5))