In [30]:
import torch
import numpy as np
import random
from IPython import display
from matplotlib import pyplot as plt
from torch import nn
import torch.utils.data as Data
import torch.optim as optim
from torch.nn import init
import torchvision
import torchvision.transforms as transforms
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"  #防止jupyter崩溃

In [31]:
# Download the MNIST handwritten-digit dataset into ./Datasets/MNIST
# (train and test splits); every sample is passed through ToTensor().
mnist_train = torchvision.datasets.MNIST(
    root='./Datasets/MNIST',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
mnist_test = torchvision.datasets.MNIST(
    root='./Datasets/MNIST',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

# Mini-batch loaders: the training split is reshuffled on every pass,
# the test split is read in its stored order. Loading happens in the main
# process (num_workers=0).
batch_size = 32
train_iter = Data.DataLoader(
    mnist_train,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)
test_iter = Data.DataLoader(
    mnist_test,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
)

In [32]:
# Flatten layer: collapses every dimension after the first into one.
class FlattenLayer(torch.nn.Module):
    """Reshape an input of shape (d0, d1, ..., dk) to (d0, d1*...*dk).

    The layer has no parameters; it only returns a view of its input,
    keeping the leading dimension and merging all the others.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        leading = x.size(0)  # first dimension is preserved as-is
        return x.view(leading, -1)

In [33]:
# Model definition and parameter initialisation
num_inputs,num_hiddens,num_outputs = 784,256,10

# Seed torch's global RNG before anything random happens so that a fresh
# "Restart & Run All" gives the same initial weights. DataLoader shuffling
# is expected to draw from the same global generator when no explicit
# generator is passed, so it should become repeatable too.
torch.manual_seed(0)

# Flatten -> Linear(784, 256) -> ReLU -> Linear(256, 10)
net = nn.Sequential(
        FlattenLayer(),
        nn.Linear(num_inputs,num_hiddens),
        nn.ReLU(),
        nn.Linear(num_hiddens,num_outputs)
        )

# Every parameter tensor (weights AND biases) is re-drawn from N(0, 0.01^2),
# overriding the layers' default initialisation.
for params in net.parameters():
    init.normal_(params,mean=0,std=0.01)

In [34]:
# Number of training epochs and the SGD learning rate
num_epochs = 50
lr = 0.01

# Cross-entropy loss criterion
loss = nn.CrossEntropyLoss()

# L2 regularisation through the optimizer's weight decay. It is applied to
# every parameter, biases included: the original author chose not to treat
# the biases separately, judging their effect on the model to be small.
optimizer = optim.SGD(net.parameters(), lr=lr, weight_decay=1e-2)

In [35]:
# Accuracy and average loss over a dataset (used for the test set)
def evaluate_loss(data_iter,net,loss_fn=None):
    """Evaluate ``net`` on every batch of ``data_iter``.

    Args:
        data_iter: iterable yielding ``(X, y)`` mini-batches.
        net: callable mapping ``X`` to per-class scores; ``argmax(dim=1)``
            of its output is compared with ``y``.
        loss_fn: criterion called as ``loss_fn(y_hat, y)``. Defaults to the
            notebook-level ``loss`` object when omitted.

    Returns:
        ``(accuracy, average_loss)``, both averaged over all samples seen.
        Each batch loss is multiplied by the batch size before averaging,
        which assumes the criterion returns the batch mean.

    Raises:
        ValueError: if ``data_iter`` yields no samples.
    """
    if loss_fn is None:
        loss_fn = loss  # fall back to the notebook-level criterion

    # Only nn.Module instances have a train/eval mode to switch.
    is_module = isinstance(net, torch.nn.Module)
    was_training = net.training if is_module else False
    if is_module:
        net.eval()

    acc_sum,loss_sum,n = 0.0,0.0,0
    try:
        # Evaluation needs no gradients; skip building the autograd graph.
        with torch.no_grad():
            for X,y in data_iter:
                y_hat = net(X)
                acc_sum += (y_hat.argmax(dim=1)==y).sum().item()
                l = loss_fn(y_hat,y)  # loss of this mini-batch
                loss_sum += l.sum().item()*y.shape[0]
                n += y.shape[0]
    finally:
        # Leave the model in whatever mode the caller had it in.
        if is_module:
            net.train(was_training)

    if n == 0:
        raise ValueError("evaluate_loss: data_iter yielded no samples")
    return acc_sum/n,loss_sum/n

In [36]:
# Model training routine
def train(net,train_iter,test_iter,loss,num_epochs,batch_size,params=None,lr=None,optimizer=None):
    """Train ``net`` for ``num_epochs`` epochs and log per-epoch metrics.

    Args:
        net: model, called as ``net(X)``.
        train_iter: iterable of ``(X, y)`` training mini-batches.
        test_iter: iterable of ``(X, y)`` batches, passed to ``evaluate_loss``.
        loss: criterion called as ``loss(y_hat, y)``; its result is summed.
        num_epochs: number of full passes over ``train_iter``.
        batch_size: accepted but not used inside this function.
        params: parameters for the manual-update path (indexed and iterated);
            only used when ``optimizer`` is None.
        lr: learning rate forwarded to ``SGD`` on the manual-update path.
        optimizer: if given, it zeroes the gradients and performs the step.

    Returns:
        ``(train_ls, test_ls)``: lists holding one average training loss and
        one average test loss per epoch.

    Note:
        Without an optimizer the update is delegated to ``SGD(params, lr)``,
        a helper that is not defined in the visible part of this notebook.
    """
    use_optimizer = optimizer is not None
    train_ls, test_ls = [], []

    for epoch in range(num_epochs):
        # Running totals for this epoch: weighted loss, correct predictions, samples.
        epoch_loss, epoch_correct, seen = 0.0, 0.0, 0

        # One epoch visits every training mini-batch once.
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()  # loss of this mini-batch

            # Clear gradients left over from the previous step.
            if use_optimizer:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()

            l.backward()  # gradients of the batch loss w.r.t. the parameters

            # Parameter update: library optimizer if supplied, else manual SGD.
            if use_optimizer:
                optimizer.step()
            else:
                SGD(params,lr)

            batch_n = y.shape[0]
            # Weight the batch loss by its size so the epoch average is per sample.
            epoch_loss += l.item()*batch_n
            # Correct predictions, using the outputs computed before the update.
            epoch_correct += (y_hat.argmax(dim=1)==y).sum().item()
            seen += batch_n

        train_ls.append(epoch_loss/seen)
        test_acc,test_l = evaluate_loss(test_iter,net)
        test_ls.append(test_l)
        print('epoch %d, train_loss %.6f,test_loss %f,train_acc %.6f,test_acc %.6f'%(epoch+1, train_ls[-1],test_ls[-1],epoch_correct/seen,test_acc))

    return train_ls,test_ls

In [37]:
# Run training. `params` is passed as a concrete list of parameter tensors
# (the original passed the bound method `net.parameters`, which train() could
# neither index nor iterate). Because an optimizer is supplied, the optimizer
# performs the updates and `params` is not actually consulted.
train_loss,test_loss = train(net,train_iter,test_iter,loss,num_epochs,batch_size,list(net.parameters()),lr,optimizer)

epoch 1, train_loss 1.209822,test_loss 0.518126,train_acc 0.720917,test_acc 0.873400
epoch 2, train_loss 0.455061,test_loss 0.388024,train_acc 0.882683,test_acc 0.898800
epoch 3, train_loss 0.386486,test_loss 0.353744,train_acc 0.898983,test_acc 0.906200
epoch 4, train_loss 0.360963,test_loss 0.336683,train_acc 0.905550,test_acc 0.912100
epoch 5, train_loss 0.346036,test_loss 0.324864,train_acc 0.909883,test_acc 0.915200
epoch 6, train_loss 0.335267,test_loss 0.316686,train_acc 0.913467,test_acc 0.918100
epoch 7, train_loss 0.326648,test_loss 0.308872,train_acc 0.915950,test_acc 0.921900
epoch 8, train_loss 0.318852,test_loss 0.303240,train_acc 0.918417,test_acc 0.921600
epoch 9, train_loss 0.312746,test_loss 0.296987,train_acc 0.920083,test_acc 0.923600
epoch 10, train_loss 0.307117,test_loss 0.292110,train_acc 0.921983,test_acc 0.926500
epoch 11, train_loss 0.301997,test_loss 0.286209,train_acc 0.923567,test_acc 0.927100
epoch 12, train_loss 0.297676,test_loss 0.282990,train_acc 0.92