# 3.9 多层感知机的从零开始实现

In [16]:
import torchvision
import torchvision.transforms as transforms
from torch import nn
from torch.nn import init
import torch
import torchvision
import numpy as np
import sys

## 获取和读取数据

In [17]:
def load_data_fashion_mnist(batch_size, root='~/Datasets/FashionMNIST'):
    """Download Fashion-MNIST (if needed) and wrap both splits in DataLoaders.

    Args:
        batch_size: Number of samples per mini-batch for both loaders.
        root: Directory handed to ``torchvision.datasets.FashionMNIST`` as the
            dataset location.

    Returns:
        A ``(train_iter, test_iter)`` tuple. The training loader shuffles its
        samples; the test loader does not.
    """
    to_tensor = transforms.ToTensor()

    # No extra loader processes on Windows (0 means load in the main
    # process); four worker processes everywhere else.
    worker_count = 0 if sys.platform.startswith('win') else 4

    mnist_train = torchvision.datasets.FashionMNIST(
        root=root, train=True, download=True, transform=to_tensor)
    mnist_test = torchvision.datasets.FashionMNIST(
        root=root, train=False, download=True, transform=to_tensor)

    train_iter = torch.utils.data.DataLoader(
        mnist_train, batch_size=batch_size, shuffle=True, num_workers=worker_count)
    test_iter = torch.utils.data.DataLoader(
        mnist_test, batch_size=batch_size, shuffle=False, num_workers=worker_count)
    return train_iter, test_iter

In [18]:
# Mini-batch size shared by the data loaders and the training loop.
batch_size = 256
# Build the Fashion-MNIST training and test iterators.
train_iter, test_iter = load_data_fashion_mnist(batch_size=batch_size)

## 定义模型参数
输入个数为784，输出个数为10。实验中，我们设超参数隐藏单元个数为256。

In [19]:
# Layer sizes: 784 input features, 256 hidden units, 10 outputs.
num_inputs = 784
num_outputs = 10
num_hiddens = 256

# Weights are drawn from N(0, 0.01^2) with NumPy and stored as float32
# tensors; biases start at zero.
W1 = torch.tensor(
    np.random.normal(loc=0, scale=0.01, size=(num_inputs, num_hiddens)),
    dtype=torch.float,
)
b1 = torch.zeros(num_hiddens, dtype=torch.float)
W2 = torch.tensor(
    np.random.normal(loc=0, scale=0.01, size=(num_hiddens, num_outputs)),
    dtype=torch.float,
)
b2 = torch.zeros(num_outputs, dtype=torch.float)

# Turn on gradient tracking for every trainable tensor.
params = [W1, b1, W2, b2]
for param in params:
    param.requires_grad_(True)

## 定义激活函数和损失函数
这里使用`torch.max`函数手动实现ReLU，而不是直接调用PyTorch自带的ReLU函数。

In [31]:
def softmax(x):
    """Return the softmax of ``x`` taken along dimension 1.

    Args:
        x: Tensor with at least two dimensions; normalisation is performed
            over dimension 1.

    Returns:
        Tensor of the same shape as ``x`` whose entries along dimension 1 are
        non-negative and sum to 1.
    """
    # Subtracting the maximum along dim 1 does not change the mathematical
    # result, but keeps exp() from overflowing to inf (which would otherwise
    # turn the division below into inf / inf = nan).
    shifted = x - x.max(dim=1, keepdim=True)[0]
    x_exp = shifted.exp()
    # Sum over dimension 1, keeping that dimension so the division broadcasts.
    tot = x_exp.sum(dim=1, keepdim=True)
    return x_exp / tot
def relu(X):
    """Apply ReLU element-wise: each entry of ``X`` is replaced by max(entry, 0).

    Implemented by hand with ``torch.max`` against a scalar zero tensor.
    """
    zero = torch.tensor(0.0)
    return torch.max(X, zero)
def loss(y_hat, y):
    """Per-example cross-entropy loss from predicted probabilities.

    ``torch.gather`` picks values by index: ``y`` is reshaped into a column of
    indices, and for each row of ``y_hat`` the entry at that row's index is
    selected. For example, with labels ``y = [2, 0]`` the values taken are
    ``y_hat[0, 2]`` and ``y_hat[1, 0]``.

    Args:
        y_hat: 2-D tensor indexed along dimension 1 by the labels.
        y: Integer tensor of class indices, one per row of ``y_hat``.

    Returns:
        Tensor with one column holding the negative log of each selected
        entry.
    """
    label_column = y.view(-1, 1)
    picked = y_hat.gather(1, label_column)
    return -torch.log(picked)


## 定义模型
同softmax回归一样，我们通过view函数将每张原始图像改成长度为num_inputs的向量。然后我们实现上一节中多层感知机的计算表达式。

In [42]:
def net(x):
    """Forward pass of the one-hidden-layer perceptron.

    The input is reshaped to rows of length ``num_inputs``, passed through the
    hidden layer (``W1``, ``b1``) with ReLU, then through the output layer
    (``W2``, ``b2``) followed by softmax.
    """
    flat = x.view(-1, num_inputs)
    hidden = relu(torch.mm(flat, W1) + b1)
    scores = torch.mm(hidden, W2) + b2
    return softmax(scores)

## 训练模型

In [43]:
def evaluate_accuracy(data_iter, net):
    """Return the fraction of samples in ``data_iter`` that ``net`` classifies correctly.

    Args:
        data_iter: Iterable yielding ``(X, y)`` batches, where ``y`` holds the
            class index for each sample in ``X``.
        net: Callable mapping a batch ``X`` to per-class scores; the predicted
            class is the argmax over dimension 1.

    Returns:
        Accuracy as a float (correct predictions divided by sample count).

    Raises:
        ZeroDivisionError: If ``data_iter`` yields no samples.
    """
    acc_sum, n = 0.0, 0
    # Evaluation needs no gradients; disabling autograd avoids building a
    # computation graph for every batch.
    with torch.no_grad():
        for X, y in data_iter:
            acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
    return acc_sum / n

def sgd(params, lr, batch_size):
    """Apply one mini-batch SGD step to every tensor in ``params`` in place.

    Each parameter is decreased by ``lr * grad / batch_size``.

    Note: the division by ``batch_size`` is kept for consistency with the
    original book. It is usually unnecessary when the loss has already been
    averaged over the batch dimension, as PyTorch loss functions typically do
    by default.
    """
    for param in params:
        step = lr * param.grad / batch_size
        # Update through .data so the change is not recorded by autograd.
        param.data.sub_(step)

def train(net, train_iter, test_iter, loss, num_epochs, batch_size, params=None, lr=None):
    """Train ``net`` with mini-batch SGD and print metrics after every epoch.

    Args:
        net: Callable mapping an input batch to per-class outputs.
        train_iter: Iterable of ``(X, y)`` training batches.
        test_iter: Iterable of ``(X, y)`` batches used to measure test accuracy.
        loss: Callable ``loss(y_hat, y)`` returning per-example losses; they
            are summed over the batch before back-propagation.
        num_epochs: Number of passes over ``train_iter``.
        batch_size: Divisor passed to ``sgd`` for the parameter update.
        params: Iterable of tensors to optimise. Despite the ``None`` default,
            it must be supplied because it is iterated on every step.
        lr: Learning rate passed to ``sgd``. Must be supplied as well.

    Returns:
        None. One line with the mean training loss, training accuracy and
        test accuracy is printed per epoch.
    """
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()

            # Clear gradients left over from the previous step. A tensor's
            # .grad stays None until the first backward() call, so skip
            # parameters that have no gradient yet instead of crashing on
            # the very first iteration.
            for param in params:
                if param.grad is not None:
                    param.grad.data.zero_()

            l.backward()
            sgd(params, lr, batch_size)

            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f' % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))

In [45]:
# Training hyperparameters: number of epochs and learning rate.
num_epochs = 10
lr = 0.1
train(net, train_iter, test_iter, loss, num_epochs, batch_size,
      params=[W1, b1, W2, b2], lr=lr)

epoch 1, loss 0.4319, train acc 0.849, test acc 0.840
epoch 2, loss 0.4160, train acc 0.854, test acc 0.845
epoch 3, loss 0.4021, train acc 0.858, test acc 0.843
epoch 4, loss 0.3917, train acc 0.862, test acc 0.850
epoch 5, loss 0.3813, train acc 0.865, test acc 0.854
epoch 6, loss 0.3719, train acc 0.867, test acc 0.855
epoch 7, loss 0.3628, train acc 0.872, test acc 0.852
epoch 8, loss 0.3584, train acc 0.873, test acc 0.859
epoch 9, loss 0.3509, train acc 0.876, test acc 0.861
epoch 10, loss 0.3447, train acc 0.877, test acc 0.862
