# 3.10 多层感知机的简洁实现

In [2]:
# Core PyTorch, its `nn` layer namespace and the `init` parameter-initialisation helpers.
import torch
from torch import nn
from torch.nn import init
import numpy as np
import sys
# Extend the module search path with the parent directory before importing the
# helper package (presumably d2lzh_pytorch lives one level up — confirm).
sys.path.append("..") 
import d2lzh_pytorch as d2l

# Show which PyTorch version this kernel is running.
print(torch.__version__)

1.8.1


## 3.10.1 定义模型

In [3]:
# Layer widths of the MLP: input features -> hidden units -> output logits.
# (784 is presumably the flattened 28 x 28 image size — confirm against the data loader.)
num_inputs, num_outputs, num_hiddens = 784, 10, 256

# One-hidden-layer perceptron assembled from standard building blocks.
net = nn.Sequential(
    # Keep dim 0 (the batch) and flatten all remaining dims into one.
    nn.Flatten(),
    # Hidden layer: maps 784 -> 256 features (weight is stored as 256 x 784).
    nn.Linear(num_inputs, num_hiddens),
    nn.ReLU(),
    # Output layer: maps 256 -> 10 features (weight is stored as 10 x 256).
    nn.Linear(num_hiddens, num_outputs),
)

# PyTorch already initialises the parameters of every layer on construction,
# so the explicit initialisation below is optional and is left disabled.
# for params in net.parameters():
#     init.normal_(params, mean=0, std=0.01)

In [4]:
# Dump every learnable tensor of the network (weights and biases, in
# registration order), one per line.
print(*net.parameters(), sep="\n")

Parameter containing:
tensor([[-0.0171, -0.0056,  0.0147,  ..., -0.0205, -0.0121,  0.0243],
        [ 0.0019,  0.0246, -0.0232,  ...,  0.0292,  0.0012,  0.0153],
        [-0.0166,  0.0092,  0.0111,  ..., -0.0136, -0.0012, -0.0028],
        ...,
        [-0.0285,  0.0353, -0.0109,  ...,  0.0235, -0.0082, -0.0203],
        [-0.0055,  0.0169,  0.0258,  ..., -0.0211,  0.0335,  0.0270],
        [ 0.0124,  0.0286, -0.0092,  ..., -0.0048, -0.0231, -0.0309]],
       requires_grad=True)
Parameter containing:
tensor([-0.0254, -0.0029, -0.0108, -0.0327,  0.0345, -0.0022,  0.0125,  0.0129,
         0.0005, -0.0087, -0.0033,  0.0240,  0.0327,  0.0165, -0.0263,  0.0247,
         0.0312, -0.0106, -0.0114,  0.0158,  0.0174, -0.0315,  0.0247, -0.0040,
         0.0280, -0.0125,  0.0038, -0.0156,  0.0322,  0.0352,  0.0176,  0.0111,
        -0.0027, -0.0279,  0.0046,  0.0242, -0.0086,  0.0265, -0.0307, -0.0008,
        -0.0213,  0.0221,  0.0122, -0.0187, -0.0109, -0.0272, -0.0044, -0.0286,
        -0.0238

## 3.10.2 读取数据并训练模型

In [5]:
# Training hyper-parameters.
batch_size = 256
num_epochs = 10

# Build the Fashion-MNIST mini-batch iterators via the d2l helper.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

# Number of mini-batches per epoch for each split, i.e. the sample count
# divided by batch_size, rounded up (235 and 40 in the recorded run).
print(len(train_iter), len(test_iter), sep="\n")

# Cross-entropy objective, optimised with plain SGD over all network parameters.
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)

d2l.train_ch3(
    net,
    train_iter,
    test_iter,
    loss,
    num_epochs,
    batch_size,
    # Two positional slots left unset — presumably the manual `params` / `lr`
    # arguments, unused when an optimizer is supplied (confirm against d2lzh_pytorch).
    None,
    None,
    optimizer,
)

len(mnist_train) 60000
len(mnist_test) 10000
235
40
epoch 1, loss 0.0069, train acc 0.556, test acc 0.650
epoch 2, loss 0.0042, train acc 0.671, test acc 0.672
epoch 3, loss 0.0034, train acc 0.705, test acc 0.708
epoch 4, loss 0.0030, train acc 0.736, test acc 0.737
epoch 5, loss 0.0028, train acc 0.759, test acc 0.758
epoch 6, loss 0.0026, train acc 0.775, test acc 0.773
epoch 7, loss 0.0025, train acc 0.787, test acc 0.779
epoch 8, loss 0.0024, train acc 0.798, test acc 0.786
epoch 9, loss 0.0023, train acc 0.804, test acc 0.794
epoch 10, loss 0.0022, train acc 0.810, test acc 0.799
