# 多层感知机

In [26]:
import torch
from torch import nn
from d2l import torch as d2l
import torchvision

batch_size = 256
def load_data(batch_size, isTrain=True):
    dataset = torchvision.datasets.FashionMNIST(root='../data',
                                                train=isTrain,
                                                transform=torchvision.transforms.ToTensor())
    data_loader = torch.utils.data.DataLoader(dataset, batch_size, shuffle=isTrain)
    for imgs, labels in data_loader:
        yield imgs.reshape(-1, 784).type(torch.float32), labels
# train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
train_iter = load_data(batch_size, True)
test_iter = load_data(batch_size, False)

In [27]:
class Accumulator:
    def __init__(self, n):
        self.data = [0.] * n
        
    def __getitem__(self, i):
        return self.data[i]
    
    def add(self, *args):
        self.data = [a + b for (a, b) in zip(self.data, args)]

### 定义ReLu函数

In [28]:
def relu(X):
    a = torch.zeros_like(X)
    return torch.max(X, a)

In [29]:
W1 = nn.Parameter(torch.randn(784, 256, requires_grad=True) * 0.01)
b1 = nn.Parameter(torch.zeros(256, requires_grad=True))
W2 = nn.Parameter(torch.randn(256, 10, requires_grad=True) * 0.01)
b2 = nn.Parameter(torch.zeros(10, requires_grad=True))

In [30]:
def softmax(X):
    exp_X = torch.exp(X)
    return exp_X / exp_X.sum(1, keepdim=True)

In [31]:
def net(X):
    X = X.reshape(-1, 784)
    H = relu(X @ W1 + b1)
    return (H @ W2 + b2)
#     return X @ W1 + b1

In [32]:
loss = nn.CrossEntropyLoss()

In [34]:

params = [W1, b1, W2, b2]
updater = torch.optim.SGD(params, 0.1)
num_epochs = 10
# d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, updater)

for epoch in range(num_epochs):
    acc = Accumulator(2)
    lacc = Accumulator(1)
    train_iter = load_data(batch_size, isTrain=True)
    test_iter = load_data(batch_size, isTrain=False)
    for x, y in train_iter:
        updater.zero_grad()
        y_hat = net(x)
        l = loss(y_hat, y)
        lacc.add(l)
        l.backward()
        updater.step()
    print(lacc[0])
    for x, y in test_iter:
        with torch.no_grad():
            y_hat = net(x)
            correct_num = (torch.argmax(y_hat, dim=1) == y).type(y.dtype).sum()
            acc.add(correct_num, len(y))
# #             print(softmax(y_hat))
    print(acc[0], acc[1])

tensor(245.1989, grad_fn=<AddBackward0>)
tensor(6994.) 10000.0
tensor(140.5772, grad_fn=<AddBackward0>)
tensor(8017.) 10000.0
tensor(122.2071, grad_fn=<AddBackward0>)
tensor(8236.) 10000.0
tensor(112.6896, grad_fn=<AddBackward0>)
tensor(8180.) 10000.0
tensor(106.3108, grad_fn=<AddBackward0>)
tensor(8153.) 10000.0
tensor(101.6768, grad_fn=<AddBackward0>)
tensor(8292.) 10000.0
tensor(97.6805, grad_fn=<AddBackward0>)
tensor(8370.) 10000.0
tensor(94.8235, grad_fn=<AddBackward0>)
tensor(8464.) 10000.0
tensor(91.7707, grad_fn=<AddBackward0>)
tensor(8155.) 10000.0
tensor(89.3033, grad_fn=<AddBackward0>)
tensor(8517.) 10000.0
