# 19 multi-classification

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [2]:
batch_size=200
learning_rate=0.01
epochs=10

###### 数据加载

In [3]:
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('./data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('./data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])),
    batch_size=batch_size, shuffle=True)

###### 网络参数设置

In [4]:
w1, b1 = torch.randn(200, 784, requires_grad=True),\
         torch.zeros(200, requires_grad=True)
w2, b2 = torch.randn(200, 200, requires_grad=True),\
         torch.zeros(200, requires_grad=True)
w3, b3 = torch.randn(10, 200, requires_grad=True),\
         torch.zeros(10, requires_grad=True)

###### 何凯明初始化

影响train的因素：1.learning rate过大 2.gradient vanished 3.初始化

In [5]:
torch.nn.init.kaiming_normal_(w1)      #不使用的话可能导致loss不更新。
torch.nn.init.kaiming_normal_(w2)
torch.nn.init.kaiming_normal_(w3)

tensor([[-0.0650,  0.2281, -0.0373,  ..., -0.1118, -0.0446, -0.0709],
        [-0.0175,  0.0197,  0.0429,  ..., -0.0959, -0.2082, -0.1430],
        [ 0.0222,  0.1012,  0.0431,  ...,  0.0101, -0.0253, -0.1775],
        ...,
        [ 0.1100,  0.0344,  0.2061,  ...,  0.1183,  0.0868, -0.0626],
        [ 0.0494,  0.0367, -0.0309,  ..., -0.0782,  0.0014,  0.0220],
        [-0.0151,  0.1183,  0.0261,  ..., -0.0092,  0.0322, -0.0596]],
       requires_grad=True)

###### 前向传播

In [6]:
def forward(x):
    x = x@w1.t() + b1
    x = F.relu(x)
    x = x@w2.t() + b2
    x = F.relu(x)
    x = x@w3.t() + b3
    x = F.relu(x)
    return x

In [7]:
optimizer = optim.SGD([w1, b1, w2, b2, w3, b3], lr=learning_rate)
criteon = nn.CrossEntropyLoss()

In [8]:
for epoch in range(epochs):

    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.view(-1, 28*28)

        logits = forward(data)
        loss = criteon(logits, target)

        optimizer.zero_grad()
        loss.backward()
        # print(w1.grad.norm(), w2.grad.norm())
        optimizer.step()

        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                       100. * batch_idx / len(train_loader), loss.item()))


    test_loss = 0
    correct = 0
    for data, target in test_loader:
        data = data.view(-1, 28 * 28)
        logits = forward(data)
        test_loss += criteon(logits, target).item()

        pred = logits.data.max(1)[1]
        correct += pred.eq(target.data).sum()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))


Test set: Average loss: 0.0018, Accuracy: 8965/10000 (89%)


Test set: Average loss: 0.0014, Accuracy: 9184/10000 (91%)


Test set: Average loss: 0.0012, Accuracy: 9280/10000 (92%)


Test set: Average loss: 0.0011, Accuracy: 9344/10000 (93%)


Test set: Average loss: 0.0010, Accuracy: 9388/10000 (93%)


Test set: Average loss: 0.0010, Accuracy: 9439/10000 (94%)


Test set: Average loss: 0.0009, Accuracy: 9453/10000 (94%)


Test set: Average loss: 0.0008, Accuracy: 9498/10000 (94%)


Test set: Average loss: 0.0008, Accuracy: 9523/10000 (95%)


Test set: Average loss: 0.0008, Accuracy: 9538/10000 (95%)

