<a href="https://colab.research.google.com/github/yananma/5_programs_per_day/blob/master/04212.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 3.13 丢弃法

### 3.13.2 从零开始实现

In [0]:
%matplotlib inline 
import torch 
import torch.nn as nn 
import numpy as np
import d2l 


def dropout(X, drop_prob):
    """Apply inverted dropout to ``X``.

    Each element is zeroed independently with probability ``drop_prob``; the
    surviving elements are divided by ``1 - drop_prob`` so the expected value
    of every element is unchanged.

    Args:
        X: Input tensor. It is converted to float32 via ``X.float()``.
        drop_prob: Probability of dropping an element, in ``[0, 1]``.

    Returns:
        A float tensor with the same shape as ``X``, on the same device.

    Raises:
        AssertionError: If ``drop_prob`` is outside ``[0, 1]``.
    """
    X = X.float()
    assert 0 <= drop_prob <= 1
    keep_prob = 1 - drop_prob

    # Dropping everything: return zeros directly instead of dividing by zero.
    if keep_prob == 0:
        return torch.zeros_like(X)

    # rand_like samples on X's own device, so the mask can be multiplied with
    # X even when X does not live on the CPU.
    mask = (torch.rand_like(X) < keep_prob).float()

    return mask * X / keep_prob

In [4]:
# Demo input: the integers 0..15 laid out as a 2 x 8 tensor.
X = torch.arange(16).reshape(2, 8)
# drop_prob = 0 keeps every element, so the result is just X as floats.
dropout(X, 0)

tensor([[ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11., 12., 13., 14., 15.]])

In [5]:
# drop_prob = 0.5: each element is zeroed with probability 0.5 and the
# survivors are scaled by 1 / 0.5 = 2 (the output changes on every run).
dropout(X, 0.5)

tensor([[ 0.,  0.,  4.,  0.,  0.,  0., 12.,  0.],
        [ 0., 18., 20.,  0.,  0., 26.,  0., 30.]])

In [6]:
# drop_prob = 1 drops everything: dropout returns an all-zero tensor shaped like X.
dropout(X, 1)

tensor([[0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.]])

#### 1. 定义模型参数

In [0]:
# Layer widths of the 784 -> 256 -> 256 -> 10 multilayer perceptron.
num_inputs, num_hiddens1, num_hiddens2, num_outputs = 784, 256, 256, 10

# Weights are sampled with NumPy from a normal distribution (mean 0, standard
# deviation 0.01) and converted to float32 tensors that track gradients;
# biases start at zero. The sampling order W1, W2, W3 is kept so that a seeded
# NumPy generator yields the same values.
W1 = torch.tensor(
    np.random.normal(loc=0, scale=0.01, size=(num_inputs, num_hiddens1)),
    dtype=torch.float32,
    requires_grad=True,
)
b1 = torch.zeros(num_hiddens1, requires_grad=True)

W2 = torch.tensor(
    np.random.normal(loc=0, scale=0.01, size=(num_hiddens1, num_hiddens2)),
    dtype=torch.float32,
    requires_grad=True,
)
b2 = torch.zeros(num_hiddens2, requires_grad=True)

W3 = torch.tensor(
    np.random.normal(loc=0, scale=0.01, size=(num_hiddens2, num_outputs)),
    dtype=torch.float32,
    requires_grad=True,
)
b3 = torch.zeros(num_outputs, requires_grad=True)

# Every trainable tensor, in layer order, for the optimizer step.
params = [W1, b1, W2, b2, W3, b3]

#### 2. 定义模型

In [0]:
# Dropout probabilities applied after the first and second hidden layers.
drop_prob1, drop_prob2 = 0.2, 0.5 

def net(X, is_training=True):
    """Forward pass of the from-scratch MLP with dropout.

    The input is flattened to ``(-1, num_inputs)`` and passed through two
    ReLU hidden layers and a linear output layer. Dropout follows each hidden
    layer, but only when ``is_training`` is true.

    Args:
        X: Input batch; reshaped with ``X.view(-1, num_inputs)``.
        is_training: Apply dropout when true, skip it otherwise.

    Returns:
        The output-layer activations (no softmax is applied here).
    """
    flat = X.view(-1, num_inputs)

    hidden1 = torch.relu(flat @ W1 + b1)
    if is_training:
        hidden1 = dropout(hidden1, drop_prob1)

    hidden2 = torch.relu(hidden1 @ W2 + b2)
    if is_training:
        hidden2 = dropout(hidden2, drop_prob2)

    return hidden2 @ W3 + b3

In [0]:
def evaluate_accuracy(data_iter, net):
    """Return the classification accuracy of ``net`` over ``data_iter``.

    Args:
        data_iter: Iterable of ``(X, y)`` batches, where ``y`` holds the class
            index of each example.
        net: Either a ``torch.nn.Module`` or a plain callable. A module is
            switched to eval mode for the evaluation and then restored to the
            mode it was in before the call. A plain callable that accepts an
            ``is_training`` parameter is called with ``is_training=False``.

    Returns:
        The fraction of correctly classified examples, as a Python float.

    Raises:
        ZeroDivisionError: If ``data_iter`` yields no examples.
    """
    import inspect  # local import: only needed to inspect plain callables

    is_module = isinstance(net, torch.nn.Module)

    # Look at the real signature instead of ``__code__.co_varnames``: the
    # latter also lists local variables and is missing on callables that are
    # not plain functions.
    takes_flag = False
    if not is_module:
        try:
            takes_flag = 'is_training' in inspect.signature(net).parameters
        except (TypeError, ValueError):
            takes_flag = False

    # Remember the current mode so an already-evaluating module is not
    # silently put back into training mode.
    was_training = net.training if is_module else False
    if is_module:
        net.eval()

    acc_sum, n = 0.0, 0
    try:
        # Evaluation needs no gradients; skip building the autograd graph.
        with torch.no_grad():
            for X, y in data_iter:
                if takes_flag:
                    y_hat = net(X, is_training=False)
                else:
                    y_hat = net(X)
                # .item() keeps the accumulator a float on every branch.
                acc_sum += (y_hat.argmax(dim=1) == y).float().sum().item()
                n += y.shape[0]
    finally:
        if is_module:
            net.train(was_training)
    return acc_sum / n

#### 3. 训练和测试模型

In [12]:
# Training hyperparameters for the from-scratch model.
# NOTE(review): lr = 100.0 is unusually large — presumably d2l.train_ch3's
# manual SGD step divides by batch_size while CrossEntropyLoss already
# averages over the batch; confirm against the d2l helper.
num_epochs, lr, batch_size = 5, 100.0, 256 
loss = torch.nn.CrossEntropyLoss()
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
# Passing params and lr (and no optimizer) trains the hand-written parameter tensors.
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)

epoch 1, loss 1.1633, train acc 0.547, test acc 0.746
epoch 2, loss 0.5898, train acc 0.784, test acc 0.768
epoch 3, loss 0.4866, train acc 0.825, test acc 0.833
epoch 4, loss 0.4475, train acc 0.838, test acc 0.781
epoch 5, loss 0.4173, train acc 0.848, test acc 0.843


### 3.13.3 简洁实现

In [0]:
# The same 784 -> 256 -> 256 -> 10 MLP, built from torch.nn layers.
# nn.Dropout is only active while the module is in training mode, so no
# explicit is_training flag is needed.
# NOTE: this rebinds the name `net`, replacing the from-scratch function.
net = nn.Sequential(
    d2l.FlattenLayer(),
    nn.Linear(num_inputs, num_hiddens1),
    nn.ReLU(),
    nn.Dropout(drop_prob1),
    nn.Linear(num_hiddens1, num_hiddens2),
    nn.ReLU(),
    nn.Dropout(drop_prob2),
    # Use num_outputs rather than a literal 10 so the output width stays in
    # sync with the from-scratch model's configuration.
    nn.Linear(num_hiddens2, num_outputs),
)

# Initialise every parameter (weights and biases alike) from a normal
# distribution with mean 0 and standard deviation 0.01.
for param in net.parameters():
    nn.init.normal_(param, mean=0, std=0.01)

In [15]:
# Built-in SGD over the nn.Sequential model's parameters.
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)
# params and lr are passed as None because an optimizer is supplied instead —
# presumably train_ch3 then steps the optimizer; confirm against the d2l helper.
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, optimizer)

epoch 1, loss 1.1184, train acc 0.563, test acc 0.732
epoch 2, loss 0.5849, train acc 0.784, test acc 0.805
epoch 3, loss 0.4855, train acc 0.823, test acc 0.838
epoch 4, loss 0.4415, train acc 0.838, test acc 0.824
epoch 5, loss 0.4131, train acc 0.848, test acc 0.842
