<a href="https://colab.research.google.com/github/yananma/5_programs_per_day/blob/master/0430.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 3.13 丢弃法

In [0]:
# !pip install mxnet d2lzh

In [0]:
import d2lzh as d2l 
from mxnet import autograd, gluon, init, nd 
from mxnet.gluon import loss as gloss, nn 

def dropout(X, drop_prob):
    assert 0 <= drop_prob <= 1 
    keep_prob = 1 - drop_prob
    if keep_prob == 0:
        return X.zeros_like()
    mask = nd.random.uniform(0, 1, X.shape) < keep_prob 
    return mask * X / keep_prob

In [20]:
X = nd.arange(16).reshape((2, 8))
dropout(X, 0)


[[ 0.  1.  2.  3.  4.  5.  6.  7.]
 [ 8.  9. 10. 11. 12. 13. 14. 15.]]
<NDArray 2x8 @cpu(0)>

In [21]:
my_mask = nd.random.uniform(0, 1, X.shape)
my_mask


[[0.03067934 0.7395002  0.81692976 0.11655104 0.27300504 0.8716068
  0.08849455 0.4784922 ]
 [0.8272113  0.6626148  0.3926571  0.33689874 0.49089834 0.6919619
  0.5759835  0.4859751 ]]
<NDArray 2x8 @cpu(0)>

In [22]:
my_mask = nd.random.uniform(0, 1, X.shape) < 0.5 
my_mask


[[0. 1. 1. 1. 1. 0. 1. 1.]
 [0. 1. 0. 1. 1. 1. 1. 1.]]
<NDArray 2x8 @cpu(0)>

In [23]:
my_mask = nd.random.uniform(0, 1, X.shape) < 0.8
my_mask


[[1. 1. 1. 0. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 0. 1. 1.]]
<NDArray 2x8 @cpu(0)>

In [24]:
my_mask = nd.random.uniform(0, 1, X.shape) < 0.2 
my_mask


[[0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]]
<NDArray 2x8 @cpu(0)>

In [25]:
dropout(X, 0.5)


[[ 0.  0.  0.  6.  0. 10. 12.  0.]
 [ 0. 18. 20.  0.  0. 26.  0.  0.]]
<NDArray 2x8 @cpu(0)>

In [26]:
dropout(X, 1)


[[0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]]
<NDArray 2x8 @cpu(0)>

In [0]:
num_inputs, num_outputs, num_hiddens1, num_hiddens2 = 784, 10, 256, 256

W1 = nd.random.normal(scale=0.01, shape=(num_inputs, num_hiddens1))
b1 = nd.zeros(num_hiddens1)
W2 = nd.random.normal(scale=0.01, shape=(num_hiddens1, num_hiddens2))
b2 = nd.zeros(num_hiddens2)
W3 = nd.random.normal(scale=0.01, shape=(num_hiddens2, num_outputs))
b3 = nd.zeros(num_outputs)

params = [W1, b1, W2, b2, W3, b3]
for param in params:
    param.attach_grad()

In [0]:
drop_prob1, drop_prob2 = 0.2, 0.5 

def net(X):
    X = X.reshape((-1, num_inputs))
    H1 = (nd.dot(X, W1) + b1).relu()
    if autograd.is_training():
        H1 = dropout(H1, drop_prob2)
    H2 = (nd.dot(H1, W2) + b2).relu()
    if autograd.is_training():
        H2 = dropout(H2, drop_prob1)
    return nd.dot(H2, W3) + b3 

In [29]:
num_epochs, lr, batch_size = 5, 0.5, 256 
loss = gloss.SoftmaxCrossEntropyLoss()
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)

epoch 1, loss 1.1191, train acc 0.564, test acc 0.769
epoch 2, loss 0.5894, train acc 0.779, test acc 0.815
epoch 3, loss 0.5083, train acc 0.813, test acc 0.831
epoch 4, loss 0.4661, train acc 0.829, test acc 0.849
epoch 5, loss 0.4392, train acc 0.840, test acc 0.855


In [0]:
net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'), 
    nn.Dropout(drop_prob1), 
    nn.Dense(256, activation='relu'), 
    nn.Dropout(drop_prob2), 
    nn.Dense(10))
net.initialize(init.Normal(sigma=0.01))

In [16]:
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, trainer)

epoch 1, loss 1.1179, train acc 0.561, test acc 0.785
epoch 2, loss 0.5754, train acc 0.787, test acc 0.829
epoch 3, loss 0.4867, train acc 0.823, test acc 0.840
epoch 4, loss 0.4409, train acc 0.838, test acc 0.849
epoch 5, loss 0.4194, train acc 0.848, test acc 0.860
