## 1.从零开始实现

In [2]:
import gluonbook as gb
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import loss as gloss, nn

def dropout(X, drop_prob):
    """Apply inverted dropout to ``X``.

    Each element is zeroed independently with probability ``drop_prob``; the
    surviving elements are divided by ``1 - drop_prob`` so that the expected
    value of every output element equals the corresponding input element.

    Args:
        X: NDArray of activations.
        drop_prob: Probability of dropping an element; must lie in [0, 1].

    Returns:
        An NDArray shaped like ``X``. When ``drop_prob`` is 1 the result is
        all zeros.

    Raises:
        AssertionError: If ``drop_prob`` is outside [0, 1].
    """
    assert 0 <= drop_prob <= 1
    keep_prob = 1 - drop_prob
    # Every element is dropped; return early so we never divide by zero below.
    if keep_prob == 0:
        return X.zeros_like()
    # Sample the mask on the same device as X and cast it to X's dtype, so the
    # element-wise product also works for GPU inputs and non-float32 arrays.
    mask = nd.random.uniform(0, 1, X.shape, ctx=X.context) < keep_prob
    return mask.astype(X.dtype) * X / keep_prob

In [3]:
# Build a 2x8 test array holding the values 0..15, then apply dropout with
# probability 0: keep_prob is 1, so every element is kept unchanged.
X = nd.arange(16).reshape((2, 8))
dropout(X, 0)


[[ 0.  1.  2.  3.  4.  5.  6.  7.]
 [ 8.  9. 10. 11. 12. 13. 14. 15.]]
<NDArray 2x8 @cpu(0)>

In [4]:
# With probability 0.5 each element is zeroed at random; the survivors are
# divided by keep_prob = 0.5, i.e. doubled.
dropout(X, 0.5)


[[ 0.  2.  4.  6.  0.  0.  0. 14.]
 [ 0. 18.  0.  0. 24. 26. 28.  0.]]
<NDArray 2x8 @cpu(0)>

In [5]:
# A drop probability of 1 takes the early-return branch and yields all zeros.
dropout(X, 1)


[[0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]]
<NDArray 2x8 @cpu(0)>

### 定义模型参数

In [6]:
# Layer sizes: input width, two hidden layers, and output width.
num_inputs = 784
num_outputs = 10
num_hiddens1 = 256
num_hiddens2 = 256

# Weights are drawn from a normal distribution with scale 0.01; biases start
# at zero. The weights are created in layer order (W1, W2, W3).
W1 = nd.random.normal(shape=(num_inputs, num_hiddens1), scale=0.01)
b1 = nd.zeros(num_hiddens1)
W2 = nd.random.normal(shape=(num_hiddens1, num_hiddens2), scale=0.01)
b2 = nd.zeros(num_hiddens2)
W3 = nd.random.normal(shape=(num_hiddens2, num_outputs), scale=0.01)
b3 = nd.zeros(num_outputs)

# Collect every trainable array and attach a gradient buffer to each one.
params = [W1, b1, W2, b2, W3, b3]
for param in params:
    param.attach_grad()

### 定义模型

In [7]:
# Drop probabilities used after the first and second hidden layers of `net`.
drop_prob1, drop_prob2 = 0.2, 0.5

def net(X):
    """Forward pass of the two-hidden-layer perceptron with dropout.

    The input is reshaped to rows of ``num_inputs`` features. Each hidden
    layer is an affine map followed by ReLU. Dropout is applied to each
    hidden layer's output, but only while autograd reports training mode.

    Args:
        X: Input NDArray; reshaped to ``(-1, num_inputs)``.

    Returns:
        The output-layer result ``dot(hidden2, W3) + b3``.
    """
    # Dropout is only used while training the model.
    training = autograd.is_training()

    flat = X.reshape((-1, num_inputs))

    hidden1 = (nd.dot(flat, W1) + b1).relu()
    if training:
        # Dropout after the first fully connected layer.
        hidden1 = dropout(hidden1, drop_prob1)

    hidden2 = (nd.dot(hidden1, W2) + b2).relu()
    if training:
        # Dropout after the second fully connected layer.
        hidden2 = dropout(hidden2, drop_prob2)

    return nd.dot(hidden2, W3) + b3

### 训练和测试模型

In [8]:
# Training hyperparameters.
num_epochs = 5
lr = 0.5
batch_size = 256

loss = gloss.SoftmaxCrossEntropyLoss()
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)

# Train the from-scratch model: the raw parameter list and learning rate are
# passed directly, without a Gluon trainer.
gb.train_ch3(
    net, train_iter, test_iter, loss,
    num_epochs, batch_size, params, lr,
)

epoch 1, loss 1.3115, train acc 0.494, test acc 0.717
epoch 2, loss 0.6323, train acc 0.761, test acc 0.824
epoch 3, loss 0.5281, train acc 0.807, test acc 0.842
epoch 4, loss 0.4759, train acc 0.825, test acc 0.854
epoch 5, loss 0.4448, train acc 0.839, test acc 0.853


## 2.Gluon实现

In [9]:
# Gluon version of the model. Note that this rebinds the name `net`.
net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dropout(drop_prob1))  # dropout after the first fully connected layer
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dropout(drop_prob2))  # dropout after the second fully connected layer
net.add(nn.Dense(10))
net.initialize(init.Normal(sigma=0.01))

### 训练并测试模型

In [10]:
# SGD trainer over all parameters of the Gluon network, using learning rate `lr`.
trainer = gluon.Trainer(
    net.collect_params(),
    'sgd',
    {'learning_rate': lr},
)
# The params and lr slots are None here; the trainer is supplied instead.
gb.train_ch3(
    net, train_iter, test_iter, loss, num_epochs, batch_size,
    None, None, trainer,
)

epoch 1, loss 1.1933, train acc 0.535, test acc 0.769
epoch 2, loss 0.6014, train acc 0.773, test acc 0.786
epoch 3, loss 0.5035, train acc 0.816, test acc 0.850
epoch 4, loss 0.4541, train acc 0.835, test acc 0.855
epoch 5, loss 0.4223, train acc 0.847, test acc 0.864
