<a href="https://colab.research.google.com/github/yananma/5_programs_per_day/blob/master/02165.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 5.8 网络中的网络 ( NiN )

### 5.8.1 NiN 块

In [0]:
import torch
from torch import nn, optim
import d2l 
import time 
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 


def nin_block(in_channels, out_channels, kernel_size, stride, padding):
    blk = nn.Sequential(nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding), 
               nn.ReLU(), 
               nn.Conv2d(out_channels, out_channels, kernel_size=1), 
               nn.ReLU(), 
               nn.Conv2d(out_channels, out_channels, kernel_size=1), 
               nn.ReLU())
    return blk 

### 5.8.2 NiN 模型

In [0]:
net = nn.Sequential(
    nin_block(1, 96, kernel_size=11, stride=4, padding=0), 
    nn.MaxPool2d(kernel_size=3, stride=2), 
    nin_block(96, 256, kernel_size=5, stride=1, padding=2),
    nn.MaxPool2d(kernel_size=3, stride=2), 
    nin_block(256, 384, kernel_size=3, stride=1, padding=1), 
    nn.MaxPool2d(kernel_size=3, stride=2), 
    nn.Dropout(0.5), 
    nin_block(384, 10, kernel_size=3, stride=1, padding=1), 
    nn.AvgPool2d(kernel_size=5),
    d2l.FlattenLayer()
)

In [4]:
X = torch.rand(1, 1, 224, 224)
for name, blk in net.named_children():
    X = blk(X)
    print(name, 'output shape: ', X.shape)

0 output shape:  torch.Size([1, 96, 54, 54])
1 output shape:  torch.Size([1, 96, 26, 26])
2 output shape:  torch.Size([1, 256, 26, 26])
3 output shape:  torch.Size([1, 256, 12, 12])
4 output shape:  torch.Size([1, 384, 12, 12])
5 output shape:  torch.Size([1, 384, 5, 5])
6 output shape:  torch.Size([1, 384, 5, 5])
7 output shape:  torch.Size([1, 10, 5, 5])
8 output shape:  torch.Size([1, 10, 1, 1])
9 output shape:  torch.Size([1, 10])


### 5.8.3 获取数据和训练模型

In [6]:
batch_size = 128 
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)

lr, num_epochs = 0.002, 5 
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)

training on  cuda
epoch 1, loss 2.3037, train acc 0.099, test acc 0.100, time 142.3 sec
epoch 2, loss 2.3026, train acc 0.100, test acc 0.100, time 142.2 sec
epoch 3, loss 2.3026, train acc 0.100, test acc 0.100, time 142.1 sec
epoch 4, loss 2.3026, train acc 0.100, test acc 0.100, time 141.5 sec
epoch 5, loss 2.3026, train acc 0.100, test acc 0.100, time 142.6 sec
