In [8]:
import torch
import torch.nn as nn
from d2l import torch as d2l
import torch.nn.functional as F

# 其他网络结构
> 这里不会重点介绍网络的训练细节, 只是介绍网络的结构以及组成

## NiN(网络中的网络)
### NiN 块
`NiN`块结构如下:
![图片.png](attachment:0c38f06f-2cda-44f1-a542-d24a3fa6d7e9.png)
### NiN 网络
`NiN`网络由一系列 `NiN`块堆叠而成, 相邻的 `NiN`块之间通过最大汇聚层连接; 最后一个 `NiN`块的输出通道数等于输出类别数, 再经过全局平均汇聚层和展平层直接得到每个类别的输出, 因此不需要全连接层

In [2]:
# NiN block
def nin_block(in_channels, out_channels, kernel_size, strides, padding):
    """Build one NiN block: a spatial convolution followed by two 1x1 convolutions.

    Every convolution is followed by a ReLU, and both 1x1 convolutions map
    ``out_channels`` to ``out_channels``.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by every convolution in the block.
        kernel_size: Kernel size of the first convolution.
        strides: Stride of the first convolution.
        padding: Padding of the first convolution.

    Returns:
        An ``nn.Sequential`` holding, in order:
        Conv2d, ReLU, Conv2d(1x1), ReLU, Conv2d(1x1), ReLU.
    """
    layers = [
        nn.Conv2d(in_channels, out_channels, kernel_size, strides, padding),
        nn.ReLU(),
    ]
    # Append the two identical 1x1 convolution + ReLU pairs.
    for _ in range(2):
        layers.append(nn.Conv2d(out_channels, out_channels, kernel_size=1))
        layers.append(nn.ReLU())
    return nn.Sequential(*layers)

In [3]:
# NiN model
net = nn.Sequential(
    # Stage 1: 11x11 convolution with stride 4, then 3x3 / stride-2 max pooling.
    nin_block(in_channels=1, out_channels=96, kernel_size=11, strides=4, padding=0),
    nn.MaxPool2d(kernel_size=3, stride=2),
    # Stage 2: 5x5 convolution; padding=2 keeps the spatial size.
    nin_block(in_channels=96, out_channels=256, kernel_size=5, strides=1, padding=2),
    nn.MaxPool2d(kernel_size=3, stride=2),
    # Stage 3: 3x3 convolution; padding=1 keeps the spatial size.
    nin_block(in_channels=256, out_channels=384, kernel_size=3, strides=1, padding=1),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Dropout(p=0.5),
    # Final block outputs 10 channels (presumably one per output class).
    nin_block(in_channels=384, out_channels=10, kernel_size=3, strides=1, padding=1),
    # Global average pooling: the feature map becomes (batch_size, channels, 1, 1).
    nn.AdaptiveAvgPool2d(output_size=(1, 1)),
    # Flatten to (batch_size, channels).
    nn.Flatten(),
)

In [7]:
# Push one random single-channel 224x224 input through the network, one
# top-level layer at a time, and report the output shape after each layer.
X = torch.rand(1, 1, 224, 224)
for layer in net:
    X = layer(X)
    print(type(layer).__name__, '\t output shape: \t', X.shape)

Sequential 	 output shape: 	 torch.Size([1, 96, 54, 54])
MaxPool2d 	 output shape: 	 torch.Size([1, 96, 26, 26])
Sequential 	 output shape: 	 torch.Size([1, 256, 26, 26])
MaxPool2d 	 output shape: 	 torch.Size([1, 256, 12, 12])
Sequential 	 output shape: 	 torch.Size([1, 384, 12, 12])
MaxPool2d 	 output shape: 	 torch.Size([1, 384, 5, 5])
Dropout 	 output shape: 	 torch.Size([1, 384, 5, 5])
Sequential 	 output shape: 	 torch.Size([1, 10, 5, 5])
AdaptiveAvgPool2d 	 output shape: 	 torch.Size([1, 10, 1, 1])
Flatten 	 output shape: 	 torch.Size([1, 10])


## GoogLeNet(含并行连接的网络)
### Inception块
在 `GoogLeNet`中, 基本的卷积块称为 `Inception` 块, `Inception` 块的结构如下:
![图片.png](attachment:c6cdfa84-fe8b-41f8-b766-14541479b6ed.png)
注意: 四条路径都通过合适的填充使输出特征图的高和宽与输入保持一致, 各条路径之间只有输出通道数可能不同, 因此四条路径的输出可以直接在通道维度上连结
### GoogLeNet 模型
`GoogLeNet`使用 `9` 个 `Inception` 块和全局的平均汇聚层的堆叠来生成估计值, `GoogLeNet`架构如下:
![图片.png](attachment:1623daf9-7f20-462f-a9c7-ed55b7787118.png)

In [16]:
# Inception block
class Inception(nn.Module):
    """Inception block: four parallel branches concatenated along the channel axis.

    Branches (all use stride 1 and padding chosen so height/width are unchanged):
        1. 1x1 convolution.
        2. 1x1 convolution followed by a 3x3 convolution (padding 1).
        3. 1x1 convolution followed by a 5x5 convolution (padding 2).
        4. 3x3 max pooling (stride 1, padding 1) followed by a 1x1 convolution.

    Args:
        in_channels: Number of channels of the input.
        c1: Output channels of branch 1.
        c2: Indexable pair; ``c2[0]`` is the output channels of the 1x1
            convolution and ``c2[1]`` those of the 3x3 convolution in branch 2.
        c3: Indexable pair; ``c3[0]`` is the output channels of the 1x1
            convolution and ``c3[1]`` those of the 5x5 convolution in branch 3.
        c4: Output channels of branch 4.
        **kwargs: Forwarded unchanged to ``nn.Module.__init__``.

    The output of ``forward`` has ``c1 + c2[1] + c3[1] + c4`` channels.
    """

    def __init__(self, in_channels, c1, c2, c3, c4, **kwargs):
        super().__init__(**kwargs)

        # Branch 1: a single 1x1 convolution.
        self.p1_1 = nn.Conv2d(in_channels, c1, kernel_size=1)

        # Branch 2: 1x1 convolution, then 3x3 convolution.
        mid_3x3, out_3x3 = c2[0], c2[1]
        self.p2_1 = nn.Conv2d(in_channels, mid_3x3, kernel_size=1)
        self.p2_2 = nn.Conv2d(mid_3x3, out_3x3, kernel_size=3, padding=1)

        # Branch 3: 1x1 convolution, then 5x5 convolution.
        mid_5x5, out_5x5 = c3[0], c3[1]
        self.p3_1 = nn.Conv2d(in_channels, mid_5x5, kernel_size=1)
        self.p3_2 = nn.Conv2d(mid_5x5, out_5x5, kernel_size=5, padding=2)

        # Branch 4: 3x3 max pooling, then 1x1 convolution.
        self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.p4_2 = nn.Conv2d(in_channels, c4, kernel_size=1)

    def forward(self, x):
        """Run the four branches on ``x`` and concatenate their outputs on dim 1."""
        branch1 = F.relu(self.p1_1(x))

        hidden2 = F.relu(self.p2_1(x))
        branch2 = F.relu(self.p2_2(hidden2))

        hidden3 = F.relu(self.p3_1(x))
        branch3 = F.relu(self.p3_2(hidden3))

        # No activation between the pooling and the 1x1 convolution.
        pooled = self.p4_1(x)
        branch4 = F.relu(self.p4_2(pooled))

        # Concatenate the branch outputs along the channel dimension.
        return torch.cat((branch1, branch2, branch3, branch4), dim=1)

In [17]:
# GoogLeNet model
# Each Inception block outputs c1 + c2[1] + c3[1] + c4 channels; the sums are
# noted beside each block because they fix the next block's in_channels.

# Stage 1: 7x7 stride-2 convolution, then 3x3 stride-2 max pooling.
b1 = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=64, kernel_size=7, stride=2, padding=3),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
)

# Stage 2: 1x1 convolution, 3x3 convolution, then max pooling.
b2 = nn.Sequential(
    nn.Conv2d(in_channels=64, out_channels=64, kernel_size=1),
    nn.ReLU(),
    nn.Conv2d(in_channels=64, out_channels=192, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
)

# Stage 3: two Inception blocks, then max pooling.
b3 = nn.Sequential(
    Inception(192, c1=64, c2=(96, 128), c3=(16, 32), c4=32),    # 64+128+32+32 = 256
    Inception(256, c1=128, c2=(128, 192), c3=(32, 96), c4=64),  # 128+192+96+64 = 480
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
)

# Stage 4: five Inception blocks, then max pooling.
b4 = nn.Sequential(
    Inception(480, c1=192, c2=(96, 208), c3=(16, 48), c4=64),     # 192+208+48+64 = 512
    Inception(512, c1=160, c2=(112, 224), c3=(24, 64), c4=64),    # 160+224+64+64 = 512
    Inception(512, c1=128, c2=(128, 256), c3=(24, 64), c4=64),    # 128+256+64+64 = 512
    Inception(512, c1=112, c2=(144, 288), c3=(32, 64), c4=64),    # 112+288+64+64 = 528
    Inception(528, c1=256, c2=(160, 320), c3=(32, 128), c4=128),  # 256+320+128+128 = 832
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
)

# Stage 5: two Inception blocks, global average pooling, then flatten.
b5 = nn.Sequential(
    Inception(832, c1=256, c2=(160, 320), c3=(32, 128), c4=128),  # 256+320+128+128 = 832
    Inception(832, c1=384, c2=(192, 384), c3=(48, 128), c4=128),  # 384+384+128+128 = 1024
    nn.AdaptiveAvgPool2d(output_size=(1, 1)),
    nn.Flatten(),
)

# Full network: the five stages followed by a linear layer mapping 1024 features to 10 outputs.
net = nn.Sequential(b1, b2, b3, b4, b5, nn.Linear(in_features=1024, out_features=10))

In [18]:
# Network shapes: push one random single-channel 96x96 input through the
# network stage by stage and report the output shape after each stage.
X = torch.rand(1, 1, 96, 96)
for layer in net:
    X = layer(X)
    print(type(layer).__name__, '\t output shape: \t', X.shape)

Sequential 	 output shape: 	 torch.Size([1, 64, 24, 24])
Sequential 	 output shape: 	 torch.Size([1, 192, 12, 12])
Sequential 	 output shape: 	 torch.Size([1, 480, 6, 6])
Sequential 	 output shape: 	 torch.Size([1, 832, 3, 3])
Sequential 	 output shape: 	 torch.Size([1, 1024])
Linear 	 output shape: 	 torch.Size([1, 10])
