In [6]:
# VGG
# AlexNet比LeNet更深更大, 效果也更好; 那么网络能不能做得更深更大?
# 选项
#  - 更多的全连接层(太贵)
#  - 更多的卷积层
#  - 将卷积层组合成块

# VGG块
# 深 vs. 宽
#  - 5x5卷积
#  - 3x3卷积
#  - 深但窄效果更好
# VGG块
#  - 3x3卷积(填充1), (n层 m通道)
#  - 2x2 Maxpool (步幅2)

# VGG架构
#  - 多个VGG块后接全连接层
#  - 不同次数的重复块得到不同的架构, VGG-16, VGG-19

# 总结
# VGG使用可重复使用的卷积块来构建深度卷积神经网络
# 不同的卷积块个数和超参数可以得到不同复杂度的变种

In [10]:
import torch
from torch import nn
from d2l import torch as d2l

def vgg_block(num_convs, in_channels, out_channels):
    """Build one VGG block: `num_convs` conv+ReLU pairs followed by a max-pool.

    Args:
        num_convs: number of 3x3 convolution layers in the block.
        in_channels: channels entering the first convolution.
        out_channels: channels produced by every convolution in the block.

    Returns:
        nn.Sequential containing, in order, `num_convs` repetitions of
        (Conv2d with kernel_size=3 and padding=1, ReLU) and then a single
        MaxPool2d with kernel_size=2 and stride=2.
    """
    stages = []
    for idx in range(num_convs):
        # Only the first convolution sees the block's input width; every
        # later one consumes the output of the previous convolution.
        c_in = in_channels if idx == 0 else out_channels
        stages += [
            nn.Conv2d(c_in, out_channels, kernel_size=3, padding=1),
            nn.ReLU(),
        ]
    stages.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*stages)


In [12]:
conv_arch = ((1, 64), (1, 128), (2, 256), (2, 512), (2, 512)) # each pair: (number of conv layers in the block, output channels of the block)

def vgg(conv_arch, in_channels=1, num_classes=10, input_size=224):
    """Build a VGG-style network: stacked VGG blocks followed by an MLP head.

    Args:
        conv_arch: iterable of (num_convs, out_channels) pairs, one per block.
        in_channels: number of channels in the input image. Defaults to 1.
        num_classes: width of the final Linear layer. Defaults to 10.
        input_size: height (= width) of the square input image. Defaults to
            224. It is only used to size the first Linear layer.

    Returns:
        nn.Sequential made of the conv blocks, a Flatten, two
        Linear(…, 4096)-ReLU-Dropout(p=0.5) stages and a final
        Linear(4096, num_classes).

    Raises:
        ValueError: if `input_size` is too small to leave at least one pixel
            after one 2x halving per block.
    """
    conv_blks = []
    feature_size = input_size
    for num_convs, out_channels in conv_arch:
        conv_blks.append(vgg_block(num_convs, in_channels, out_channels))
        # The next block consumes what this block produced.
        in_channels = out_channels
        # Each block ends in a 2x2, stride-2 max-pool, which halves the
        # spatial size (rounding down).
        feature_size //= 2

    if feature_size < 1:
        raise ValueError(
            f"input_size={input_size} is too small for {len(conv_blks)} "
            "pooling stages")

    # With the defaults (224 input, five blocks) this is out_channels * 7 * 7.
    # Using in_channels here also keeps an empty conv_arch well-defined.
    flat_features = in_channels * feature_size * feature_size

    return nn.Sequential(
        *conv_blks, nn.Flatten(),
        nn.Linear(flat_features, 4096), nn.ReLU(), nn.Dropout(p=0.5),
        nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(p=0.5),
        nn.Linear(4096, num_classes))
# Instantiate the network described by conv_arch.
net = vgg(conv_arch)

In [13]:
# Feed one random single-channel 224x224 input through the network, one
# top-level module at a time, printing the shape after each module.
X = torch.randn(1, 1, 224, 224)
for blk in net:
    X = blk(X)
    print(type(blk).__name__, 'output shape:\t', X.shape)

Sequential output shape:	 torch.Size([1, 64, 112, 112])
Sequential output shape:	 torch.Size([1, 128, 56, 56])
Sequential output shape:	 torch.Size([1, 256, 28, 28])
Sequential output shape:	 torch.Size([1, 512, 14, 14])
Sequential output shape:	 torch.Size([1, 512, 7, 7])
Flatten output shape:	 torch.Size([1, 25088])
Linear output shape:	 torch.Size([1, 4096])
ReLU output shape:	 torch.Size([1, 4096])
Dropout output shape:	 torch.Size([1, 4096])
Linear output shape:	 torch.Size([1, 4096])
ReLU output shape:	 torch.Size([1, 4096])
Dropout output shape:	 torch.Size([1, 4096])
Linear output shape:	 torch.Size([1, 10])


In [21]:
# VGG-11 is expensive to compute, so build a narrower variant: keep the number
# of conv layers per block and divide every block's channel count by `ratio`.
ratio = 4
small_conv_arch = [
    (num_convs, out_channels // ratio)
    for num_convs, out_channels in conv_arch
]
net = vgg(small_conv_arch)

In [None]:
# Training hyperparameters.
lr = 0.05
num_epochs = 10
batch_size = 128

# Load Fashion-MNIST with resize=224, the input size the network's first
# Linear layer is sized for, then train on the device chosen by d2l.try_gpu().
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)
d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())