# VGG块

VGG块的组成规律是：连续使⽤数个相同的填充为1、窗⼝形状为3x3 的卷积层后接上⼀个步幅为2、窗⼝形状为2x2的最⼤池化层。卷积层保持输⼊的⾼和宽不变，⽽池化层则对其减半。

In [2]:
import time
import torch
from torch import nn,optim

device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
def vgg_block(num_convs,in_channels,out_channels):
    blk=[]
    for i in range(num_convs):
        if i==0:
            blk.append(nn.Conv2d(in_channels,out_channels,kernel_size=3,padding=1))
        else:
            blk.append(nn.Conv2d(out_channels,out_channels,kernel_size=3,padding=1))
        blk.append(nn.ReLU())
    blk.append(nn.MaxPool2d(kernel_size=2,stride=2))
    return nn.Sequential(*blk)

# VGG网络

与AlexNet和LeNet⼀样，VGG⽹络由卷积层模块后接全连接层模块构成。卷积层模块串联数个 vgg_block ，其超参数由变量 conv_arch 定义。该变量指定了每个VGG块⾥卷积层个数和输⼊输出通道数。全连接模块则跟AlexNet中的⼀样。

In [4]:
conv_arch=((1,1,64),(1,64,128),(2,128,256),(2,256,512),(2,512,512))
#经过5个vgg_block, 宽⾼会减半5次, 变成 224/32 = 7
fc_features=512*7*7 
fc_hidden_units=4096

In [5]:
class FlattenLayer(nn.Module):
    def __init__(self):
        super(FlattenLayer,self).__init__()
    def forward(self,x):
        return x.view(x.shape[0],-1)

In [6]:
def vgg(conv_arch,fc_features,fc_hidden_units=4096):
    net=nn.Sequential()
    for i,(num_conv,in_channels,out_channels) in enumerate(conv_arch):
        net.add_module('vgg_block_'+str(i+1),vgg_block(num_conv,in_channels,out_channels))
    net.add_module('fc',nn.Sequential(FlattenLayer(),
                                      nn.Linear(fc_features,fc_hidden_units),
                                      nn.ReLU(),
                                      nn.Dropout(0.5),
                                      nn.Linear(fc_hidden_units,fc_hidden_units),
                                      nn.ReLU(),
                                      nn.Dropout(0.5),
                                      nn.Linear(fc_hidden_units,10)
                                     ))
    return net

In [9]:
net=vgg(conv_arch,fc_features,fc_hidden_units)
X=torch.rand(2,1,224,224)
# named_children获取⼀级⼦模块及其名字(named_modules会返回所有⼦模块,包括⼦模块的⼦模块)
for name,blk in net.named_children():
    X=blk(X)
    print(name,'output shape:',X.shape)

vgg_block_1 output shape: torch.Size([2, 64, 112, 112])
vgg_block_2 output shape: torch.Size([2, 128, 56, 56])
vgg_block_3 output shape: torch.Size([2, 256, 28, 28])
vgg_block_4 output shape: torch.Size([2, 512, 14, 14])
vgg_block_5 output shape: torch.Size([2, 512, 7, 7])
fc output shape: torch.Size([2, 10])


# 获取数据和训练模型

In [10]:
ratio=8
small_conv_arch=[(1,1,64//ratio),(1,64//ratio,128//ratio),(2,128//ratio,256//ratio),(2,256//ratio,512//ratio),(2,512//ratio,512//ratio)]
net=vgg(small_conv_arch,fc_features//ratio,fc_hidden_units//ratio)
print(net)

Sequential(
  (vgg_block_1): Sequential(
    (0): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (vgg_block_2): Sequential(
    (0): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (vgg_block_3): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (vgg_block_4): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [16]:
import torchvision
import sys
import torchvision.transforms as transforms
def load_data_fashion_mnist(batch_size,resize=None,root='./Datasets/FashionMNIST'):
    trans=[]
    if resize:
        trans.append(torchvision.transforms.Resize(size=resize))
    trans.append(torchvision.transforms.ToTensor())
    
    transform=torchvision.transforms.Compose(trans)
    mnist_train=torchvision.datasets.FashionMNIST(root=root,train=True,download=True,transform=transform)
    mnist_test=torchvision.datasets.FashionMNIST(root=root,train=False,download=True,transform=transform)
    train_iter=torch.utils.data.DataLoader(mnist_train,batch_size=batch_size,shuffle=True,num_workers=0)
    test_iter=torch.utils.data.DataLoader(mnist_test,batch_size=batch_size,shuffle=False,num_workers=0)
    return train_iter,test_iter

batch_size=64
train_iter,test_iter=load_data_fashion_mnist(batch_size,resize=224)
lr,num_epochs=0.001,5
optimizer=torch.optim.Adam(net.parameters(),lr=lr)

In [18]:
def evaluate_accuracy(data_iter,net,device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')):
    acc_sum,n=0.0,0
    with torch.no_grad():
        for X,y in data_iter:
            if isinstance(net,torch.nn.Module):
                net.eval()#评估模式，这会关闭dropout
                acc_sum+=(net(X.to(device)).argmax(dim=1)==y.to(device)).float().sum().cpu().item()
                net.train()#改回训练模式
            else:
                if('is_training' in net.__code__.co_varname):
                    acc_sum+=(net(X,is_training=False).argmax(dim=1)==y).float().sum().item()
            n+=y.shape[0]
    return acc_sum/n

In [17]:
def train_ch5(net,train_iter,test_iter,batch_size,optimizer,device,num_epochs):
    net=net.to(device)
    print('training on ',device)
    loss=torch.nn.CrossEntropyLoss()
    batch_count=0
    for epoch in range(num_epochs):
        train_l_sum,train_acc_sum,n,start=0.0,0.0,0,time.time()
        for X,y in train_iter:
            X=X.to(device)
            y=y.to(device)
            y_hat=net(X)
            l=loss(y_hat,y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum+=l.cpu().item()
            train_acc_sum+=(y_hat.argmax(dim=1)==y).sum().cpu().item()
            n+=y.shape[0]
            batch_count+=1
        test_acc=evaluate_accuracy(test_iter,net)
        print('epoch %d,loss %.4f,train acc %.3f,test acc %.3f,time %.lf sec'%(epoch+1,train_l_sum/batch_count,train_acc_sum/n,test_acc,time.time()-start))

In [19]:
train_ch5(net,train_iter,test_iter,batch_size,optimizer,device,num_epochs)

training on  cuda
epoch 1,loss 2.3032,train acc 0.101,test acc 0.100,time 569 sec
epoch 2,loss 1.1515,train acc 0.100,test acc 0.100,time 565 sec
epoch 3,loss 0.7676,train acc 0.099,test acc 0.100,time 564 sec
epoch 4,loss 0.5757,train acc 0.098,test acc 0.100,time 563 sec
epoch 5,loss 0.4606,train acc 0.098,test acc 0.100,time 564 sec
