In [25]:
import torch
import os
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from torchsummary import summary
from pathlib import Path
from torch import nn
from torchvision import datasets
from torchvision import transforms
from torch.utils import data
from pathlib import Path
from torch.nn import functional as F

## 定义残差块

In [26]:
class Residual(nn.Module):
    """Residual block: two 3x3 convolutions plus an optional 1x1 projection shortcut.

    Main path: conv3x3 -> BatchNorm -> ReLU -> conv3x3 -> BatchNorm. The result
    is added to the shortcut and passed through a final ReLU.

    Args:
        in_channel: Number of channels of the incoming feature map.
        out_channel: Number of channels produced by the block.
        stride: Stride of the first 3x3 convolution and of the 1x1 shortcut
            convolution. It is only honoured when ``flag`` is truthy; without
            a projection it is forced to 1.
        flag: When truthy, the shortcut is a 1x1 convolution mapping
            ``in_channel`` to ``out_channel``. When falsy, the input itself is
            added, so it must be broadcast-compatible with the main-path output.
    """

    def __init__(self, in_channel, out_channel, stride=1, flag=False):
        super().__init__()
        # An identity shortcut cannot change the spatial size, so the requested
        # stride is dropped whenever no projection is used.
        main_stride = stride if flag else 1
        # The shortcut is assigned first so that submodule registration order
        # stays (conv3, conv1, norm1, conv2, norm2).
        self.conv3 = (
            nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=stride)
            if flag
            else None
        )
        self.conv1 = nn.Conv2d(
            in_channel, out_channel, kernel_size=3, stride=main_stride, padding=1
        )
        self.norm1 = nn.BatchNorm2d(out_channel)
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3, padding=1)
        self.norm2 = nn.BatchNorm2d(out_channel)

    def forward(self, x):
        """Return ``relu(shortcut(x) + main_path(x))``."""
        main = F.relu(self.norm1(self.conv1(x)))
        main = self.norm2(self.conv2(main))
        shortcut = x if self.conv3 is None else self.conv3(x)
        return F.relu(shortcut + main)

## 测试残差块

In [27]:
# Smoke test: with a falsy flag the block keeps the identity shortcut and
# ignores the requested stride, so the output shape equals the input shape.
x = torch.rand(16, 1, 224, 224)
block = Residual(in_channel=1, out_channel=1, stride=2, flag=0)
block(x).shape

torch.Size([16, 1, 224, 224])

## 定义复合残差块

In [28]:
def resblock(num, channels):
    """Build ``num`` consecutive ``Residual`` blocks and return them as a list.

    Block ``i`` maps ``channels[i]`` to ``channels[i + 1]``, so ``channels``
    needs at least ``num + 1`` entries. Even-indexed blocks (0, 2, ...) use a
    1x1 projection shortcut with stride 2; odd-indexed blocks use the identity
    shortcut with stride 1.

    Args:
        num: Number of residual blocks to create.
        channels: Indexable sequence of channel widths.

    Returns:
        A plain Python list of ``Residual`` modules (not an ``nn.Sequential``).
    """

    def build(idx):
        # Blocks alternate: projecting/stride-2 first, then identity/stride-1.
        projecting = idx % 2 == 0
        return Residual(
            channels[idx],
            channels[idx + 1],
            flag=projecting,
            stride=2 if projecting else 1,
        )

    return [build(idx) for idx in range(num)]

## 定义xavier初始化方法

In [29]:
def init_weight(m):
    """Re-initialise the weight of conv / linear layers with Xavier-normal values.

    Meant to be passed to ``nn.Module.apply``. Only modules whose exact type is
    ``nn.Conv2d`` or ``nn.Linear`` are touched (subclasses are skipped), and
    only ``m.weight`` is modified.

    Args:
        m: The module visited by ``apply``.
    """
    layer_type = type(m)
    if layer_type is nn.Conv2d or layer_type is nn.Linear:
        nn.init.xavier_normal_(m.weight)

## 定义残差网络

In [35]:
# Channel widths for the six residual blocks built by resblock:
# block i maps args[i] -> args[i + 1].
args = (64, 128, 128, 256, 256, 512, 512)

# Stem: 7x7 stride-2 convolution, batch norm, ReLU, then 3x3 stride-2 max pooling.
stage1 = nn.Sequential(
    nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
    nn.BatchNorm2d(64),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
)

# Two identity-shortcut residual blocks at 64 channels.
stage2 = nn.Sequential(*(Residual(64, 64) for _ in range(2)))

# resblock returns a plain list; it is unpacked below so that every block
# becomes a direct child of `net`.
stage3 = resblock(6, args)

# Head: global average pooling, flatten, 10-way linear classifier.
stage4 = nn.Sequential(
    nn.AdaptiveAvgPool2d(1),
    nn.Flatten(),
    nn.Linear(512, 10),
)

net = nn.Sequential(stage1, stage2, *stage3, stage4)
list(net.children())

[Sequential(
   (0): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
   (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
   (2): ReLU()
   (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
 ),
 Sequential(
   (0): Residual(
     (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
     (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
     (norm2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
   )
   (1): Residual(
     (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
     (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
     (norm2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, 

## 参数量估计

In [31]:
# Print per-layer output shapes and parameter counts for a batch of 64
# single-channel 224x224 inputs, evaluated on the CPU.
summary(net, input_size=(1, 224, 224), batch_size=64, device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [64, 64, 112, 112]           3,200
       BatchNorm2d-2         [64, 64, 112, 112]             128
              ReLU-3         [64, 64, 112, 112]               0
         MaxPool2d-4           [64, 64, 56, 56]               0
            Conv2d-5           [64, 64, 56, 56]          36,928
       BatchNorm2d-6           [64, 64, 56, 56]             128
            Conv2d-7           [64, 64, 56, 56]          36,928
       BatchNorm2d-8           [64, 64, 56, 56]             128
          Residual-9           [64, 64, 56, 56]               0
           Conv2d-10           [64, 64, 56, 56]          36,928
      BatchNorm2d-11           [64, 64, 56, 56]             128
           Conv2d-12           [64, 64, 56, 56]          36,928
      BatchNorm2d-13           [64, 64, 56, 56]             128
         Residual-14           [64, 64,

## 数据集、损失函数、优化器与超参数

In [32]:
# Resize every image to 224x224 and convert it to a tensor.
transform = transforms.Compose(
    [
        transforms.Resize([224, 224]),
        transforms.ToTensor(),
    ]
)

# Fashion-MNIST train / test splits, downloaded into ./data when missing.
mnist_train = datasets.FashionMNIST(
    root="./data", train=True, transform=transform, download=True
)
mnist_test = datasets.FashionMNIST(
    root="./data", train=False, transform=transform, download=True
)

# Objective, optimiser (plain SGD, learning rate 0.1) and epoch budget.
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)
epochs = 10

# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Shuffled mini-batches of 128 training samples, loaded by 8 worker processes.
data_train = data.DataLoader(
    mnist_train,
    batch_size=128,
    shuffle=True,
    num_workers=8,
)

## 初始化权重并指定设备

In [33]:
# Order CUDA devices by PCI bus id and expose only device 0.
# NOTE(review): `device` was computed via torch.cuda.is_available() in an
# earlier cell, i.e. before these variables are set; confirm that they still
# take effect at this point, or move them ahead of the first CUDA query.
os.environ.update(
    {
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
        "CUDA_VISIBLE_DEVICES": "0",
    }
)

# Xavier-initialise conv / linear weights, then move the model to the target device.
net.apply(init_weight)
net.to(device)

Sequential(
  (0): Sequential(
    (0): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (1): Sequential(
    (0): Residual(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (norm2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): Residual(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (norm2): BatchNorm2d(64,

## 测试输入输出

In [9]:
# Trace the output shape after every top-level stage of the network.
# The probe tensor is a separate variable moved to `device` (rather than
# `x.cuda()` rebinding `x`), so the cell also runs on CPU-only machines and
# can be re-executed without feeding the previous network output back in.
probe = x.to(device)
with torch.no_grad():  # shapes only; no autograd graph is needed
    for layer in net:
        probe = layer(probe)
        print(layer.__class__.__name__, "outsize", probe.shape)

Sequential outsize torch.Size([16, 64, 56, 56])
Sequential outsize torch.Size([16, 64, 56, 56])
Residual outsize torch.Size([16, 128, 28, 28])
Residual outsize torch.Size([16, 128, 28, 28])
Residual outsize torch.Size([16, 256, 14, 14])
Residual outsize torch.Size([16, 256, 14, 14])
Residual outsize torch.Size([16, 512, 7, 7])
Residual outsize torch.Size([16, 512, 7, 7])
Sequential outsize torch.Size([16, 10])


## 定义训练函数

In [10]:
def trainer(net, data, loss, optimizer, epoch, device):
    """Run one training epoch of ``net`` over ``data``.

    Args:
        net: Model to optimise. It is switched to training mode.
        data: Iterable yielding ``(x, y)`` mini-batches.
        loss: Callable ``loss(prediction, target)`` returning a scalar tensor.
        optimizer: Optimiser holding the parameters of ``net``.
        epoch: Zero-based epoch index; only used for the progress message.
        device: Device that every batch is moved to before the forward pass.

    Returns:
        None. The parameters of ``net`` are updated in place.
    """
    print(f"第{epoch + 1}次迭代,网络训练中...")
    # infer() puts the network into eval mode and never switches it back.
    # Without this call every epoch after the first would train with
    # BatchNorm using its frozen running statistics.
    net.train()
    for x, y in data:
        x = x.to(device)
        y = y.to(device)
        optimizer.zero_grad()
        batch_loss = loss(net(x), y)
        batch_loss.backward()
        optimizer.step()

## 定义预测函数

In [12]:
def infer(data, net):
    """Compute the classification accuracy of ``net`` over ``data``.

    Args:
        data: Iterable of ``(feature, label)`` pairs. A pair may be a single
            sample (e.g. iterating a dataset directly, with an ``int`` label)
            or a mini-batch (e.g. a ``DataLoader``, with a tensor of labels).
            Features are reshaped to ``(-1, 1, 224, 224)`` before the forward
            pass.
        net: Model to evaluate. It is switched to eval mode and left there.

    Returns:
        A 0-dim float32 NumPy array holding the fraction of correctly
        classified samples.

    Raises:
        ZeroDivisionError: If ``data`` yields no samples.
    """
    net.eval()
    # Evaluate on whatever device the model already lives on instead of
    # assuming CUDA, so the function also works on CPU-only machines.
    first_param = next(net.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    # The hit counter stays on the device to avoid a host sync per iteration.
    correct = torch.zeros((), dtype=torch.long, device=device)
    total = 0
    with torch.no_grad():
        for feature, label in data:
            logits = net(feature.to(device).view(-1, 1, 224, 224))
            # Softmax is monotonic, so argmax over the raw logits gives the
            # same class. Taking it along dim=1 keeps one prediction per row.
            pred = logits.argmax(dim=1)
            target = torch.as_tensor(label, device=device).reshape(-1)
            correct += (pred == target).sum()
            total += target.numel()
    return torch.tensor(correct.item() / total).numpy()

泛化精度： tensor(0.9128, device='cuda:0')


## 网络训练

In [None]:
# Train for `epochs` epochs and record the test accuracy after each one.
acc = list()
for epoch in range(epochs):
    # trainer expects (net, data, loss, optimizer, epoch, device); the loss
    # function was previously missing from this call, which raises TypeError.
    trainer(net, data_train, loss, optimizer, epoch, device)
    acc.append(infer(mnist_test, net))

In [None]:
# Font setup: SimHei is presumably chosen so the Chinese legend label renders;
# unicode_minus=False makes matplotlib draw minus signs with an ASCII hyphen.
plt.rcParams["font.sans-serif"] = "SimHei"
plt.rcParams["axes.unicode_minus"] = False
plt.figure()
plt.plot(range(len(acc)), acc, ls="--", label="泛化精度")
# The label passed to plot() is only displayed once a legend is created.
plt.legend()
# tight_layout must run after the artists exist; on an empty figure it has
# nothing to arrange.
plt.tight_layout()