In [1]:
import torch
from torch import nn


def vgg_block(num_convs, in_channels, out_channels):
    '''
    构建相同的卷积层

    num_convs: 卷积层的数量
    in_channels: 输入通道数
    out_channels: 输出通道数
    '''
    layers = []
    for _ in range(num_convs):
        layers.append(nn.Conv2d(in_channels, out_channels,
                                kernel_size=3, padding=1))
        layers.append(nn.ReLU())
        in_channels = out_channels
    layers.append(nn.MaxPool2d(kernel_size=2,stride=2))
    return nn.Sequential(*layers)

In [2]:
def vgg(conv_arch):
    conv_blks = []
    in_channels = 1
    # 卷积层部分
    for (num_convs, out_channels) in conv_arch:
        conv_blks.append(vgg_block(num_convs, in_channels, out_channels))
        in_channels = out_channels

    return nn.Sequential(
        *conv_blks, nn.Flatten(),
        # 全连接层部分
        nn.Linear(out_channels * 7 * 7, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, 10))

conv_arch = ((1, 64), (1, 128), (2, 256), (2, 512), (2, 512))
net = vgg(conv_arch)

In [3]:
X = torch.randn(size=(1, 1, 224, 224))
for blk in net:
    X = blk(X)
    print(blk.__class__.__name__,'output shape:\t',X.shape)

Sequential output shape:	 torch.Size([1, 64, 112, 112])
Sequential output shape:	 torch.Size([1, 128, 56, 56])
Sequential output shape:	 torch.Size([1, 256, 28, 28])
Sequential output shape:	 torch.Size([1, 512, 14, 14])
Sequential output shape:	 torch.Size([1, 512, 7, 7])
Flatten output shape:	 torch.Size([1, 25088])
Linear output shape:	 torch.Size([1, 4096])
ReLU output shape:	 torch.Size([1, 4096])
Dropout output shape:	 torch.Size([1, 4096])
Linear output shape:	 torch.Size([1, 4096])
ReLU output shape:	 torch.Size([1, 4096])
Dropout output shape:	 torch.Size([1, 4096])
Linear output shape:	 torch.Size([1, 10])


In [4]:
import torchvision


# 加载Fashion-MNIST数据集
def loadFashion(root, trans, batch_size, resize=None, download=False):
    if resize:
        trans.insert(0, torchvision.transforms.Resize(size=resize))
    trans = torchvision.transforms.Compose(trans)

    train_dataset = torchvision.datasets.FashionMNIST(root=root, train=True, transform=trans, download=download)
    test_dataset = torchvision.datasets.FashionMNIST(root=root, train=False, transform=trans, download=download)

    train_iter = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_iter = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

    return train_iter, test_iter

In [5]:
def evaluate_accuracy(data_iter, net, device=None):
    if device is None and isinstance(net, torch.nn.Module):
        # 如果没指定device就使用net的device
        device = list(net.parameters())[0].device
    acc_sum, n = 0.0, 0
    with torch.no_grad():
        for X, y in data_iter:
            net.eval()
            acc_sum += (net(X.to(device)).argmax(dim=1) == y.to(device)).sum().item()
            # 改回训练模式
            net.train()

            n += y.shape[0]
    return acc_sum / n

In [13]:
def train(net, train_iter, test_iter, optimizer, loss, num_epochs, device):
    net = net.to(device)
    print('training on', device)
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, batch_count = 0, 0, 0, 0
        for X, y in train_iter:
            X = X.to(device).float()
            y = y.to(device).float()
            y_hat = net(X)
            print(y_hat.shape, y.shape)
            print(y_hat, y)
            l = loss(y, y_hat)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l += l.items()
            train_acc += (y_hat.argmax(dim=1) == y).sum().items()
        
        test_acc = evaluate_accuracy(test_iter, net)
        print("epoch %d, loss %.4f, train acc %.3f, test acc %.3f" % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc))

In [15]:
# 缩减VGG网络通道数
ratio = 4
small_conv_arch = [(pair[0], pair[1] // ratio) for pair in conv_arch]
net = vgg(small_conv_arch)

batch_size = 64
trans = [torchvision.transforms.ToTensor()]
train_iter, test_iter = loadFashion(root="../../Data/FashionMNIST", trans=trans, batch_size=batch_size, resize=224, download=False)

lr = 0.05
optimizer = torch.optim.Adam(net.parameters(), lr=lr)

loss = torch.nn.MultiLabelSoftMarginLoss()

num_epochs = 10

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train(net, train_iter, test_iter, optimizer, loss, num_epochs, device)

training on cuda
torch.Size([64, 10]) torch.Size([64])
tensor([[-6.6448e-03,  5.3274e-03, -6.7170e-03, -7.9008e-03, -2.9880e-03,
          3.7116e-03, -3.4850e-03,  1.2013e-02,  5.4014e-03,  1.9229e-02],
        [-8.4889e-03, -3.2992e-03,  1.3547e-03, -1.4991e-02, -2.1871e-03,
          1.1027e-02, -1.5944e-02,  2.2126e-02,  1.4447e-02,  9.5951e-03],
        [-1.0463e-02, -6.8581e-03, -5.7257e-03, -2.1024e-04, -6.8161e-05,
          1.4949e-02, -5.1320e-03,  1.5571e-02,  1.2104e-02,  2.2553e-02],
        [-7.4170e-03,  8.9638e-03, -3.7648e-03, -1.5423e-02,  7.6153e-03,
          1.6581e-02,  7.2702e-03,  6.9583e-03,  1.0425e-02,  1.4100e-02],
        [ 4.8519e-04, -7.0686e-04, -6.0318e-03, -1.0426e-02,  1.1282e-02,
          1.5330e-02, -2.9496e-03,  1.0973e-02,  6.5666e-03,  2.1552e-02],
        [-9.2656e-03, -1.8314e-03, -1.4939e-03, -3.5001e-03,  3.2234e-03,
          1.3688e-02,  1.0670e-03,  2.3732e-03,  8.4491e-03,  8.9327e-03],
        [-4.5618e-03, -4.6092e-03, -8.2390e-03, -1.

RuntimeError: The size of tensor a (10) must match the size of tensor b (64) at non-singleton dimension 1

In [32]:
loss = torch.nn.CrossEntropyLoss()

a = torch.randint(0, 10, (2, 3))
b = torch.randint(0, 10, (2, 1))
print(a, b)
loss(a, b)

tensor([[5, 6, 9],
        [6, 3, 6]]) tensor([[5],
        [0]])


RuntimeError: "log_softmax_lastdim_kernel_impl" not implemented for 'Long'