In [27]:
import torch
from torch import nn


def vgg_block(num_convs, in_channels, out_channels):
    '''
    构建相同的卷积层

    num_convs: 卷积层的数量
    in_channels: 输入通道数
    out_channels: 输出通道数
    '''
    layers = []
    for _ in range(num_convs):
        layers.append(nn.Conv2d(in_channels, out_channels,
                                kernel_size=3, padding=1))
        layers.append(nn.ReLU())
        in_channels = out_channels
    layers.append(nn.MaxPool2d(kernel_size=2,stride=2))
    return nn.Sequential(*layers)

In [28]:
def vgg(conv_arch):
    conv_blks = []
    in_channels = 1
    # 卷积层部分
    for (num_convs, out_channels) in conv_arch:
        conv_blks.append(vgg_block(num_convs, in_channels, out_channels))
        in_channels = out_channels

    return nn.Sequential(
        *conv_blks, nn.Flatten(),
        # 全连接层部分
        nn.Linear(out_channels * 7 * 7, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, 10))

conv_arch = ((1, 64), (1, 128), (2, 256), (2, 512), (2, 512))
net = vgg(conv_arch)

In [29]:
X = torch.randn(size=(1, 1, 224, 224))
for blk in net:
    X = blk(X)
    print(blk.__class__.__name__,'output shape:\t',X.shape)

Sequential output shape:	 torch.Size([1, 64, 112, 112])
Sequential output shape:	 torch.Size([1, 128, 56, 56])
Sequential output shape:	 torch.Size([1, 256, 28, 28])
Sequential output shape:	 torch.Size([1, 512, 14, 14])
Sequential output shape:	 torch.Size([1, 512, 7, 7])
Flatten output shape:	 torch.Size([1, 25088])
Linear output shape:	 torch.Size([1, 4096])
ReLU output shape:	 torch.Size([1, 4096])
Dropout output shape:	 torch.Size([1, 4096])
Linear output shape:	 torch.Size([1, 4096])
ReLU output shape:	 torch.Size([1, 4096])
Dropout output shape:	 torch.Size([1, 4096])
Linear output shape:	 torch.Size([1, 10])


In [30]:
import torchvision


# 加载Fashion-MNIST数据集
def loadFashion(root, trans, batch_size, resize=None, download=False):
    if resize:
        trans.insert(0, torchvision.transforms.Resize(size=resize))
    trans = torchvision.transforms.Compose(trans)

    train_dataset = torchvision.datasets.FashionMNIST(root=root, train=True, transform=trans, download=download)
    test_dataset = torchvision.datasets.FashionMNIST(root=root, train=False, transform=trans, download=download)

    train_iter = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_iter = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

    return train_iter, test_iter

In [31]:
def evaluate_accuracy(data_iter, net, device=None):
    if device is None and isinstance(net, torch.nn.Module):
        # 如果没指定device就使用net的device
        device = list(net.parameters())[0].device
    acc_sum, n = 0.0, 0
    with torch.no_grad():
        for X, y in data_iter:
            net.eval()
            acc_sum += (net(X.to(device)).argmax(dim=1) == y.to(device)).sum().item()
            # 改回训练模式
            net.train()

            n += y.shape[0]
    return acc_sum / n

In [32]:
def train(net, train_iter, test_iter, optimizer, loss, num_epochs, device):
    net = net.to(device)
    print('training on', device)
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, batch_count = 0, 0, 0, 0
        for X, y in train_iter:
            net.train()
            X = X.float().to(device)
            y = y.long().to(device)
            y_hat = net(X)
            l = loss(y_hat, y)  # 损失函数(网络预测，标签)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()

        batch_count += 1
        n += y.shape[0]

        test_acc = evaluate_accuracy(test_iter, net)
        print("epoch %d, loss %.4f, train acc %.3f, test acc %.3f" % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc))

In [33]:
# 缩减VGG网络通道数
ratio = 4
small_conv_arch = [(pair[0], pair[1] // ratio) for pair in conv_arch]
net = vgg(small_conv_arch)

batch_size = 64
trans = [torchvision.transforms.ToTensor()]
train_iter, test_iter = loadFashion(root="../../Data/FashionMNIST", trans=trans, batch_size=batch_size, resize=224, download=False)

lr = 0.05
optimizer = torch.optim.Adam(net.parameters(), lr=lr)

loss = torch.nn.CrossEntropyLoss()

num_epochs = 10

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train(net, train_iter, test_iter, optimizer, loss, num_epochs, device)

training on cuda
epoch 1, loss 27401337921.4519, train acc 183.375, test acc 0.100
epoch 2, loss 2262.7291, train acc 187.562, test acc 0.100
epoch 3, loss 2183.0598, train acc 185.844, test acc 0.100
epoch 4, loss 2184.3004, train acc 190.125, test acc 0.100
epoch 5, loss 2170.5317, train acc 185.750, test acc 0.100
epoch 6, loss 2175.0172, train acc 186.188, test acc 0.100
epoch 7, loss 2351.1770, train acc 184.844, test acc 0.100
epoch 8, loss 2410.0323, train acc 185.312, test acc 0.100
epoch 9, loss 2183.1861, train acc 186.406, test acc 0.100
epoch 10, loss 2180.5256, train acc 187.500, test acc 0.100


In [34]:
import torch
import torch.nn as nn
x_input=torch.randn(3,4)#随机生成输入 
print('x_input:\n',x_input) 
y_target=torch.tensor([1,2,0])#设置输出具体值 print('y_target\n',y_target)
print(x_input.shape, y_target.shape)
crossentropyloss=nn.CrossEntropyLoss()
crossentropyloss_output=crossentropyloss(x_input,y_target)
print('crossentropyloss_output:\n',crossentropyloss_output)

x_input:
 tensor([[-0.0532,  0.9180,  0.4783, -0.4147],
        [-1.0027,  1.0441,  0.0823,  0.1197],
        [ 0.3484,  0.3213,  0.0848,  0.9904]])
torch.Size([3, 4]) torch.Size([3])
crossentropyloss_output:
 tensor(1.3234)
