## AlexNet网络实现

In [1]:
import os
import sys
import time
import warnings
import torch
import numpy as np
import pandas as pd
from torch import nn

In [2]:
# AlexNet implementation

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Network definition
class AlexNet(nn.Module):
    """AlexNet-style convolutional classifier.

    The network is a convolutional feature extractor (``self.conv``) followed
    by a fully connected head (``self.fc``). The first linear layer expects
    ``256 * 5 * 5`` features, which is what the convolutional stack produces
    for 224 x 224 inputs (224 -> 54 -> 26 -> 26 -> 12 -> 12 -> 5).

    Args:
        in_channels: Number of channels of the input images. Defaults to 1.
        num_classes: Number of logits produced by the output layer.
            Defaults to 10.
    """

    def __init__(self, in_channels=1, num_classes=10):
        super().__init__()

        # Convolutional feature extractor.
        self.conv = nn.Sequential(
            # Conv2d positional arguments:
            # in_channels, out_channels, kernel_size, stride[, padding]
            nn.Conv2d(in_channels, 96, 11, 4),
            nn.ReLU(),
            nn.MaxPool2d(3, 2),

            # Smaller kernel; padding of 2 keeps the output height and width
            # equal to the input height and width.
            nn.Conv2d(96, 256, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(3, 2),

            # Three consecutive convolutions with an even smaller kernel.
            nn.Conv2d(256, 384, 3, 1, 1),
            nn.ReLU(),
            nn.Conv2d(384, 384, 3, 1, 1),
            nn.ReLU(),
            nn.Conv2d(384, 256, 3, 1, 1),
            nn.ReLU(),
            nn.MaxPool2d(3, 2),
        )

        # Fully connected head, regularised with dropout.
        self.fc = nn.Sequential(
            nn.Linear(256 * 5 * 5, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),

            # Output layer.
            nn.Linear(4096, num_classes),
        )

    def forward(self, img):
        """Return the class logits for a batch of images.

        Args:
            img: Batch of images whose first dimension is the batch size.

        Returns:
            Tensor of shape ``(batch, num_classes)``.
        """
        feature = self.conv(img)
        # Collapse everything but the batch dimension before the linear head.
        return self.fc(feature.flatten(start_dim=1))

In [3]:
# Instantiate the network with its default constructor arguments.
net = AlexNet()

# Last expression of the cell: displays the module structure as the cell output.
net

AlexNet(
  (conv): Sequential(
    (0): Conv2d(1, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=6400, out_features=4096, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.5, inplace=False)
    (

In [4]:
# 读取数据集
import torchvision
import torchvision.transforms as transforms

def load_data_fashion_mnist(batch_size, resize=None, root='data/FashionMNIST', num_workers=None):
    """Build the Fashion-MNIST training and test data loaders.

    The dataset is downloaded into ``root`` if it is not already there.

    Args:
        batch_size: Number of samples per mini-batch for both loaders.
        resize: Optional size forwarded to ``transforms.Resize``. When falsy,
            no resize step is added.
        root: Directory in which the dataset is stored.
        num_workers: Number of loader worker processes. ``None`` selects
            0 on Windows and 4 on every other platform.

    Returns:
        A ``(train_iter, test_iter)`` tuple of ``DataLoader`` objects. The
        training loader reshuffles the data; the test loader iterates in a
        fixed order.
    """
    # Pre-processing pipeline: optional resize followed by tensor conversion.
    steps = []
    if resize:
        steps.append(transforms.Resize(size=resize))
    steps.append(transforms.ToTensor())
    transform = transforms.Compose(steps)

    mnist_train = torchvision.datasets.FashionMNIST(
        root=root, train=True, download=True, transform=transform)
    mnist_test = torchvision.datasets.FashionMNIST(
        root=root, train=False, download=True, transform=transform)

    if num_workers is None:
        num_workers = 0 if sys.platform.startswith('win') else 4

    train_iter = torch.utils.data.DataLoader(
        mnist_train, batch_size, shuffle=True, num_workers=num_workers)
    # Shuffling does not change the accuracy metric, so the test set is read
    # in a fixed order to keep evaluation deterministic.
    test_iter = torch.utils.data.DataLoader(
        mnist_test, batch_size, shuffle=False, num_workers=num_workers)

    return train_iter, test_iter

# Mini-batch size shared by the training and test loaders.
batch_size = 256

# Resize to 224 so the convolutional stack yields the 256 * 5 * 5 features
# expected by the first fully connected layer of AlexNet.
train_iter, test_iter = load_data_fashion_mnist(batch_size=batch_size, resize=224)

In [5]:
def evaluate_accuracy(data_iter, net, device=None):
    """Compute the classification accuracy of ``net`` over ``data_iter``.

    Args:
        data_iter: Iterable yielding ``(X, y)`` batches. A prediction is
            ``net(X).argmax(dim=1)`` and counts as correct when it equals the
            corresponding entry of ``y``.
        net: Either a ``torch.nn.Module`` or a plain callable. A module is
            evaluated in eval mode (dropout disabled) and its previous
            train/eval mode is restored afterwards. A plain function that has
            a variable named ``is_training`` is called with
            ``is_training=False``.
        device: Device the batches are moved to when ``net`` is a module.
            Defaults to the device of the module's first parameter. It is
            ignored for plain callables.

    Returns:
        The fraction of correctly classified samples as a float.

    Raises:
        ZeroDivisionError: If ``data_iter`` yields no samples.
    """
    is_module = isinstance(net, torch.nn.Module)
    was_training = False
    accepts_is_training = False

    if is_module:
        if device is None:
            # Use the device of the first parameter; a module without
            # parameters leaves the batches where they are.
            first_param = next(net.parameters(), None)
            if first_param is not None:
                device = first_param.device
        # Remember the current mode so evaluation has no lasting side effect.
        was_training = net.training
        net.eval()
    else:
        # Hand-written models may take an ``is_training`` switch.
        code = getattr(net, '__code__', None)
        accepts_is_training = code is not None and 'is_training' in code.co_varnames

    acc_sum, n = 0.0, 0
    try:
        # No gradients are needed for evaluation; skip building the graph.
        with torch.no_grad():
            for X, y in data_iter:
                if is_module:
                    if device is not None:
                        X, y = X.to(device), y.to(device)
                    correct = net(X).argmax(dim=1).long() == y.long()
                elif accepts_is_training:
                    correct = net(X, is_training=False).argmax(dim=1) == y
                else:
                    correct = net(X).argmax(dim=1) == y
                acc_sum += correct.float().sum().item()
                n += y.shape[0]
    finally:
        if is_module:
            net.train(was_training)

    return acc_sum / n

In [6]:
def train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs):
    """Train ``net`` with cross-entropy loss and print metrics after each epoch.

    Args:
        net: Model to train; it is moved to ``device`` first.
        train_iter: Iterable of ``(X, y)`` training batches.
        test_iter: Iterable of ``(X, y)`` batches passed to
            ``evaluate_accuracy`` at the end of every epoch.
        batch_size: Not used by this function; kept so existing calls keep
            working.
        optimizer: Optimizer that updates the parameters of ``net``.
        device: Device on which the model and the batches are placed.
        num_epochs: Number of passes over ``train_iter``.

    Returns:
        None. For each epoch it prints the mean batch loss, the training
        accuracy, the test accuracy and the elapsed time.
    """
    # Move the model to the requested device.
    net = net.to(device)
    print("training on ", device)
    loss = torch.nn.CrossEntropyLoss()

    for epoch in range(num_epochs):
        # Make sure dropout is active even if the model arrived in eval mode
        # or an evaluation helper left it there.
        net.train()

        train_l_sum, train_acc_sum = 0.0, 0.0
        n, batch_count = 0, 0
        start = time.time()

        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)

            y_hat = net(X)
            batch_loss = loss(y_hat, y)

            # Clear gradients left over from the previous step before backprop.
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

            train_l_sum += batch_loss.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
            batch_count += 1

        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec' % (epoch+1, train_l_sum/batch_count, train_acc_sum/n, test_acc, time.time()-start))

In [7]:
# Train the model

# Hyperparameters: learning rate and number of passes over the training set.
lr = 0.001
num_epochs = 5

# Adam optimizer over all parameters of the network.
optimizer = torch.optim.Adam(params=net.parameters(), lr=lr)

train_ch5(
    net=net,
    train_iter=train_iter,
    test_iter=test_iter,
    batch_size=batch_size,
    optimizer=optimizer,
    device=device,
    num_epochs=num_epochs,
)

training on  cuda
epoch 1, loss 0.7539, train acc 0.708, test acc 0.842, time 181.8 sec
epoch 2, loss 0.3572, train acc 0.867, test acc 0.857, time 181.1 sec
epoch 3, loss 0.2982, train acc 0.890, test acc 0.897, time 179.0 sec
epoch 4, loss 0.2684, train acc 0.900, test acc 0.895, time 178.6 sec
epoch 5, loss 0.2437, train acc 0.910, test acc 0.906, time 178.9 sec
