In [1]:
from torch import nn
from torch.nn import functional as F

class AlexNet(nn.Module):
    def __init__(self):
        super(AlexNet, self).__init__()
        self.RELU = nn.ReLU()
        # 灰度图输入通道为1
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=96, kernel_size=11, stride=4, padding=0)
        self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.conv3 = nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, padding=2)
        self.pool4 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.conv5 = nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, padding=1)
        self.conv6 = nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, padding=1)
        self.conv7 = nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, padding=1)
        self.pool8 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.flatten = nn.Flatten()
        self.fc9 = nn.Linear(in_features=256*6*6, out_features=4096)
        self.fc10 = nn.Linear(in_features=4096, out_features=4096)
        self.fc11 = nn.Linear(in_features=4096, out_features=10)


    def forward(self, x):
        x = self.RELU(self.conv1(x))
        x = self.pool2(x)
        x = self.RELU(self.conv3(x))
        x = self.pool4(x)
        x = self.RELU(self.conv5(x))
        x = self.RELU(self.conv6(x))
        x = self.RELU(self.conv7(x))
        x = self.pool8(x)

        x = self.flatten(x)
        x = self.RELU(self.fc9(x))
        x = F.dropout(x, p=0.5)
        x = self.RELU(self.fc10(x))
        x = F.dropout(x, p=0.5)
        x = self.fc11(x)
        return x
        

In [2]:
import torch
from torchsummary import summary

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AlexNet().to(device)

print(summary(model, (1, 227, 227)))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 96, 55, 55]          11,712
              ReLU-2           [-1, 96, 55, 55]               0
         MaxPool2d-3           [-1, 96, 27, 27]               0
            Conv2d-4          [-1, 256, 27, 27]         614,656
              ReLU-5          [-1, 256, 27, 27]               0
         MaxPool2d-6          [-1, 256, 13, 13]               0
            Conv2d-7          [-1, 384, 13, 13]         885,120
              ReLU-8          [-1, 384, 13, 13]               0
            Conv2d-9          [-1, 384, 13, 13]       1,327,488
             ReLU-10          [-1, 384, 13, 13]               0
           Conv2d-11          [-1, 256, 13, 13]         884,992
             ReLU-12          [-1, 256, 13, 13]               0
        MaxPool2d-13            [-1, 256, 6, 6]               0
          Flatten-14                 [-

In [3]:
# 画图
import matplotlib.pyplot as plt

def matplot_train_process_data(train_process_data):
    plt.figure(figsize=(12, 4))
    plt.subplot(1, 2, 1)
    plt.plot(train_process_data['epoch'], train_process_data['train_loss'], 'ro-',label='Train Loss')
    plt.plot(train_process_data['epoch'], train_process_data['val_loss'], 'bs-', label='Val Loss')
    plt.legend()
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Train and Val Loss')

    plt.subplot(1, 2, 2)
    plt.plot(train_process_data['epoch'], train_process_data['train_acc'], 'ro-', label='Train Acc')
    plt.plot(train_process_data['epoch'], train_process_data['val_acc'], 'bs-', label='Val Acc')
    plt.legend()
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.title('Train and Val Accuracy')
    plt.show()

In [None]:
# 训练
import copy
import time
import torchvision.transforms as Transforms
import torchvision.datasets as Datasets
import torch.utils.data as Data
import pandas as pd


def train_val_data_process():
    # 加载数据集
    train_data = Datasets.FashionMNIST(root='./data', train=True, 
        transform=Transforms.Compose([
            Transforms.Resize((227, 227)),
            Transforms.ToTensor()
        ]), 
        download=True)
    
    # 划分训练集和验证集
    train_data, val_data = Data.random_split(train_data, 
    [round(len(train_data) * 0.8), round(len(train_data) * 0.2)])
    
    train_loader = Data.DataLoader(dataset=train_data, 
        batch_size=32,
        shuffle=True,
        num_workers=4)

    val_loader = Data.DataLoader(dataset=val_data, 
        batch_size=32,
        shuffle=True,
        num_workers=4)
    
    return train_loader, val_loader


def train_model_process(model, train_loader, val_loader, num_epochs=5):
    # 加载模型
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("当前设备：", device)
    model = model.to(device)

    # 定义损失函数和优化器
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    # 训练损失
    train_loss_all = []
    # 训练准确率
    train_acc_all = []
    # 验证损失
    val_loss_all = []
    # 验证准确率
    val_acc_all = []

    since = time.time()
    # 训练
    for epoch in range(num_epochs):
        
        print(f"Epoch {epoch+1}/{num_epochs} - LR: {optimizer.param_groups[0]['lr']}")
        print('-' * 10)
        
        train_loss = 0.0
        train_corrects = 0.0

        val_loss = 0.0
        val_corrects = 0.0

        train_num = 0
        val_num = 0

        # 训练
        for step, (b_x, b_y) in enumerate(train_loader):
            # 数据移动到设备
            b_x = b_x.to(device)
            b_y = b_y.to(device)

            train_num += b_x.size(0)

            # 模型训练
            model.train()
            outputs = model(b_x)
            # 计算损失
            loss = criterion(outputs, b_y)
            
            # 梯度清零，防止梯度叠加
            optimizer.zero_grad()
            # 反向传播
            loss.backward()
            # 更新参数
            optimizer.step()

            # 计算准确率
            pre_lab = torch.argmax(outputs, dim=1)
            train_corrects += torch.sum(pre_lab == b_y)
            # 计算损失
            train_loss += loss.item() * b_x.size(0)
            
            # 计算验证损失和准确率
        for step, (b_x, b_y) in enumerate(val_loader):
            b_x = b_x.to(device)
            b_y = b_y.to(device)

            val_num += b_x.size(0)

            # 模型验证
            model.eval()
            outputs = model(b_x)
            loss = criterion(outputs, b_y)

            # 计算验证损失
            val_loss += loss.item() * b_x.size(0)
            pre_lab = torch.argmax(outputs, dim=1)
            val_corrects += torch.sum(pre_lab == b_y)

        # 计算训练损失和准确率
        train_loss_all.append(train_loss / train_num)
        train_acc_all.append(train_corrects.double().item() / train_num)
        val_loss_all.append(val_loss / val_num)
        val_acc_all.append(val_corrects.double().item() / val_num)

        print('Train loss: {:.4f} Train acc: {:.4f} | Val loss: {:.4f} Val acc: {:.4f}'
        .format(train_loss_all[-1], train_acc_all[-1], val_loss_all[-1], val_acc_all[-1]))

        # 保存最佳模型参数
    if val_acc_all[-1] > best_acc:
        best_acc = val_acc_all[-1]
        best_model_wts = copy.deepcopy(model.state_dict())
    

    time_elapsed = time.time() - since
    print(f"训练完成，用时 {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s")
    
    # 保存最佳模型
    torch.save(best_model_wts, 'model/alexnet_best_model.pth')

    train_process_data = pd.DataFrame({
        'epoch': range(num_epochs),
        'train_loss': train_loss_all,
        'train_acc': train_acc_all,
        'val_loss': val_loss_all,
        'val_acc': val_acc_all
    })
    return train_process_data
    

model = AlexNet()
train_loader, val_loader = train_val_data_process()
train_process_data = train_model_process(model, train_loader, val_loader, num_epochs=20)

matplot_train_process_data(train_process_data)
    
    
    

## 云训练 

autodl

配置：\
GPU RTX 4090D(24GB) * 1 \
CPU 15 vCPU AMD EPYC 9754 128-Core Processor \
内存 80GB \
硬盘 系统盘:30 GB

镜像：\
PyTorch  2.1.0 \
Python  3.10(ubuntu22.04) \
CUDA  12.1

```
当前设备： cuda
Epoch 1/20 - LR: 0.001
----------
Train loss: 0.6402 Train acc: 0.7566 | Val loss: 0.3939 Val acc: 0.8494
Epoch 2/20 - LR: 0.001
----------
Train loss: 0.3425 Train acc: 0.8732 | Val loss: 0.3254 Val acc: 0.8791
Epoch 3/20 - LR: 0.001
----------
Train loss: 0.2870 Train acc: 0.8944 | Val loss: 0.2707 Val acc: 0.9022
Epoch 4/20 - LR: 0.001
----------
Train loss: 0.2531 Train acc: 0.9058 | Val loss: 0.2726 Val acc: 0.9003
Epoch 5/20 - LR: 0.001
----------
Train loss: 0.2309 Train acc: 0.9146 | Val loss: 0.2520 Val acc: 0.9095
Epoch 6/20 - LR: 0.001
----------
Train loss: 0.2136 Train acc: 0.9214 | Val loss: 0.2462 Val acc: 0.9128
Epoch 7/20 - LR: 0.001
----------
Train loss: 0.2055 Train acc: 0.9248 | Val loss: 0.2325 Val acc: 0.9140
Epoch 8/20 - LR: 0.001
----------
Train loss: 0.1859 Train acc: 0.9308 | Val loss: 0.2520 Val acc: 0.9107
Epoch 9/20 - LR: 0.001
----------
Train loss: 0.1765 Train acc: 0.9345 | Val loss: 0.2545 Val acc: 0.9087
Epoch 10/20 - LR: 0.001
----------
Train loss: 0.1660 Train acc: 0.9389 | Val loss: 0.2633 Val acc: 0.9113
Epoch 11/20 - LR: 0.001
----------
Train loss: 0.1504 Train acc: 0.9440 | Val loss: 0.2751 Val acc: 0.9122
Epoch 12/20 - LR: 0.001
----------
Train loss: 0.1454 Train acc: 0.9469 | Val loss: 0.2791 Val acc: 0.9172
Epoch 13/20 - LR: 0.001
----------
Train loss: 0.1336 Train acc: 0.9509 | Val loss: 0.2623 Val acc: 0.9107
Epoch 14/20 - LR: 0.001
----------
Train loss: 0.1265 Train acc: 0.9530 | Val loss: 0.2931 Val acc: 0.9083
Epoch 15/20 - LR: 0.001
----------
Train loss: 0.1221 Train acc: 0.9554 | Val loss: 0.2836 Val acc: 0.9148
Epoch 16/20 - LR: 0.001
----------
Train loss: 0.1182 Train acc: 0.9570 | Val loss: 0.3020 Val acc: 0.9165
Epoch 17/20 - LR: 0.001
----------
Train loss: 0.1053 Train acc: 0.9605 | Val loss: 0.3341 Val acc: 0.9127
Epoch 18/20 - LR: 0.001
----------
Train loss: 0.1137 Train acc: 0.9586 | Val loss: 0.3031 Val acc: 0.9176
Epoch 19/20 - LR: 0.001
----------
Train loss: 0.0951 Train acc: 0.9653 | Val loss: 0.3650 Val acc: 0.9153
Epoch 20/20 - LR: 0.001
----------
Train loss: 0.0903 Train acc: 0.9670 | Val loss: 0.3302 Val acc: 0.9140
训练完成，用时 4m 33s
```


In [None]:
# 测试
import torch
import torchvision.datasets as datasets
import torchvision.transforms as Transforms
import torch.utils.data as Data


def get_test_loader():
    # 数据预处理
    transforms = Transforms.Compose([
        Transforms.Resize(size=227),
        Transforms.ToTensor()
    ])

    # 加载测试数据
    test_dataset = datasets.FashionMNIST(root='./data', 
        train=False,
        transform=transforms, 
        download=True)
    
    test_loader = Data.DataLoader(dataset=test_dataset, 
        batch_size=1, 
        shuffle=True,
        num_workers=4)
    
    return test_loader

def test_model_process(model, test_loader):
    # 模型放入设备
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # 初始化参数
    test_corrects = 0.0
    test_num = 0

    # 只进行前向传播
    with torch.no_grad():
        for test_x, test_y in test_loader:
            test_x = test_x.to(device)
            test_y = test_y.to(device)

            test_num += test_x.size(0)

            outputs = model(test_x)
            model.eval()

            outputs = model(test_x)
            
            pre_lab = torch.argmax(outputs, dim=1)
            test_corrects += torch.sum(pre_lab == test_y)
    
    test_acc = test_corrects.double().item() / test_num
    print(f"测试准确率: {test_acc:.4f}")

model = AlexNet()
model.load_state_dict(torch.load('./alexnet/best_model.pth'))
test_loader = get_test_loader()
test_model_process(model=model, test_loader=test_loader)


测试准确率: 0.9082