# 迁移学习微调训练图像分类模型

在自己的图像分类数据集上，使用ImageNet预训练图像分类模型初始化，改动分类层，迁移学习微调训练

GPU RTX 3060、CUDA v11.2

## 导入工具包

In [28]:
import time
import os
from tqdm import tqdm

import pandas as pd
import numpy as np

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F

import matplotlib.pyplot as plt
%matplotlib inline

# 忽略烦人的红色提示
import warnings
warnings.filterwarnings("ignore")

# 获取计算硬件
# 有 GPU 就用 GPU，没有就用 CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('device', device)

device cuda:0


## 图像预处理

In [2]:
from torchvision import transforms

# 定义训练集图像预处理：首先将图像随机裁剪成224*224的大小，然后进行随机水平翻转增强，接着将图像转换为PyTorch中的Tensor格式，最后将RGB通道的像素值进行归一化
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224), # 随机裁剪224*224大小的图像
    transforms.RandomHorizontalFlip(), # 随机水平翻转增强
    transforms.ToTensor(), # 将图像转换为Tensor格式
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) # 归一化处理，以便输入到神经网络中
])

# 定义测试集图像预处理-RCTN：首先将图像缩放到256*256的大小，然后从图像中心裁剪出224*224大小的图像，接着将图像转换为PyTorch中的Tensor格式，最后将RGB通道的像素值进行归一化
test_transform = transforms.Compose([
    transforms.Resize(256), # 将图像缩放到256*256大小
    transforms.CenterCrop(224), # 从图像中心裁剪出224*224大小的图像
    transforms.ToTensor(), # 将图像转换为Tensor格式
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406], # 用于归一化的均值
        std=[0.229, 0.224, 0.225] # 用于归一化的标准差
    )
])


## 载入图像分类数据集

In [29]:
# 数据集文件夹路径
dataset_dir = 'jiyouguan_split'

In [30]:
train_path = os.path.join(dataset_dir, 'train')
test_path = os.path.join(dataset_dir, 'val')
print('训练集路径', train_path)
print('测试集路径', test_path)

from torchvision import datasets
# 载入训练集
train_dataset = datasets.ImageFolder(train_path, train_transform)
# 载入测试集
test_dataset = datasets.ImageFolder(test_path, test_transform)

print('训练集图像数量', len(train_dataset))
print('类别个数', len(train_dataset.classes))
print('各类别名称', train_dataset.classes)
print('测试集图像数量', len(test_dataset))
print('类别个数', len(test_dataset.classes))
print('各类别名称', test_dataset.classes)

训练集路径 jiyouguan_split/train
测试集路径 jiyouguan_split/val
训练集图像数量 1416
类别个数 4
各类别名称 ['body_broken', 'normal', 'side_broken', 'top_broken']
测试集图像数量 353
类别个数 4
各类别名称 ['body_broken', 'normal', 'side_broken', 'top_broken']


## 类别和索引号 映射字典

In [31]:
# 各类别名称
class_names = train_dataset.classes
n_class = len(class_names)
# 映射关系：类别 到 索引号
train_dataset.class_to_idx
# 映射关系：索引号 到 类别
idx_to_labels = {y:x for x,y in train_dataset.class_to_idx.items()}

In [32]:
idx_to_labels

{0: 'body_broken', 1: 'normal', 2: 'side_broken', 3: 'top_broken'}

## 类别和索引号 一一对应

In [33]:
# 各类别名称
class_names = train_dataset.classes
n_class = len(class_names)

In [34]:
class_names

['body_broken', 'normal', 'side_broken', 'top_broken']

In [35]:
# 映射关系：类别 到 索引号
train_dataset.class_to_idx

{'body_broken': 0, 'normal': 1, 'side_broken': 2, 'top_broken': 3}

In [36]:
# 映射关系：索引号 到 类别
idx_to_labels = {y:x for x,y in train_dataset.class_to_idx.items()}

In [37]:
idx_to_labels

{0: 'body_broken', 1: 'normal', 2: 'side_broken', 3: 'top_broken'}

In [38]:
# 保存为本地的 npy 文件
np.save('idx_to_labels.npy', idx_to_labels)
np.save('labels_to_idx.npy', train_dataset.class_to_idx)

## 定义数据加载器DataLoader

In [7]:
from torch.utils.data import DataLoader

BATCH_SIZE = 32

# 训练集的数据加载器，用于批量加载训练集数据
train_loader = DataLoader(train_dataset,  # 加载的训练集数据
                          batch_size=BATCH_SIZE,  # 批量数据大小
                          shuffle=True,  # 是否打乱数据
                          num_workers=4  # 用于数据读取的线程数
                         )

# 测试集的数据加载器，用于批量加载测试集数据
test_loader = DataLoader(test_dataset,  # 加载的测试集数据
                         batch_size=BATCH_SIZE,  # 批量数据大小
                         shuffle=False,  # 是否打乱数据
                         num_workers=4  # 用于数据读取的线程数
                        )
# 注释中解释了每行代码的作用，其中 DataLoader 是 PyTorch 中用于批量加载数据的类，
# BATCH_SIZE 表示每个批次中包含的样本数量。train_loader 和 test_loader 分别代
# 表训练集和测试集的数据加载器，shuffle 参数表示是否需要打乱数据，num_workers 参
# 数表示数据读取所需的线程数。


## 导入训练需使用的工具包

In [8]:
from torchvision import models
import torch.optim as optim
from torch.optim import lr_scheduler

### 选择一：只微调训练模型最后一层（全连接分类层）

In [9]:
# model = models.resnet18(pretrained=True) # 载入预训练模型

# # 修改全连接层，使得全连接层的输出与当前数据集类别数对应
# # 新建的层默认 requires_grad=True
# model.fc = nn.Linear(model.fc.in_features, n_class)

In [10]:
# model.fc

In [11]:
# # 只微调训练最后一层全连接层的参数，其它层冻结
# optimizer = optim.Adam(model.fc.parameters())

### 选择二：微调训练所有层

In [12]:
model = models.resnet18(pretrained=True) # 载入预训练模型

model.fc = nn.Linear(model.fc.in_features, n_class)

optimizer = optim.Adam(model.parameters())

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /home/featurize/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


  0%|          | 0.00/44.7M [00:00<?, ?B/s]

In [13]:
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

### 选择三：随机初始化模型全部权重，从头训练所有层

In [10]:
# model = models.resnet18(pretrained=False) # 只载入模型结构，不载入预训练权重参数

# model.fc = nn.Linear(model.fc.in_features, n_class)

# optimizer = optim.Adam(model.parameters())

In [14]:
# import torchvision.models as models

# print(dir(models))


## 训练配置

In [15]:
model = model.to(device)

# 交叉熵损失函数
criterion = nn.CrossEntropyLoss() 

# 训练轮次 Epoch
EPOCHS = 30

# 学习率降低策略
lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

# 学习率衰减策略，使用StepLR学习率衰减器，每5个epoch将学习率乘以0.5。
# 学习率衰减策略是深度学习中常用的优化方法之一，通过在训练过程中逐步降低学习率，使得模型在接近收敛时能够更好地探索局部最优解，从而提高模型性能和泛化能力。

# 在上面的代码中，使用了StepLR学习率衰减器。该衰减器按照一定的步长(step_size)将学习率进行衰减，衰减率由gamma参数控制。具体来说，每step_size个epoch，将当前的学习率乘以gamma，这样就能够逐渐降低学习率，使模型更好地收敛。

# 在这里，使用了step_size=5和gamma=0.5，意味着在每5个epoch时将当前学习率乘以0.5，从而逐渐减小学习率。这种学习率衰减策略可以帮助模型更快地收敛，同时避免过拟合。

## 函数：在训练集上训练

In [16]:
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score

In [17]:
def train_one_batch(images, labels):
    '''
    运行一个 batch 的训练，返回当前 batch 的训练日志
    '''
    
    # 获得一个 batch 的数据和标注
    images = images.to(device)
    labels = labels.to(device)
    
    outputs = model(images) # 输入模型，执行前向预测
    loss = criterion(outputs, labels) # 计算当前 batch 中，每个样本的平均交叉熵损失函数值
    
    # 优化更新权重
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    # 获取当前 batch 的标签类别和预测类别
    _, preds = torch.max(outputs, 1) # 获得当前 batch 所有图像的预测类别
    preds = preds.cpu().numpy()
    loss = loss.detach().cpu().numpy()
    outputs = outputs.detach().cpu().numpy()
    labels = labels.detach().cpu().numpy()
    
    log_train = {}
    log_train['epoch'] = epoch
    log_train['batch'] = batch_idx
    # 计算分类评估指标
    log_train['train_loss'] = loss
    log_train['train_accuracy'] = accuracy_score(labels, preds)
    # log_train['train_precision'] = precision_score(labels, preds, average='macro')
    # log_train['train_recall'] = recall_score(labels, preds, average='macro')
    # log_train['train_f1-score'] = f1_score(labels, preds, average='macro')
    
    return log_train

## 函数：在整个测试集上评估

In [18]:
def evaluate_testset():
    '''
    在整个测试集上评估，返回分类评估指标日志
    '''

    loss_list = []
    labels_list = []
    preds_list = []
    
    with torch.no_grad():
        for images, labels in test_loader: # 生成一个 batch 的数据和标注
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images) # 输入模型，执行前向预测

            # 获取整个测试集的标签类别和预测类别
            _, preds = torch.max(outputs, 1) # 获得当前 batch 所有图像的预测类别
            preds = preds.cpu().numpy()
            loss = criterion(outputs, labels) # 由 logit，计算当前 batch 中，每个样本的平均交叉熵损失函数值
            loss = loss.detach().cpu().numpy()
            outputs = outputs.detach().cpu().numpy()
            labels = labels.detach().cpu().numpy()

            loss_list.append(loss)
            labels_list.extend(labels)
            preds_list.extend(preds)
        
    log_test = {}
    log_test['epoch'] = epoch
    
    # 计算分类评估指标
    log_test['test_loss'] = np.mean(loss)
    log_test['test_accuracy'] = accuracy_score(labels_list, preds_list)
    log_test['test_precision'] = precision_score(labels_list, preds_list, average='macro')
    log_test['test_recall'] = recall_score(labels_list, preds_list, average='macro')
    log_test['test_f1-score'] = f1_score(labels_list, preds_list, average='macro')
    
    return log_test

## 训练开始之前，记录日志

In [19]:
epoch = 0
batch_idx = 0
best_test_accuracy = 0

In [20]:
# 训练日志-训练集
df_train_log = pd.DataFrame()
log_train = {}
log_train['epoch'] = 0
log_train['batch'] = 0
images, labels = next(iter(train_loader))
log_train.update(train_one_batch(images, labels))
df_train_log = df_train_log.append(log_train, ignore_index=True)

In [21]:
df_train_log

Unnamed: 0,epoch,batch,train_loss,train_accuracy
0,0.0,0.0,2.0916054,0.09375


In [22]:
# 训练日志-测试集
df_test_log = pd.DataFrame()
log_test = {}
log_test['epoch'] = 0
log_test.update(evaluate_testset())
df_test_log = df_test_log.append(log_test, ignore_index=True)

In [23]:
df_test_log

Unnamed: 0,epoch,test_loss,test_accuracy,test_precision,test_recall,test_f1-score
0,0.0,1.994376,0.339943,0.325197,0.353257,0.303659


## 登录wandb

1.安装 wandb：pip install wandb

2.登录 wandb：在命令行中运行wandb login

3.按提示复制粘贴API Key至命令行中

## 创建wandb可视化项目

In [24]:
import wandb

wandb.init(project='毕业设计', name=time.strftime('%m%d%H%M%S'))

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 

 ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/featurize/.netrc


## 运行训练

In [25]:
for epoch in range(1, EPOCHS+1):
    
    print(f'Epoch {epoch}/{EPOCHS}')
    
    ## 训练阶段
    model.train()
    for images, labels in tqdm(train_loader): # 获得一个 batch 的数据和标注
        batch_idx += 1
        log_train = train_one_batch(images, labels)
        df_train_log = df_train_log.append(log_train, ignore_index=True)
        wandb.log(log_train)
        
    lr_scheduler.step()

    ## 测试阶段
    model.eval()
    log_test = evaluate_testset()
    df_test_log = df_test_log.append(log_test, ignore_index=True)
    wandb.log(log_test)
    
    # 保存最新的最佳模型文件
    if log_test['test_accuracy'] > best_test_accuracy: 
        # 删除旧的最佳模型文件(如有)
        old_best_checkpoint_path = 'checkpoints/best-{:.3f}.pth'.format(best_test_accuracy)
        if os.path.exists(old_best_checkpoint_path):
            os.remove(old_best_checkpoint_path)
        # 保存新的最佳模型文件
        new_best_checkpoint_path = 'checkpoints/best-{:.3f}.pth'.format(log_test['test_accuracy'])
        torch.save(model, new_best_checkpoint_path)
        print('保存新的最佳模型', 'checkpoints/best-{:.3f}.pth'.format(best_test_accuracy))
        best_test_accuracy = log_test['test_accuracy']

df_train_log.to_csv('训练日志-训练集.csv', index=False)
df_test_log.to_csv('训练日志-测试集.csv', index=False)

Epoch 1/30


100%|██████████| 45/45 [00:13<00:00,  3.39it/s]


保存新的最佳模型 checkpoints/best-0.000.pth
Epoch 2/30


100%|██████████| 45/45 [00:13<00:00,  3.44it/s]


保存新的最佳模型 checkpoints/best-0.561.pth
Epoch 3/30


100%|██████████| 45/45 [00:13<00:00,  3.46it/s]


保存新的最佳模型 checkpoints/best-0.635.pth
Epoch 4/30


100%|██████████| 45/45 [00:13<00:00,  3.41it/s]


Epoch 5/30


100%|██████████| 45/45 [00:13<00:00,  3.42it/s]


Epoch 6/30


100%|██████████| 45/45 [00:13<00:00,  3.40it/s]


保存新的最佳模型 checkpoints/best-0.745.pth
Epoch 7/30


100%|██████████| 45/45 [00:13<00:00,  3.26it/s]


保存新的最佳模型 checkpoints/best-0.892.pth
Epoch 8/30


100%|██████████| 45/45 [00:13<00:00,  3.41it/s]


Epoch 9/30


100%|██████████| 45/45 [00:13<00:00,  3.44it/s]


保存新的最佳模型 checkpoints/best-0.926.pth
Epoch 10/30


100%|██████████| 45/45 [00:12<00:00,  3.48it/s]


Epoch 11/30


100%|██████████| 45/45 [00:13<00:00,  3.34it/s]


保存新的最佳模型 checkpoints/best-0.938.pth
Epoch 12/30


100%|██████████| 45/45 [00:13<00:00,  3.38it/s]


保存新的最佳模型 checkpoints/best-0.966.pth
Epoch 13/30


100%|██████████| 45/45 [00:12<00:00,  3.47it/s]


Epoch 14/30


100%|██████████| 45/45 [00:13<00:00,  3.43it/s]


保存新的最佳模型 checkpoints/best-0.983.pth
Epoch 15/30


100%|██████████| 45/45 [00:13<00:00,  3.46it/s]


Epoch 16/30


100%|██████████| 45/45 [00:12<00:00,  3.47it/s]


Epoch 17/30


100%|██████████| 45/45 [00:13<00:00,  3.45it/s]


保存新的最佳模型 checkpoints/best-0.992.pth
Epoch 18/30


100%|██████████| 45/45 [00:13<00:00,  3.38it/s]


保存新的最佳模型 checkpoints/best-0.994.pth
Epoch 19/30


100%|██████████| 45/45 [00:13<00:00,  3.39it/s]


Epoch 20/30


100%|██████████| 45/45 [00:13<00:00,  3.37it/s]


Epoch 21/30


100%|██████████| 45/45 [00:13<00:00,  3.43it/s]


Epoch 22/30


100%|██████████| 45/45 [00:13<00:00,  3.38it/s]


Epoch 23/30


100%|██████████| 45/45 [00:13<00:00,  3.33it/s]


Epoch 24/30


100%|██████████| 45/45 [00:13<00:00,  3.34it/s]


Epoch 25/30


100%|██████████| 45/45 [00:13<00:00,  3.41it/s]


Epoch 26/30


100%|██████████| 45/45 [00:13<00:00,  3.30it/s]


Epoch 27/30


100%|██████████| 45/45 [00:13<00:00,  3.39it/s]


Epoch 28/30


100%|██████████| 45/45 [00:13<00:00,  3.41it/s]


Epoch 29/30


100%|██████████| 45/45 [00:13<00:00,  3.40it/s]


Epoch 30/30


100%|██████████| 45/45 [00:13<00:00,  3.42it/s]


## 在测试集上评价

In [26]:
# 载入最佳模型作为当前模型
model = torch.load('checkpoints/best-{:.3f}.pth'.format(best_test_accuracy))

In [None]:
model.eval()
print(evaluate_testset())

{'epoch': 30, 'test_loss': 0.0007636015, 'test_accuracy': 1.0, 'test_precision': 1.0, 'test_recall': 1.0, 'test_f1-score': 1.0}
