# 基于经典网络架构训练图像分类模型

### 数据预处理部分：

- 数据增强：torchvision中transforms模块自带功能，比较实用
- 数据预处理：torchvision中transforms也帮我们实现好了，直接调用即可
- DataLoader模块直接读取batch数据

### 网络模块设置：

- 加载预训练模型，torchvision中有很多经典网络架构，调用起来十分方便，并且可以用人家训练好的权重参数来继续训练，也就是所谓的迁移学习
- 需要注意的是别人训练好的任务跟咱们的可不是完全一样，需要把最后的head层改一改，一般也就是最后的全连接层，改成咱们自己的任务
- 训练时可以全部从头训练，也可以只训练最后咱们任务的层，因为前几层都是做特征提取的，本质任务目标是一致的

### 网络模型保存与测试
- 模型保存的时候可以带有选择性，例如在验证集中如果当前效果好则保存
- 读取模型进行实际测试

![title](1.png)

In [56]:
import os
import matplotlib.pyplot as plt
import torch
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torch import nn
import torch.optim as optim
import torchvision
from torchvision import transforms, models, datasets
import imageio
import time
import warnings

warnings.filterwarnings("ignore")
import random
import sys
import copy
import json
from PIL import Image

In [57]:
# Root folder of the dataset; the 'train' and 'valid' sub-folder names are joined onto it when the datasets are built.
data_dir = './flower_data/'

### 制作好数据源：
- data_transforms中指定了所有图像预处理操作
- ImageFolder假设所有的文件按文件夹保存好，每个文件夹下面存贮同一类别的图片，文件夹的名字为分类的名字

In [58]:
# Per-split preprocessing pipelines, keyed by split name ('train' / 'valid').
data_transforms = {
    'train':
        transforms.Compose(
            [
                transforms.Resize([96, 96]),  # bring every image to one common size
                transforms.RandomRotation(45),  # random rotation between -45° and 45° to augment the data
                transforms.CenterCrop(64),  # keep the central 64x64 patch
                transforms.RandomHorizontalFlip(p=0.5),  # random horizontal flip
                transforms.RandomVerticalFlip(p=0.5),  # random vertical flip
                transforms.ColorJitter(brightness=0.2, contrast=0.1),  # jitter brightness and contrast (saturation and hue are left alone)
                transforms.RandomGrayscale(p=0.025),  # convert to grayscale with probability p
                transforms.ToTensor(),  # convert to a tensor; pixel values are scaled from [0, 255] to [0, 1]
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # per-channel mean and std; statistics gathered from natural images
            ]
        ),
    'valid':
        transforms.Compose(
            [
                transforms.Resize([64, 64]),
                transforms.ToTensor(),
                # Validation images must be normalized with the same mean/std as the
                # training images; otherwise the network sees shifted inputs at
                # evaluation time.
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ]
        )
}

In [59]:
# Number of images per batch.
bat_size = 128

# One ImageFolder dataset per split, each with its own preprocessing pipeline.
image_datasets = {
    split: datasets.ImageFolder(os.path.join(data_dir, split), data_transforms[split])
    for split in ('train', 'valid')
}

# Shuffling batch loaders and sample counts, derived from the datasets above.
dataloaders = {
    split: DataLoader(split_data, batch_size=bat_size, shuffle=True)
    for split, split_data in image_datasets.items()
}
dataset_sizes = {split: len(split_data) for split, split_data in image_datasets.items()}

# Class names as discovered in the training folder.
class_names = image_datasets['train'].classes

In [60]:
# Show the ImageFolder datasets built for the 'train' and 'valid' splits.
image_datasets

{'train': Dataset ImageFolder
     Number of datapoints: 6552
     Root location: ./flower_data/train
     StandardTransform
 Transform: Compose(
                Resize(size=[96, 96], interpolation=bilinear, max_size=None, antialias=True)
                RandomRotation(degrees=[-45.0, 45.0], interpolation=nearest, expand=False, fill=0)
                CenterCrop(size=(64, 64))
                RandomHorizontalFlip(p=0.5)
                RandomVerticalFlip(p=0.5)
                ColorJitter(brightness=(0.8, 1.2), contrast=(0.9, 1.1), saturation=None, hue=None)
                RandomGrayscale(p=0.025)
                ToTensor()
                Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ),
 'valid': Dataset ImageFolder
     Number of datapoints: 818
     Root location: ./flower_data/valid
     StandardTransform
 Transform: Compose(
                Resize(size=[64, 64], interpolation=bilinear, max_size=None, antialias=True)
                ToTensor()
      

In [61]:
# Show the DataLoader created for each split.
dataloaders

{'train': <torch.utils.data.dataloader.DataLoader at 0x2953c0da0>,
 'valid': <torch.utils.data.dataloader.DataLoader at 0x2953c15e0>}

In [62]:
# Show the number of samples in each split.
dataset_sizes

{'train': 6552, 'valid': 818}

读取标签对应的实际名字

In [63]:
# Load the mapping from category id (string key) to flower name.
# The encoding is given explicitly so the result does not depend on the
# platform's default text encoding.
with open('cat_to_name.json', 'r', encoding='utf-8') as f:
    cat_to_name = json.load(f)

In [64]:
# Show the loaded category-id -> flower-name mapping.
cat_to_name

{'21': 'fire lily',
 '3': 'canterbury bells',
 '45': 'bolero deep blue',
 '1': 'pink primrose',
 '34': 'mexican aster',
 '27': 'prince of wales feathers',
 '7': 'moon orchid',
 '16': 'globe-flower',
 '25': 'grape hyacinth',
 '26': 'corn poppy',
 '79': 'toad lily',
 '39': 'siam tulip',
 '24': 'red ginger',
 '67': 'spring crocus',
 '35': 'alpine sea holly',
 '32': 'garden phlox',
 '10': 'globe thistle',
 '6': 'tiger lily',
 '93': 'ball moss',
 '33': 'love in the mist',
 '9': 'monkshood',
 '102': 'blackberry lily',
 '14': 'spear thistle',
 '19': 'balloon flower',
 '100': 'blanket flower',
 '13': 'king protea',
 '49': 'oxeye daisy',
 '15': 'yellow iris',
 '61': 'cautleya spicata',
 '31': 'carnation',
 '64': 'silverbush',
 '68': 'bearded iris',
 '63': 'black-eyed susan',
 '69': 'windflower',
 '62': 'japanese anemone',
 '20': 'giant white arum lily',
 '38': 'great masterwort',
 '4': 'sweet pea',
 '86': 'tree mallow',
 '101': 'trumpet creeper',
 '42': 'daffodil',
 '22': 'pincushion flower',
 

### 加载models中提供的模型，并且直接用训练好的权重当做初始化参数
- 第一次执行需要下载，可能会比较慢，我会提供给大家一份下载好的，可以直接放到相应路径

In [65]:
model_name = 'resnet'  # many classic architectures are available [resnet, densenet, vgg, squeezenet...]; resnet is one of the most widely used today
feature_extract = True  # reuse the pretrained features as they are; do not update them for now

In [66]:
# Detect CUDA once, report the outcome, and pick the matching device.
train_on_gpu = torch.cuda.is_available()
print('cuda is available training on gpu' if train_on_gpu else 'cuda is not available training on cpu')
device = torch.device('cuda:0' if train_on_gpu else 'cpu')

cuda is not available training on cpu


# 冻结模型参数：除输出层外，其余层先不更新

In [67]:
def set_parameter_requires_grad(model, feature_extracting):
    """Freeze every parameter of ``model`` when ``feature_extracting`` is truthy.

    When ``feature_extracting`` is falsy the model is left untouched.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False

In [68]:
model_fit = models.resnet18()  # 18 layers: fast but weaker results; pick a larger variant if the hardware allows
model_fit

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

输出结果可以看到，resnet18的最后一层fc全连接的输出是 1000，而我们需要的是102分类，所以需要修改

In [69]:
def initialize_model(model_name, num_class, feature_extract, use_pretrained=True):
    """Build a ResNet-18 whose final fully connected layer outputs ``num_class`` scores.

    Args:
        model_name: Accepted for interface compatibility; currently unused,
            a ResNet-18 is always built.
        num_class: Number of output classes of the new ``fc`` layer.
        feature_extract: Forwarded to ``set_parameter_requires_grad``. When
            truthy, the existing parameters are frozen before the new head is
            attached, so only the head is trained; when falsy, the whole
            network stays trainable (fine-tuning).
        use_pretrained: Load the ImageNet weights when true, otherwise start
            from random initialization.

    Returns:
        Tuple ``(model, input_size)`` where ``input_size`` is fixed at 64.
    """
    # `pretrained=` is deprecated in torchvision; `weights=` is its replacement.
    # IMAGENET1K_V1 is the weight set that `pretrained=True` used to load.
    weights = models.ResNet18_Weights.IMAGENET1K_V1 if use_pretrained else None
    net = models.resnet18(weights=weights)
    set_parameter_requires_grad(net, feature_extract)
    num_ftrs = net.fc.in_features  # input width of the original fc layer
    # The replacement layer is created after the freeze, so its parameters
    # keep requires_grad=True.
    net.fc = nn.Linear(num_ftrs, num_class)
    input_size = 64
    return net, input_size

In [70]:
# Build the 102-class network and place it on the selected device.
model_ft, input_size = initialize_model(model_name, 102, feature_extract)
model_fit = model_ft.to(device)

# Checkpoint file name.
filename = 'checkpoint.pth'

# (name, tensor) pairs of every parameter that still requires gradients.
trainable = [(name, param) for name, param in model_fit.named_parameters() if param.requires_grad]

if feature_extract:
    # Feature-extraction mode: optimize only the trainable parameters (tag 1).
    params_to_update = [entry[1] for entry in trainable]
    tag = 1
else:
    # Fine-tuning mode: hand every parameter to the optimizer (tag 2).
    params_to_update = model_ft.parameters()
    tag = 2

for name, param in trainable:
    print('\t', tag, name)

	 1 fc.weight
	 1 fc.bias


In [71]:
# Optimizer configuration
optimizer_fit = optim.Adam(params_to_update, lr=1e-2)
scheduler = optim.lr_scheduler.StepLR(optimizer_fit, step_size=10,
                                      gamma=0.1)  # step_size: number of epochs between learning-rate adjustments; gamma: multiplicative decay factor
criterion = nn.CrossEntropyLoss()  # loss function, suited to classification tasks

# 训练模块

In [72]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, filename='best.pt', lr_scheduler=None):
    """Train ``model`` and keep the weights that score best on the validation split.

    Every epoch runs a 'train' phase followed by a 'valid' phase. Whenever the
    validation accuracy improves, the weights are remembered and a checkpoint
    holding ``state_dict``, ``best_acc`` and ``optimizer`` is written to
    ``filename``.

    Args:
        model: Network to train. It is trained on the device its parameters
            already live on; batches are moved to that device.
        dataloaders: Mapping with 'train' and 'valid' loaders yielding
            ``(inputs, labels)`` batches.
        criterion: Loss, called as ``criterion(outputs, labels)``.
        optimizer: Optimizer updating the model parameters.
        num_epochs: Number of epochs to run.
        filename: Path of the checkpoint file.
        lr_scheduler: Optional scheduler stepped once per epoch. When omitted,
            a module-level variable named ``scheduler`` is used if one exists.

    Returns:
        Tuple ``(model, validation_losses, validation_acc, train_losses,
        train_acc, l_r)``. ``model`` carries the best validation weights, the
        metric lists hold one float per epoch, and ``l_r`` holds the initial
        learning rate followed by the rate in effect during each epoch.
    """
    since = time.time()
    best_acc = 0.0

    # Work on the device the model already lives on, so the function does not
    # depend on notebook-level state.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    # Existing callers rely on the notebook-level scheduler being stepped.
    if lr_scheduler is None:
        lr_scheduler = globals().get('scheduler')

    train_losses, train_acc = [], []
    validation_losses, validation_acc = [], []
    l_r = [optimizer.param_groups[0]['lr']]
    best_wt = copy.deepcopy(model.state_dict())

    print(num_epochs)
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('*' * 10)
        for phase in ['train', 'valid']:
            is_train = phase == 'train'
            model.train(is_train)  # train(False) is equivalent to eval()

            running_loss = 0.0
            running_corrects = 0
            for inputs, labels in dataloaders[phase]:
                # Tensor.to() returns a new tensor; the result must be rebound.
                inputs = inputs.to(run_device)
                labels = labels.to(run_device)

                optimizer.zero_grad()
                # Gradients are only needed while training.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    _, preds = torch.max(outputs, 1)  # index of the top score per sample
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # loss is a batch mean; weight it by the batch size before summing.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels).item()

            sample_count = len(dataloaders[phase].dataset)
            epoch_loss = running_loss / sample_count
            epoch_acc = running_corrects / sample_count
            time_elapsed = time.time() - since
            print('Time elapsed {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if is_train:
                train_acc.append(epoch_acc)
                train_losses.append(epoch_loss)
            else:
                validation_acc.append(epoch_acc)
                validation_losses.append(epoch_loss)
                # Keep and persist the best model seen on the validation split.
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_wt = copy.deepcopy(model.state_dict())
                    state = {
                        'state_dict': model.state_dict(),
                        'best_acc': best_acc,
                        'optimizer': optimizer.state_dict()
                    }
                    torch.save(state, filename)

        print('Optimizer learning rate : {:.7f}'.format(optimizer.param_groups[0]['lr']))
        l_r.append(optimizer.param_groups[0]['lr'])
        if lr_scheduler is not None:
            lr_scheduler.step()  # apply the learning-rate decay

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # Hand back the model with its best validation weights restored.
    model.load_state_dict(best_wt)
    return model, validation_losses, validation_acc, train_losses, train_acc, l_r

In [None]:
# Stage 1: train with the current optimizer for 20 epochs.
# `filename` is passed explicitly so the best weights are written to the
# checkpoint file that is loaded back from `filename` further down; without it
# train_model writes to its default 'best.pt'.
model_fit, validation_losses, validation_acc, train_losses, train_acc, l_r = train_model(
    model_fit, dataloaders, criterion, optimizer_fit, num_epochs=20, filename=filename)

20
Epoch 1/20
**********


In [None]:
# With the last layer trained, unfreeze and train every layer.
for param in model_fit.parameters():
    param.requires_grad = True

# Adam lives in the `optim` module; the existing optimizer instance has no
# `.Adam` attribute.
optimizer_fit = optim.Adam(model_fit.parameters(), lr=1e-2)
scheduler = optim.lr_scheduler.StepLR(optimizer_fit, step_size=7, gamma=0.1)
criterion = nn.CrossEntropyLoss()

In [None]:
# Load the previously saved weights.
# map_location keeps the load working when the checkpoint was written on a
# different device than the one in use now (e.g. saved on GPU, loaded on CPU).
checkpoint = torch.load(filename, map_location=device)
best_acc = checkpoint['best_acc']
model_fit.load_state_dict(checkpoint['state_dict'])

In [None]:
# Stage 2: train again with the current optimizer for 20 more epochs.
model_fit, validation_losses, validation_acc, train_losses, train_acc, l_r = train_model(
    model=model_fit,
    dataloaders=dataloaders,
    criterion=criterion,
    optimizer=optimizer_fit,
    num_epochs=20,
)

In [None]:
# Rebuild the architecture only. Every weight is overwritten by the checkpoint
# loaded below, so fetching the pretrained weights first would be wasted work.
model_ft, input_size = initialize_model(model_name, 102, feature_extract, use_pretrained=False)

# Move the model to the selected device (GPU when available).
model_ft = model_ft.to(device)

# Name of the saved checkpoint file.
filename = 'best.pt'

# Load the checkpoint; map_location keeps this working when it was written on
# a different device than the one in use now.
checkpoint = torch.load(filename, map_location=device)
best_acc = checkpoint['best_acc']
model_ft.load_state_dict(checkpoint['state_dict'])

### 测试数据预处理

- 测试数据处理方法需要跟训练时一致才可以 
- crop操作的目的是保证输入的大小是一致的
- 标准化操作也是必须的，用跟训练数据相同的mean和std,但是需要注意一点训练数据是在0-1上进行标准化，所以测试数据也需要先归一化
- 最后一点，PyTorch中颜色通道是第一个维度，跟很多工具包都不一样，需要转换


In [None]:
# Fetch one batch from the validation loader. The iterator is advanced with
# the built-in next(); current DataLoader iterators have no `.next()` method.
dataiter = iter(dataloaders['valid'])
# `laels` (sic) holds the ground-truth class indices of the batch; the
# misspelled name is kept so other cells are unaffected.
images, laels = next(dataiter)

model_ft.eval()
# Inference only: no autograd bookkeeping, and the batch is moved to the same
# device the model was moved to.
with torch.no_grad():
    output = model_ft(images.to(device))

In [None]:
# Show the shape of the raw model output for the batch.
output.shape

In [None]:
# Index of the highest score along dimension 1 for every sample; the maximum values themselves are discarded.
_, pred = torch.max(output, 1)

In [None]:
# Bring the predictions to host memory as a NumPy array and drop singleton
# dimensions; .cpu() leaves a tensor that is already on the CPU unchanged.
preds = np.squeeze(pred.cpu().numpy())
preds

# 展示预测结果

In [None]:
def im_convert(tensor):
    """Turn a normalized image tensor into a NumPy image that matplotlib can show.

    Args:
        tensor: Image tensor with the channel axis first (singleton dimensions
            are squeezed away), normalized with mean (0.485, 0.456, 0.406) and
            std (0.229, 0.224, 0.225).

    Returns:
        NumPy array with the channel axis last and values clipped to [0, 1].
    """
    image = tensor.to('cpu').clone().detach()  # plotting works on NumPy, which needs CPU data
    image = image.numpy().squeeze()  # drop size-1 dimensions, e.g. 1x3x64x64 -> 3x64x64
    image = image.transpose(1, 2, 0)  # channel-first -> channel-last, the layout plt expects
    image = image * np.array((0.229, 0.224, 0.225)) + np.array((0.485, 0.456, 0.406))  # undo the normalization
    image = image.clip(0, 1)  # keep the restored values inside the [0, 1] range
    return image

In [None]:
# Plot the first rows*columns images of the batch, titled "predicted (actual)".
fig = plt.figure(figsize=(20, 20))
columns = 4
rows = 2

for idx in range(columns * rows):
    ax = fig.add_subplot(rows, columns, idx + 1, xticks=[], yticks=[])
    plt.imshow(im_convert(images[idx]))
    # Predictions and ground-truth labels are positions in `class_names`, not
    # category ids, so they are translated through `class_names` before the
    # name lookup.
    # NOTE(review): presumably the class folder names are the category ids
    # used as keys of `cat_to_name` — confirm against the dataset layout.
    # `laels` (sic) is the label batch fetched together with `images`.
    predicted_name = cat_to_name[class_names[preds[idx]]]
    actual_name = cat_to_name[class_names[laels[idx].item()]]
    ax.set_title("{} ({})".format(predicted_name, actual_name),
                 color=("green" if predicted_name == actual_name else "red"))
plt.show()