torchvision模块有三大功能：

1. models（一些预训练好的经典模型架构）
2. transforms（一些图像预处理操作）
3. datasets（一些常用的公开数据集）


In [None]:
"""
参考链接：

我在这里回答了transforms的一些疑惑 https://stackoverflow.com/questions/51677788/data-augmentation-in-pytorch

尽管transforms不改变datasets的大小，但是每个epoch都会执行transforms操作，就像每个epoch都会shuffle一样，

从而得到不同的数据，这也是为什么transforms里面都写成随机翻转、随机改变颜色的原因

微调视觉模型教程 https://pytorch.org/tutorials/beginner/finetuning_torchvision_models_tutorial.html 

pytorch模型的保存与加载深入理解

"""

'\n参考链接：\n\n我在这里回答了transforms的一些疑惑 https://stackoverflow.com/questions/51677788/data-augmentation-in-pytorch\n\n尽管tranforms不改变datasets的大小，但是每个epoch都会执行transforms操作，就像每个epoch都会shuffle一样，\n\n从而得到不同的数据，这也是为什么transforms里面都写成随机翻转、随机改变颜色的原因\n\n微调视觉模型教程 https://pytorch.org/tutorials/beginner/finetuning_torchvision_models_tutorial.html \n\npytorch模型的保存与加载深入理解\n\n'

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import transforms, models, datasets
# 官方文档 https://pytorch.org/vision/stable/index.html
import os
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import imageio
import time
import warnings
import random
import sys
import copy
import json
from PIL import Image

# 准备数据

In [None]:
# Fetch the dataset archive from my Google Drive (this cell only works inside Google Colab).
from google.colab import drive
# NOTE(review): the mount point is relative ('./drive') while the copy below reads from
# '/content/drive' -- the two agree only if the working directory is /content; confirm.
drive.mount('./drive')

# Copy the zipped flower dataset into /content, then unzip it in the current directory.
%cp -ri /content/drive/MyDrive/Colab\ Notebooks/唐宇迪pytorch/datasets/flower_data.zip /content/
!unzip flower_data.zip

In [None]:
# Dataset root, followed by the sub-directories of the training and validation splits
# (joined onto the root when the datasets are built).
data_dir, train_dir, valid_dir = "./flower_data/", "train/", "valid/"

In [None]:
# Image preprocessing pipelines
def _make_normalize():
  # Build a fresh Normalize step for each pipeline from the per-channel mean / std
  # (presumably the ImageNet statistics the pretrained backbones expect -- confirm).
  return transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])


# Training pipeline: resize, random augmentation, then tensor conversion and normalisation.
# The order of the steps is significant and is kept exactly as listed.
_train_steps = [
    transforms.Resize(256),
    transforms.RandomRotation(45),
    transforms.CenterCrop(224),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.ColorJitter(brightness=0.2, contrast=0.1, saturation=0.1, hue=0.1),
    transforms.RandomGrayscale(p=0.025),
    transforms.ToTensor(),    # convert to a tensor scaled to the 0-1 range
    _make_normalize(),
]
train_transforms = transforms.Compose(_train_steps)

# Validation pipeline: no random steps, only resize / centre crop / tensor / normalise.
_valid_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    _make_normalize(),
]
valid_transforms = transforms.Compose(_valid_steps)

In [None]:
batch_size = 8

# One ImageFolder dataset per split, each with its own preprocessing pipeline.
train_root = os.path.join(data_dir, train_dir)
valid_root = os.path.join(data_dir, valid_dir)
train_datasets = datasets.ImageFolder(train_root, transform=train_transforms)
valid_datasets = datasets.ImageFolder(valid_root, transform=valid_transforms)

# Only the training loader shuffles its samples.
train_dataloader = torch.utils.data.DataLoader(
    train_datasets, batch_size=batch_size, shuffle=True
)
valid_dataloader = torch.utils.data.DataLoader(
    valid_datasets, batch_size=batch_size, shuffle=False
)

# Report the split sizes and the class-folder-name -> label-index mapping.
for caption, value in (
    ("训练集数量：", len(train_datasets)),
    ("验证集数量：", len(valid_datasets)),
    ("类别与训练标签index对应：", train_datasets.class_to_idx),
):
  print(caption, value)

训练集数量： 6552
验证集数量： 818
类别与训练标签index对应： {'1': 0, '10': 1, '100': 2, '101': 3, '102': 4, '11': 5, '12': 6, '13': 7, '14': 8, '15': 9, '16': 10, '17': 11, '18': 12, '19': 13, '2': 14, '20': 15, '21': 16, '22': 17, '23': 18, '24': 19, '25': 20, '26': 21, '27': 22, '28': 23, '29': 24, '3': 25, '30': 26, '31': 27, '32': 28, '33': 29, '34': 30, '35': 31, '36': 32, '37': 33, '38': 34, '39': 35, '4': 36, '40': 37, '41': 38, '42': 39, '43': 40, '44': 41, '45': 42, '46': 43, '47': 44, '48': 45, '49': 46, '5': 47, '50': 48, '51': 49, '52': 50, '53': 51, '54': 52, '55': 53, '56': 54, '57': 55, '58': 56, '59': 57, '6': 58, '60': 59, '61': 60, '62': 61, '63': 62, '64': 63, '65': 64, '66': 65, '67': 66, '68': 67, '69': 68, '7': 69, '70': 70, '71': 71, '72': 72, '73': 73, '74': 74, '75': 75, '76': 76, '77': 77, '78': 78, '79': 79, '8': 80, '80': 81, '81': 82, '82': 83, '83': 84, '84': 85, '85': 86, '86': 87, '87': 88, '88': 89, '89': 90, '9': 91, '90': 92, '91': 93, '92': 94, '93': 95, '94': 96, '95': 

# 构建模型

In [None]:
# Freeze parameters
def set_parameter_requires_grad(model, freeze_extractor):
  """Switch off gradient tracking for every parameter of ``model`` on request.

  Args:
    model: object exposing ``parameters()`` (e.g. a ``torch.nn.Module``).
    freeze_extractor: when truthy, every parameter gets ``requires_grad = False``;
      when falsy the model is left untouched.

  Returns:
    None. The parameters are modified in place.
  """
  if not freeze_extractor:
    return
  for weight in model.parameters():
    weight.requires_grad = False

In [None]:
# Load a model and replace its final fully connected layer
def initialize_model(model_name, num_classes, freeze_extractor=False, use_pretrained=True):
  """Build a torchvision classifier whose last linear layer outputs ``num_classes`` values.

  Args:
    model_name: one of ``"resnet"`` (resnet50), ``"alexnet"`` or ``"vgg"`` (vgg16).
    num_classes: number of output units of the replacement linear layer.
    freeze_extractor: forwarded to ``set_parameter_requires_grad``; when true the
      loaded parameters stop tracking gradients. The replacement layer is created
      afterwards, so it stays trainable.
    use_pretrained: forwarded as ``pretrained=`` to the torchvision constructor.

  Returns:
    The constructed model.

  Raises:
    ValueError: if ``model_name`` is not one of the supported names.
  """
  supported = ("resnet", "alexnet", "vgg")
  if model_name not in supported:
    # Raise instead of print + exit(): exit() tears down the whole interpreter /
    # notebook kernel and cannot be handled sensibly by the caller.
    raise ValueError(
        "invalid model name: {!r} (expected one of {})".format(model_name, ", ".join(supported))
    )

  # NOTE(review): newer torchvision releases reportedly deprecate ``pretrained=`` in
  # favour of ``weights=`` -- confirm against the installed version before switching.
  if model_name == "resnet":
    model_ft = models.resnet50(pretrained=use_pretrained)
    set_parameter_requires_grad(model_ft, freeze_extractor)
    # resnet50 exposes its classification layer as ``fc``.
    model_ft.fc = nn.Linear(model_ft.fc.in_features, num_classes)
  else:
    # alexnet and vgg16 are handled identically: the layer to swap is ``classifier[6]``.
    builder = models.alexnet if model_name == "alexnet" else models.vgg16
    model_ft = builder(pretrained=use_pretrained)
    set_parameter_requires_grad(model_ft, freeze_extractor)
    num_ftrs = model_ft.classifier[6].in_features
    model_ft.classifier[6] = nn.Linear(num_ftrs, num_classes)

  return model_ft

# 训练模型

In [None]:
# Training configuration
model_name = "resnet"
num_classes, num_epochs = 102, 25
freeze_extractor = True    # whether to freeze the feature-extractor parameters

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = torch.device("cuda:0")
else:
  device = torch.device("cpu")

In [None]:
# Build the configured network and move it onto the selected device in one step.
model_ft = initialize_model(model_name, num_classes, freeze_extractor).to(device)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

In [None]:
# Loss function: cross entropy, with the (default) mean reduction spelled out.
criterion = nn.CrossEntropyLoss(reduction="mean")

In [None]:
# Optimizer setup
print("Params to learn:")

# Name / tensor pairs of every parameter that still tracks gradients.
trainable = [
    (label, tensor)
    for label, tensor in model_ft.named_parameters()
    if tensor.requires_grad
]
for label, tensor in trainable:
  print("\t", label)

# With a frozen extractor only the trainable tensors are handed to the optimizer;
# otherwise the model's full parameter iterator is passed.
if freeze_extractor:
  params_to_update = [tensor for label, tensor in trainable]
else:
  params_to_update = model_ft.parameters()

optimizer_ft = optim.SGD(params_to_update, lr=0.001, momentum=0.9)

Params to learn:
	 fc.weight
	 fc.bias


In [None]:
# Training and evaluation functions
def _run_epoch(model, dataloader, criterion, device, optimizer=None):
  """Run one pass over ``dataloader`` and return ``(mean_loss, accuracy)`` as floats.

  With an ``optimizer`` the model is put in training mode and updated after every
  batch; without one the model is put in eval mode and autograd is disabled.
  Both statistics are averaged over the number of samples actually seen.
  """
  training = optimizer is not None
  model.train(training)

  total_loss = 0.0      # sum of per-sample losses in this pass
  total_correct = 0     # number of correct predictions in this pass
  total_seen = 0        # number of samples in this pass

  # Gradients are only needed while training; skipping them during validation
  # avoids building autograd graphs that are never used.
  with torch.set_grad_enabled(training):
    for inputs, labels in dataloader:
      inputs = inputs.to(device)
      labels = labels.to(device)
      if training:
        optimizer.zero_grad()
      outputs = model(inputs)
      loss = criterion(outputs, labels)
      _, preds = torch.max(outputs, dim=1)    # max value and its index along each row
      if training:
        loss.backward()
        optimizer.step()
      # Statistics: the criterion result is multiplied by the batch size, as before.
      batch_count = inputs.size(0)
      total_loss += loss.item() * batch_count
      total_correct += torch.sum(preds == labels).item()
      total_seen += batch_count

  if total_seen == 0:
    raise ValueError("dataloader yielded no samples")
  return total_loss / total_seen, total_correct / total_seen


def train_model(model, train_dataloader, valid_dataloader, criterion, optimizer, num_epochs=25,
                device=None, save_path="best.pt"):
  """Train ``model`` for ``num_epochs`` epochs, validating after each one.

  Every epoch prints the training and validation loss / accuracy. Whenever the
  validation accuracy exceeds the best value so far, the weights are remembered
  and the whole model object is written to ``save_path`` with ``torch.save``.
  When training ends, the best remembered weights are loaded back into ``model``
  and the model is left in eval mode.

  Args:
    model: the network to train; it is modified in place.
    train_dataloader: iterable of ``(inputs, labels)`` batches used for training.
    valid_dataloader: iterable of ``(inputs, labels)`` batches used for validation.
    criterion: callable ``criterion(outputs, labels)`` returning the loss.
    optimizer: optimizer that updates the model's trainable parameters.
    num_epochs: number of epochs to run.
    device: device the batches are moved to. Defaults to the device of the
      model's first parameter (CPU if the model has no parameters).
    save_path: checkpoint path for the best model; ``None`` disables saving.

  Returns:
    None.

  Raises:
    ValueError: if one of the dataloaders yields no samples.
  """
  if device is None:
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

  best_model_wts = copy.deepcopy(model.state_dict())
  best_acc = 0.0

  for epoch in range(num_epochs):
    print('Epoch{}/{}'.format(epoch, num_epochs-1))
    print("-"*10)

    # Each epoch has a training phase followed by a validation phase.
    epoch_loss, epoch_acc = _run_epoch(model, train_dataloader, criterion, device, optimizer)
    print('traing loss: {:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))

    epoch_loss, epoch_acc = _run_epoch(model, valid_dataloader, criterion, device)
    print('validation loss: {:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))

    if epoch_acc > best_acc:
      best_acc = epoch_acc
      best_model_wts = copy.deepcopy(model.state_dict())
      if save_path is not None:
        # Saves the entire model object (not just the state_dict).
        torch.save(model, save_path)

  # Leave the caller's model holding the best validation weights, not the last epoch's.
  model.load_state_dict(best_model_wts)

In [None]:
# Start training. The epoch count comes from the `num_epochs` setting defined in the
# configuration cell rather than a second hard-coded 25, so changing it there takes effect here.
train_model(model_ft, train_dataloader, valid_dataloader, criterion, optimizer_ft, num_epochs=num_epochs)

# 继续训练

继续训练所有的层

In [None]:
# Reload the best checkpoint written during training. map_location places the model on
# the current `device`, so the cell also works when the checkpoint was saved on another device.
# NOTE(review): "best.pt" is a fully pickled model; recent PyTorch releases reportedly
# default torch.load to weights_only=True, in which case weights_only=False is needed
# here -- confirm for the installed version. Only unpickle checkpoints you trust.
model_ft = torch.load("best.pt", map_location=device)

# Unfreeze: train every layer
for param in model_ft.parameters():
  param.requires_grad = True

# A smaller learning rate can be used now
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.0001, momentum=0.9)

# Loss function
criterion = nn.CrossEntropyLoss()

In [None]:
# Fine-tune the whole network for 10 more epochs.
train_model(
    model=model_ft,
    train_dataloader=train_dataloader,
    valid_dataloader=valid_dataloader,
    criterion=criterion,
    optimizer=optimizer_ft,
    num_epochs=10,
)

# 测试模型

测试测试集的数据（见训练与测试函数）

测试一个batch的数据

In [None]:
# Take one batch from the validation loader.
# The builtin next() is used because DataLoader iterators no longer provide a .next() method.
dataiter = iter(valid_dataloader)
images, labels = next(dataiter)
inputs = images.to(device)

# Load the best model saved during training
model = torch.load("best.pt", map_location=device)
model.eval()    # make inference mode explicit instead of relying on the pickled flag

# Predict (no gradients are needed for inference)
with torch.no_grad():
  output = model(inputs)

# Pick the class with the highest score for each sample
print(output.shape)
_, preds_index = torch.max(output, dim=1)
# .cpu() is a no-op for CPU tensors, so no CUDA check is needed; squeeze drops size-1 dimensions
preds = np.squeeze(preds_index.cpu().numpy())
print(preds)

测试一张图像

In [None]:
# Reference: https://github.com/ksatola/Image-Classifier-with-Deep-Learning/blob/master/predict.py

def process_image(image_path):
    """Load an image file and preprocess it like the validation pipeline.

    Steps: convert to RGB, resize so the shorter side is 256 pixels (aspect ratio
    kept, small images are enlarged), centre-crop to 224x224, scale to 0-1,
    normalise each channel with the same mean / std as the dataset transforms,
    and move the channel axis first.

    Args:
        image_path: path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        A float64 ``numpy.ndarray`` of shape ``(3, 224, 224)``.
    """
    resize_to = 256
    crop_to = 224

    # Read the image; the context manager closes the file handle. Converting to RGB
    # guarantees three channels, so grayscale / RGBA / palette images also work.
    with Image.open(image_path) as source:
        img = source.convert("RGB")

    # Resize the shorter side to `resize_to`. Image.thumbnail can only shrink, which
    # left images smaller than the crop to be padded with black by crop().
    width, height = img.size
    if width <= height:
        new_size = (resize_to, int(resize_to * height / width))
    else:
        new_size = (int(resize_to * width / height), resize_to)
    img = img.resize(new_size, Image.BILINEAR)

    # Centre crop with an integer box.
    left = int(round((img.width - crop_to) / 2.0))
    upper = int(round((img.height - crop_to) / 2.0))
    img = img.crop((left, upper, left + crop_to, upper + crop_to))

    # Same scaling and normalisation as the dataset transforms
    pixels = np.array(img) / 255
    mean = np.array([0.485, 0.456, 0.406])    # provided mean
    std = np.array([0.229, 0.224, 0.225])     # provided std
    pixels = (pixels - mean) / std

    # The array is in HWC layout (height, width, channels) while PyTorch requires
    # CHW, so the colour channel is moved to the first position.
    return pixels.transpose((2, 0, 1))

# Load the model and the data
model = torch.load("best.pt", map_location=device)
model.eval()

# Path of the image to classify. The first validation sample is used as an example
# (ImageFolder.samples holds (path, class_index) pairs); replace it with any image file path.
image_path = valid_datasets.samples[0][0]
image = process_image(image_path)    # function defined above
image = torch.from_numpy(image).type(torch.FloatTensor)    # convert to a float tensor
image = image.unsqueeze(0)    # add a batch dimension of size 1
# Feed the processed single image (not the earlier `images` batch) to the model;
# the name `inputs` avoids shadowing the builtin `input`.
inputs = image.to(device)

# Predict
with torch.no_grad():
  output = model(inputs)
print(output)

# Output the result
_, preds_index = torch.max(output, dim=1)
# .cpu() is a no-op for CPU tensors, so no CUDA check is needed; squeeze drops size-1 dimensions
preds = np.squeeze(preds_index.cpu().numpy())
print(preds)