## 识别羊驼和熊猫的分类模型
### 1. 准备数据集
### 200 张羊驼和熊猫 - 训练集
### 100 张羊驼和熊猫 - 测试集
### 2. 使用 datasets.ImageFolder 对手动整理的数据进行管理和加载

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets,transforms,models
import matplotlib.pyplot as plt
import os

In [None]:
# transforms.Compose() chains several preprocessing steps into one callable.
# Training pipeline:
#   1. Resize the image so that its shorter side is 230 px (aspect ratio kept).
#   2. Center-crop a 224 x 224 patch.
#   3. Randomly flip horizontally to add variety to the training set.
#   4. Convert the image to a Tensor.
#   5. Normalize every channel with mean 0.5 and standard deviation 0.5.
# Test pipeline: resize to 256, same crop / tensor / normalization, no flip.
# NOTE: transforms.Resize is used instead of transforms.Scale, which was
# deprecated and later removed from torchvision.
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize(230),
        transforms.CenterCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
    ]),
    'test': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
    ]),
}

In [None]:
# Root folder of the dataset: a "data" directory next to this code file.
data_directory = 'data'

# datasets.ImageFolder(dataset folder, preprocessing applied to each image)
trainset = datasets.ImageFolder(
    root=os.path.join(data_directory, 'train'),
    transform=data_transforms['train'],
)
testset = datasets.ImageFolder(
    root=os.path.join(data_directory, 'test'),
    transform=data_transforms['test'],
)

# DataLoader: batches of 5 samples, order reshuffled on every pass,
# data loaded by 4 worker subprocesses.
trainloader = torch.utils.data.DataLoader(
    dataset=trainset,
    batch_size=5,
    shuffle=True,
    num_workers=4,
)
testloader = torch.utils.data.DataLoader(
    dataset=testset,
    batch_size=5,
    shuffle=True,
    num_workers=4,
)

In [None]:
# Disabled preview cell: the code below is wrapped in a string literal, so it
# is never executed. If enabled, it would take one batch from trainloader,
# undo the 0.5 / 0.5 normalization (inputs / 2 + 0.5), and display the batch
# as an image grid with matplotlib.
'''
# 使用 matplotlib 展示随机加载的训练样本
import matplotlib.pyplot as plt
def imshow(inputs):
    
    inputs = inputs / 2 + 0.5
    inputs = inputs.numpy().transpose((1, 2, 0))
    print(inputs)
    plt.imshow(inputs)
    plt.show()
    
inputs,classes = next(iter(trainloader))

imshow(torchvision.utils.make_grid(inputs))
'''

In [None]:
# torchvision.models ships AlexNet, VGG, ResNet, SqueezeNet, ...
# Start from an AlexNet with pretrained weights.
alexnet = models.alexnet(pretrained=True)

# Print the model structure:
#   features   module - feature extraction, mostly convolutions
#   classifier module - classification, mostly fully connected layers
print(alexnet)

In [None]:
# Build a binary classifier by redefining the classifier module.
# First freeze every parameter of the pretrained network.
for param in alexnet.parameters():
    param.requires_grad_(False)

# Replace the classifier module. The freshly created fully connected layers
# keep the default requires_grad = True, so transfer learning only updates
# the parameters of these layers.
alexnet.classifier = nn.Sequential(
    nn.Dropout(),
    nn.Linear(256 * 6 * 6, 4096),
    nn.ReLU(inplace=True),
    nn.Dropout(),
    nn.Linear(4096, 4096),
    nn.ReLU(inplace=True),
    nn.Linear(4096, 2),  # two outputs, one per class
)

In [None]:
# Move the model to the GPU when CUDA is available; otherwise keep it as is.
CUDA = torch.cuda.is_available()
alexnet = alexnet.cuda() if CUDA else alexnet

In [None]:
# Cross-entropy loss for the classification task.
criterion = nn.CrossEntropyLoss()
# SGD with momentum, applied to the classifier module's parameters only.
optimizer = optim.SGD(
    params=alexnet.classifier.parameters(),
    lr=0.001,
    momentum=0.9,
)

In [None]:
def train(model, criterion, optimizer, epochs=1, loader=None):
    """Train ``model`` and print the average loss every 10 batches.

    Args:
        model: network to optimize. Each batch is moved to the device of the
            model's parameters before the forward pass.
        criterion: loss function, called as ``criterion(outputs, labels)``.
        optimizer: optimizer whose ``zero_grad()`` / ``step()`` perform the
            parameter update.
        epochs: number of full passes over the training data.
        loader: iterable yielding ``(inputs, labels)`` batches. Defaults to
            the module-level ``trainloader``.
    """
    if loader is None:
        loader = trainloader

    # Number of batches between two progress reports; also the divisor used
    # to average the accumulated loss (the original divided by 100, which
    # under-reported the average by a factor of 10).
    report_every = 10

    # Follow the model's device instead of relying on a global flag.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    # Make sure training-only layers such as dropout are active.
    model.train()

    for epoch in range(epochs):
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(loader):
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if (i + 1) % report_every == 0:
                print('[Epoch: %d, Batch: %5d] Loss: %.3f' % (epoch + 1, i + 1, running_loss / report_every))
                running_loss = 0.0

    print('Finished Training')

In [None]:
def test(testloader, model):
    """Print the classification accuracy of ``model`` over ``testloader``.

    The model is switched to evaluation mode for the duration of the call
    (so dropout is disabled) and its previous mode is restored afterwards.
    Gradients are not tracked during evaluation.

    Args:
        testloader: iterable yielding ``(images, labels)`` batches.
        model: network to evaluate. Each batch is moved to the device of the
            model's parameters before the forward pass.
    """
    # Follow the model's device instead of relying on a global flag.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    correct = 0
    total = 0

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for images, labels in testloader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                # Index of the largest score along dim 1 is the predicted class.
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                # .item() keeps the counter a plain Python int.
                correct += (predicted == labels).sum().item()
    finally:
        model.train(was_training)

    # An empty loader reports 0 instead of raising ZeroDivisionError.
    accuracy = 100 * correct / total if total else 0
    print('Accuracy on the test set: %d %%' % accuracy)

In [None]:
def load_param(model, path):
    """Load saved parameters from ``path`` into ``model`` if the file exists.

    A missing file is skipped without error, leaving ``model`` unchanged.

    Args:
        model: module that receives the parameters via ``load_state_dict``.
        path: location of a file written with ``torch.save(state_dict, path)``.
    """
    if not os.path.exists(path):
        return

    # NOTE: torch.load unpickles the file; only load checkpoints from a
    # trusted source.
    # map_location='cpu' lets a checkpoint that was saved from a GPU model be
    # read on a machine without CUDA; load_state_dict then copies the values
    # into the model's own tensors.
    state = torch.load(path, map_location='cpu')
    model.load_state_dict(state)

In [None]:
def save_param(model, path):
    """Write the parameters (``state_dict``) of ``model`` to ``path``."""
    state = model.state_dict()
    torch.save(state, path)

In [None]:
# Resume from a previously saved checkpoint when one exists.
load_param(model=alexnet, path='tl_model.pkl')

In [None]:
# Train for 2 passes over the training data.
train(model=alexnet, criterion=criterion, optimizer=optimizer, epochs=2)

In [None]:
# Persist the model parameters to disk.
save_param(model=alexnet, path='tl_model.pkl')

In [None]:
# Evaluate on the test set after the 2 training passes.
test(testloader=testloader, model=alexnet)