In [1]:
%matplotlib inline
import torch
import torchvision
from torch import nn
import utils
from pathlib import Path
from PIL import Image
import pandas as pd
import json
import numpy as np

In [2]:
data_dir = Path(r'../data/classify-leaves')
data_csv_train = data_dir / 'train.csv'
data_csv_test = data_dir / 'test.csv'
data_images = data_dir / 'images'
resnet34_state_dict_path = Path('../data/resnet34_state_dict.sd')

In [3]:
class LeavesDataset(torch.utils.data.Dataset):
    """
        树叶数据集
    """
    def __init__(self, data_dir: Path, data_csv: Path, transforms=None, is_train=True):
        """
            data_dir: 数据集目录
            data_csv: 数据-类对应文件
            transforms：数据增广
        """
        self.data_dir = data_dir
        self.data_csv = data_csv
        self.class_mapping_path = data_dir / 'class_mapping.json'
        self.transforms = transforms
        self.is_train = is_train
        self.class_mapping_dict = dict()
        self.data_list = self.read_data()
        
    def read_data(self):
        """
            读取数据
        """
        df = pd.read_csv(self.data_csv.as_posix())
        if not self.class_mapping_path.exists():
            unique_labels = df['label'].unique() 
            for i, class_name in enumerate(unique_labels):
                self.class_mapping_dict[str(i)] = class_name
                self.class_mapping_dict[class_name] = i
            with open(self.class_mapping_path.as_posix(), 'w', encoding='utf8') as fp:
                json.dump(self.class_mapping_dict, fp)
        else:
            with open(self.class_mapping_path.as_posix(), 'r', encoding='utf8') as fp:
                self.class_mapping_dict = json.load(fp)
        return df.values.tolist()
    
    def __getitem__(self, i):
        # print(self.data_list[i])
        if self.is_train:
            _img_path, label = self.data_list[i]
        else:
            _img_path = self.data_list[i]
            if isinstance(_img_path, list):
                _img_path = _img_path[0]
        img_path = self.data_dir / _img_path
        img = Image.open(img_path.as_posix()).convert('RGB')
        if self.transforms is not None:
            img = self.transforms(img)
        return (img, self.class_mapping_dict[label]) if self.is_train else img
    
    def __len__(self):
        return len(self.data_list)

In [4]:
# 使用RGB通道的均值和标准差，以标准化每个通道
normalize = torchvision.transforms.Normalize(
    [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

train_augs = torchvision.transforms.Compose([
    torchvision.transforms.RandomResizedCrop(224),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.ToTensor(),
    normalize])

test_augs = torchvision.transforms.Compose([
    torchvision.transforms.Resize([224, 224]),
    torchvision.transforms.ToTensor(),
    normalize])

In [5]:
leaves_dataset_train = LeavesDataset(data_dir, data_csv_train, transforms=train_augs)
leaves_dataset_test = LeavesDataset(data_dir, data_csv_test, transforms=test_augs, is_train=False)

In [6]:
train_dataset_len = int(0.75 * len(leaves_dataset_train))
test_dataset_len = len(leaves_dataset_train) - train_dataset_len
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset=leaves_dataset_train,
    lengths=[train_dataset_len, test_dataset_len],
    generator=torch.Generator().manual_seed(0)
)

In [7]:
pretrained_net = torchvision.models.resnet34(pretrained=True)
pretrained_net

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [8]:
pretrained_net.fc

Linear(in_features=512, out_features=1000, bias=True)

In [9]:
# 变更最后一层fc，将输出类别设置为当前需要的176
pretrained_net.fc = nn.Linear(pretrained_net.fc.in_features, 176)
nn.init.xavier_uniform_(pretrained_net.fc.weight);
pretrained_net

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [10]:
# 如果param_group=True，输出层中的模型参数将使用十倍的学习率
def train_fine_tuning(net, learning_rate, batch_size=128, num_epochs=5, param_group=True, freeze=True):
    train_iter = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_iter = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size)
    devices = utils.try_all_gpus()
    loss = nn.CrossEntropyLoss(reduction="none")
    if freeze:
        for name, param in net.named_parameters():
            if name not in ["fc.weight", "fc.bias"]:
                param.requires_grad = False
    if param_group:
        params_1x = [param for name, param in net.named_parameters()
             if name not in ["fc.weight", "fc.bias"]]
        trainer = torch.optim.SGD(
            [
                {'params': params_1x}, 
                {'params': net.fc.parameters(), 
                 'lr': learning_rate * 20}
            ], 
            lr=learning_rate, 
            weight_decay=0.001
        )
    else:
        trainer = torch.optim.SGD(net.parameters(), lr=learning_rate, weight_decay=0.001)
    utils.train_gpus(net, train_iter, test_iter, loss, trainer, num_epochs, devices)

In [11]:
train_fine_tuning(pretrained_net, learning_rate=5e-5, batch_size=64, num_epochs=20, freeze=False)

epoch: 1/20, loss 3.931, train acc 0.180, test acc 0.381
epoch: 2/20, loss 2.172, train acc 0.463, test acc 0.516
epoch: 3/20, loss 1.627, train acc 0.583, test acc 0.587
epoch: 4/20, loss 1.333, train acc 0.650, test acc 0.652
epoch: 5/20, loss 1.150, train acc 0.702, test acc 0.692
epoch: 6/20, loss 1.042, train acc 0.729, test acc 0.715
epoch: 7/20, loss 0.952, train acc 0.751, test acc 0.732
epoch: 8/20, loss 0.873, train acc 0.770, test acc 0.753
epoch: 9/20, loss 0.830, train acc 0.780, test acc 0.753
epoch: 10/20, loss 0.752, train acc 0.802, test acc 0.763
epoch: 11/20, loss 0.694, train acc 0.819, test acc 0.772
epoch: 12/20, loss 0.698, train acc 0.814, test acc 0.790
epoch: 13/20, loss 0.680, train acc 0.822, test acc 0.800
epoch: 14/20, loss 0.643, train acc 0.833, test acc 0.806
epoch: 15/20, loss 0.598, train acc 0.841, test acc 0.796
epoch: 16/20, loss 0.588, train acc 0.846, test acc 0.808
epoch: 17/20, loss 0.570, train acc 0.853, test acc 0.805
epoch: 18/20, loss 0.55

In [12]:
# 保存训练好的模型weight
torch.save(pretrained_net.state_dict(), resnet34_state_dict_path.as_posix())

In [13]:
# 加载训练好的模型
pretrained_net = None
devices = utils.try_all_gpus()
# 加载模型参数state_dict
net = torchvision.models.resnet34()
# 变更最后一层fc，将输出类别设置为当前需要的176
net.fc = nn.Linear(net.fc.in_features, 176)
net.load_state_dict(torch.load(resnet34_state_dict_path.as_posix()))
# net.to(torch.device('cpu'))
net.to(devices[0])
net.eval()

test_iter = torch.utils.data.DataLoader(leaves_dataset_test, batch_size=32)

In [14]:
y_all = torch.Tensor([], device=torch.device('cpu'))

In [15]:
for i, X in enumerate(test_iter):
    # X = X.to(torch.device('cpu'))
    X = X.to(devices[0])
    y_hat = net(X)
    y_hat = y_hat.argmax(axis=1)
    y_hat = y_hat.to(torch.device('cpu'))
    y_all = torch.cat([y_all, y_hat], dim=0)
    print(i, y_all.shape, end='\r')

274 torch.Size([8800])

In [16]:
class_mapping_path = data_dir / 'class_mapping.json'
class_mapping_dict = dict()
with open(class_mapping_path.as_posix(), 'r', encoding='utf8') as fp:
    class_mapping_dict = json.load(fp)

In [17]:
y_all[:10]

tensor([  7.,  96., 136.,  51.,  58.,  20., 104., 110.,   9.,  50.])

In [18]:
y_all = y_all.type(torch.int)
y_all[:10]

tensor([  7,  96, 136,  51,  58,  20, 104, 110,   9,  50], dtype=torch.int32)

In [19]:
y_all_mapping_list = list()
for i in y_all:
    i_num = i.item()
    i_mapping = class_mapping_dict.get(str(i_num))
    y_all_mapping_list.append(i_mapping)

In [20]:
len(y_all_mapping_list)

8800

In [21]:
len(leaves_dataset_test.data_list), leaves_dataset_test.data_list[0]

(8800, ['images/18353.jpg'])

In [22]:
image_path_list = [i[0] for i in leaves_dataset_test.data_list]
len(image_path_list), image_path_list[0]

(8800, 'images/18353.jpg')

In [23]:
submission_data = {
    'image': image_path_list,
    'label': y_all_mapping_list
}

In [24]:
y_df = pd.DataFrame(submission_data)
y_df.head(5)

Unnamed: 0,image,label
0,images/18353.jpg,asimina_triloba
1,images/18354.jpg,populus_grandidentata
2,images/18355.jpg,platanus_acerifolia
3,images/18356.jpg,pinus_bungeana
4,images/18357.jpg,platanus_occidentalis


In [26]:
y_df.to_csv((data_dir / 'submission.csv').as_posix(), index=False)