In [1]:
%matplotlib inline
import torch
import torchvision
from torch import nn
import utils
from pathlib import Path
from PIL import Image
import pandas as pd
import json
import numpy as np

https://www.kaggle.com/competitions/classify-leaves

In [2]:
data_dir = Path(r'../data/classify-leaves')
data_csv_train = data_dir / 'train.csv'
data_csv_test = data_dir / 'test.csv'
data_images = data_dir / 'images'
resnet34_state_dict_path = Path('../data/leaves_resnet34_state_dict.sd')

In [3]:
class LeavesDataset(torch.utils.data.Dataset):
    """
        树叶数据集
    """
    def __init__(self, data_dir: Path, data_csv: Path, transforms=None, is_train=True):
        """
            data_dir: 数据集目录
            data_csv: 数据-类对应文件
            transforms：数据增广
        """
        self.data_dir = data_dir
        self.data_csv = data_csv
        self.class_mapping_path = data_dir / 'class_mapping.json'
        self.transforms = transforms
        self.is_train = is_train
        self.class_mapping_dict = dict()
        self.data_list = self.read_data()
        
    def read_data(self):
        """
            读取数据
        """
        df = pd.read_csv(self.data_csv.as_posix())
        if not self.class_mapping_path.exists():
            unique_labels = df['label'].unique() 
            for i, class_name in enumerate(unique_labels):
                self.class_mapping_dict[str(i)] = class_name
                self.class_mapping_dict[class_name] = i
            with open(self.class_mapping_path.as_posix(), 'w', encoding='utf8') as fp:
                json.dump(self.class_mapping_dict, fp)
        else:
            with open(self.class_mapping_path.as_posix(), 'r', encoding='utf8') as fp:
                self.class_mapping_dict = json.load(fp)
        return df.values.tolist()
    
    def __getitem__(self, i):
        # print(self.data_list[i])
        if self.is_train:
            _img_path, label = self.data_list[i]
        else:
            _img_path = self.data_list[i]
            if isinstance(_img_path, list):
                _img_path = _img_path[0]
        img_path = self.data_dir / _img_path
        img = Image.open(img_path.as_posix()).convert('RGB')
        if self.transforms is not None:
            img = self.transforms(img)
        return (img, self.class_mapping_dict[label]) if self.is_train else img
    
    def __len__(self):
        return len(self.data_list)

In [4]:
# 使用RGB通道的均值和标准差，以标准化每个通道
normalize = torchvision.transforms.Normalize(
    [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

train_augs = torchvision.transforms.Compose([
    torchvision.transforms.RandomResizedCrop(size=(112, 112), scale=(0.9, 1), ratio=(0.8, 1.2)),
    torchvision.transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.ToTensor(),
    normalize])

test_augs = torchvision.transforms.Compose([
    torchvision.transforms.Resize(size=(112, 112)),
    torchvision.transforms.ToTensor(),
    normalize])

In [5]:
leaves_dataset_train = LeavesDataset(data_dir, data_csv_train, transforms=train_augs)
leaves_dataset_test = LeavesDataset(data_dir, data_csv_test, transforms=test_augs, is_train=False)

In [6]:
net = torchvision.models.resnet34(pretrained=True)
net

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [7]:
net.fc

Linear(in_features=512, out_features=1000, bias=True)

In [8]:
# 自定义fc层，将最终的输出类别设置为当前需要的176
net.fc = nn.Linear(net.fc.in_features, 512)
net.add_module('relu2', nn.ReLU(inplace=True))
net.add_module('fc2', nn.Linear(512, 176))
nn.init.xavier_uniform_(net.fc.weight);
nn.init.xavier_uniform_(net.fc2.weight);
net

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [9]:
valid_dataset_len = int(len(leaves_dataset_train) / 10)
train_dataset_len = len(leaves_dataset_train) - valid_dataset_len
train_dataset, valid_dataset = torch.utils.data.random_split(
    dataset=leaves_dataset_train,
    lengths=[train_dataset_len, valid_dataset_len],
    generator=torch.Generator().manual_seed(0)
)

In [10]:
# 如果param_group=True，输出层中的模型参数将使用十倍的学习率
def train_fine_tuning(net, learning_rate, batch_size=128, num_epochs=5, weight_decay=1e-3, 
                      fc_lr_times=20, param_group=True, freeze=False, train_all_data=False):
    devices = utils.try_all_gpus()
    loss = nn.CrossEntropyLoss(reduction="none")
    if freeze:
        for name, param in net.named_parameters():
            if name not in ["fc.weight", "fc.bias", "fc2.weight", "fc2.bias"]:
                param.requires_grad = False
    if param_group:
        params_1x = [param for name, param in net.named_parameters()
             if name not in ["fc.weight", "fc.bias", "fc2.weight", "fc2.bias"]]
        trainer = torch.optim.SGD(
            [
                {'params': params_1x}, 
                {'params': net.fc.parameters(), 
                 'lr': learning_rate * fc_lr_times}
            ], 
            lr=learning_rate, 
            weight_decay=weight_decay
        )
    else:
        trainer = torch.optim.SGD(net.parameters(), lr=learning_rate, weight_decay=weight_decay)
    
    if train_all_data:
        train_iter = torch.utils.data.DataLoader(
            leaves_dataset_train, batch_size=batch_size, shuffle=True)
        valid_iter = None
    else:
        train_iter = torch.utils.data.DataLoader(
            train_dataset, batch_size=batch_size, shuffle=True)
        valid_iter = torch.utils.data.DataLoader(
            valid_dataset, batch_size=batch_size)
    utils.train_gpus(net, train_iter, valid_iter, loss, trainer, num_epochs, devices)

In [11]:
train_times = 5
for i in range(train_times):
    print(f'--- {i+1}/{train_times} ---')
    # 训练模型
    train_fine_tuning(net, learning_rate=5e-5, batch_size=128, num_epochs=6, 
                      weight_decay=1e-3, fc_lr_times=10)
    # 保存训练好的模型weight
    save_model_path = Path(fr'../data/leaves_resnet34_state_dict_{i+1}.sd')
    torch.save(net.state_dict(), save_model_path.as_posix())

--- 1/5 ---
training on: [device(type='cuda', index=0)], [2024-06-13 08:51:29]
epoch: 1/6, loss 3.525, train acc 0.284, test acc 0.488
epoch: 2/6, loss 1.500, train acc 0.615, test acc 0.647
epoch: 3/6, loss 0.986, train acc 0.736, test acc 0.731
epoch: 4/6, loss 0.737, train acc 0.803, test acc 0.761
epoch: 5/6, loss 0.584, train acc 0.842, test acc 0.796
epoch: 6/6, loss 0.470, train acc 0.872, test acc 0.809
*** 434.2 examples/sec on [device(type='cuda', index=0)] - [0:00:03:48], all: [0:00:10:05] ***
--- 2/5 ---
training on: [device(type='cuda', index=0)], [2024-06-13 09:01:33]
epoch: 1/6, loss 0.388, train acc 0.894, test acc 0.823
epoch: 2/6, loss 0.332, train acc 0.908, test acc 0.819
epoch: 3/6, loss 0.289, train acc 0.922, test acc 0.837
epoch: 4/6, loss 0.252, train acc 0.933, test acc 0.858
epoch: 5/6, loss 0.217, train acc 0.942, test acc 0.861
epoch: 6/6, loss 0.199, train acc 0.947, test acc 0.866
*** 434.9 examples/sec on [device(type='cuda', index=0)] - [0:00:03:47], al

In [13]:
# 调整学习率、权重衰减等参数，多次学习调整
train_fine_tuning(net, learning_rate=1e-5, batch_size=128, num_epochs=10, 
                  weight_decay=1e-2, fc_lr_times=10)

training on: [device(type='cuda', index=0)], [2024-06-13 09:56:26]
epoch: 1/10, loss 0.044, train acc 0.989, test acc 0.912
epoch: 2/10, loss 0.046, train acc 0.988, test acc 0.905
epoch: 3/10, loss 0.043, train acc 0.989, test acc 0.910
epoch: 4/10, loss 0.044, train acc 0.988, test acc 0.911
epoch: 5/10, loss 0.042, train acc 0.989, test acc 0.904
epoch: 6/10, loss 0.043, train acc 0.988, test acc 0.904
epoch: 7/10, loss 0.043, train acc 0.989, test acc 0.901
epoch: 8/10, loss 0.043, train acc 0.988, test acc 0.908
epoch: 9/10, loss 0.043, train acc 0.987, test acc 0.910
epoch: 10/10, loss 0.041, train acc 0.989, test acc 0.911
*** 445.1 examples/sec on [device(type='cuda', index=0)] - [0:00:06:11], all: [0:00:13:47] ***


In [14]:
# 训练所有数据
train_fine_tuning(net, learning_rate=1e-5, batch_size=128, num_epochs=10, 
                  weight_decay=2e-3, fc_lr_times=10, train_all_data=True)

training on: [device(type='cuda', index=0)], [2024-06-13 10:10:22]
epoch: 1/10, loss 0.067, train acc 0.980, test acc 0.000
epoch: 2/10, loss 0.065, train acc 0.982, test acc 0.000
epoch: 3/10, loss 0.061, train acc 0.982, test acc 0.000
epoch: 4/10, loss 0.062, train acc 0.981, test acc 0.000
epoch: 5/10, loss 0.062, train acc 0.982, test acc 0.000
epoch: 6/10, loss 0.061, train acc 0.982, test acc 0.000
epoch: 7/10, loss 0.057, train acc 0.983, test acc 0.000
epoch: 8/10, loss 0.056, train acc 0.984, test acc 0.000
epoch: 9/10, loss 0.054, train acc 0.985, test acc 0.000
epoch: 10/10, loss 0.054, train acc 0.984, test acc 0.000
*** 444.6 examples/sec on [device(type='cuda', index=0)] - [0:00:06:52], all: [0:00:14:10] ***


In [15]:
model_i = 6
save_model_path = Path(fr'../data/resnet34_state_dict_{model_i}.sd')

In [16]:
# 保存训练好的模型weight
torch.save(net.state_dict(), save_model_path.as_posix())

In [17]:
# 加载训练好的模型
net = None
devices = utils.try_all_gpus()
# 加载模型参数state_dict
net = torchvision.models.resnet34()
# 自定义fc层，将最终的输出类别设置为当前需要的176
net.fc = nn.Linear(net.fc.in_features, 512)
net.add_module('relu2', nn.ReLU(inplace=True))
net.add_module('fc2', nn.Linear(512, 176))
net.load_state_dict(torch.load(save_model_path.as_posix()))
# net.to(torch.device('cpu'))
net.to(devices[0])

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [18]:
net.eval()
test_iter = torch.utils.data.DataLoader(leaves_dataset_test, batch_size=128)

In [19]:
y_all = torch.Tensor([], device=torch.device('cpu'))

In [20]:
for i, X in enumerate(test_iter):
    # X = X.to(torch.device('cpu'))
    X = X.to(devices[0])
    y_hat = net(X)
    y_hat = y_hat.to(torch.device('cpu'))
    y_hat = y_hat.argmax(axis=1)
    y_all = torch.cat([y_all, y_hat], dim=0)
    print(i, y_all.shape, end='\r')

68 torch.Size([8800])

In [21]:
class_mapping_path = data_dir / 'class_mapping.json'
class_mapping_dict = dict()
with open(class_mapping_path.as_posix(), 'r', encoding='utf8') as fp:
    class_mapping_dict = json.load(fp)

In [22]:
y_all[:10]

tensor([  7.,  58., 136.,  51.,  58.,  20., 104., 110.,   9.,  50.])

In [23]:
y_all = y_all.type(torch.int)
y_all[:10]

tensor([  7,  58, 136,  51,  58,  20, 104, 110,   9,  50], dtype=torch.int32)

In [24]:
y_all_mapping_list = list()
for i in y_all:
    i_num = i.item()
    i_mapping = class_mapping_dict.get(str(i_num))
    y_all_mapping_list.append(i_mapping)

In [25]:
len(y_all_mapping_list)

8800

In [26]:
len(leaves_dataset_test.data_list), leaves_dataset_test.data_list[0]

(8800, ['images/18353.jpg'])

In [27]:
image_path_list = [i[0] for i in leaves_dataset_test.data_list]
len(image_path_list), image_path_list[0]

(8800, 'images/18353.jpg')

In [28]:
submission_data = {
    'image': image_path_list,
    'label': y_all_mapping_list
}

In [29]:
y_df = pd.DataFrame(submission_data)
y_df.head(5)

Unnamed: 0,image,label
0,images/18353.jpg,asimina_triloba
1,images/18354.jpg,platanus_occidentalis
2,images/18355.jpg,platanus_acerifolia
3,images/18356.jpg,pinus_bungeana
4,images/18357.jpg,platanus_occidentalis


In [30]:
y_df.to_csv((data_dir / 'submission.csv').as_posix(), index=False)