In [1]:
%matplotlib inline
import torch
import torchvision
from torch import nn
import utils
from pathlib import Path
from PIL import Image
import pandas as pd
import json
import numpy as np
from collections import OrderedDict

In [2]:
data_dir = Path(r'../data/cifar-10')
img_train_dir = data_dir / 'train'
img_test_dir = data_dir / 'test'
train_labels_csv = data_dir / 'trainLabels.csv'

In [3]:
class_mapping = {
    'airplane': 0,
    'automobile': 1,
    'bird': 2,
    'cat': 3,
    'deer': 4,
    'dog': 5,
    'frog': 6,
    'horse': 7,
    'ship': 8,
    'truck': 9
}

In [4]:
train_labels_df = pd.read_csv(train_labels_csv.as_posix())
train_labels_df.head()

Unnamed: 0,id,label
0,1,frog
1,2,truck
2,3,truck
3,4,deer
4,5,automobile


In [5]:
train_labels_df['label'] = train_labels_df['label'].apply(lambda x: class_mapping[x])
train_labels_df.head()

Unnamed: 0,id,label
0,1,6
1,2,9
2,3,9
3,4,4
4,5,1


In [6]:
train_labels = train_labels_df['label'].values.tolist()
train_labels[:10]

[6, 9, 9, 4, 1, 1, 2, 7, 8, 3]

In [7]:
class CifarDataset(torch.utils.data.Dataset):
    """
        cifar-10数据集
    """
    def __init__(self, data_dir: Path, transforms=None, is_train=True):
        """
            data_dir: 数据集目录
            data_csv: 数据-类对应文件
            transforms：数据增广
        """
        self.data_dir = data_dir
        self.transforms = transforms
        self.is_train = is_train
        self.data_list = self.read_data()
        
    def read_data(self):
        """
            读取数据
        """
        return [img_path for img_path in self.data_dir.iterdir()]

    def __getitem__(self, i):
        # print(self.data_list[i])
        img_path = self.data_list[i]
        img = Image.open(img_path.as_posix()).convert('RGB')
        if self.transforms is not None:
            img = self.transforms(img)          
        if self.is_train:
            img_id = int(img_path.stem)
            label = train_labels[img_id-1]
            return img, label     
        return img
    
    def __len__(self):
        return len(self.data_list)

In [8]:
# 使用RGB通道的均值和标准差，以标准化每个通道
normalize = torchvision.transforms.Normalize(
    [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

train_augs = torchvision.transforms.Compose([
    torchvision.transforms.Resize(size=(128, 128)),
    torchvision.transforms.RandomResizedCrop(size=(112, 112), scale=(0.64, 1), ratio=(0.8, 1.2)),
    torchvision.transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.ToTensor(),
    normalize])

test_augs = torchvision.transforms.Compose([
    torchvision.transforms.Resize(size=(112, 112)),
    torchvision.transforms.ToTensor(),
    normalize])

In [9]:
train_dataset_all = CifarDataset(img_train_dir, transforms=train_augs)
test_dataset = CifarDataset(img_test_dir, transforms=test_augs, is_train=False)

In [10]:
valid_dataset_len = int(len(train_dataset_all) / 10)
train_dataset_len = len(train_dataset_all) - valid_dataset_len

train_dataset, valid_dataset = torch.utils.data.random_split(
    dataset=train_dataset_all,
    lengths=[train_dataset_len, valid_dataset_len],
    generator=torch.Generator().manual_seed(0)
)

In [11]:
net = torchvision.models.resnet34(pretrained=True)
net

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [12]:
# 自定义fc层，将最终的输出类别设置为当前需要的10
# net.fc = nn.Sequential(
#     OrderedDict(
#         [('fc', nn.Linear(net.fc.in_features, 512)),
#          ('relu', nn.ReLU(inplace=True)),
#          ('fc2', nn.Linear(512, 10))]
#     )
# )
# nn.init.xavier_uniform_(net.fc.fc.weight);
# nn.init.xavier_uniform_(net.fc.fc2.weight);

net.fc = nn.Linear(net.fc.in_features, 256)
net = nn.Sequential(
    OrderedDict(
        [('resnet', net),
         ('relu', nn.ReLU(inplace=True)),
         ('fc', nn.Linear(256, 10))]
    )
)
nn.init.xavier_uniform_(net.resnet.fc.weight);
nn.init.xavier_uniform_(net.fc.weight);

# net.fc = nn.Linear(net.fc.in_features, 512)
# net.add_module('relu2', nn.ReLU(inplace=True))
# net.add_module('fc2', nn.Linear(512, 10))
# nn.init.xavier_uniform_(net.fc.weight);
# nn.init.xavier_uniform_(net.fc2.weight);
net

Sequential(
  (resnet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_r

In [13]:
# 测试网络畅通
random_x = torch.randn(size=[1, 3, 112, 112])
random_y = net(random_x)
random_y.shape

torch.Size([1, 10])

In [11]:
# 如果param_group=True，输出层中的模型参数将使用十倍的学习率
def train_fine_tuning(net, learning_rate, batch_size=128, num_epochs=5, weight_decay=1e-3, 
                      fc_lr_times=10, param_group=True, freeze=False, train_all_data=False):
    devices = utils.try_all_gpus()
    loss = nn.CrossEntropyLoss(reduction="none")
    if freeze:
        for name, param in net.named_parameters():
            if name not in ["resnet.fc.weight", "resnet.fc.bias", "fc.weight", "fc.bias"]:
                param.requires_grad = False
    if param_group:
        params_1x = [param for name, param in net.named_parameters()
             if name not in ["resnet.fc.weight", "resnet.fc.bias", "fc.weight", "fc.bias"]]
        trainer = torch.optim.SGD(
            [
                {'params': params_1x}, 
                {'params': net.fc.parameters(), 
                 'lr': learning_rate * fc_lr_times}
            ], 
            lr=learning_rate, 
            weight_decay=weight_decay
        )
    else:
        trainer = torch.optim.SGD(net.parameters(), lr=learning_rate, weight_decay=weight_decay)
    
    if train_all_data:
        train_iter = torch.utils.data.DataLoader(
            train_dataset_all, batch_size=batch_size, shuffle=True)
        valid_iter = None
    else:
        train_iter = torch.utils.data.DataLoader(
            train_dataset, batch_size=batch_size, shuffle=True)
        valid_iter = torch.utils.data.DataLoader(
            valid_dataset, batch_size=batch_size)
    utils.train_gpus(net, train_iter, valid_iter, loss, trainer, num_epochs, devices)

In [15]:
train_times = 5
for i in range(train_times):
    print(f'--- {i+1}/{train_times} ---')
    # 训练模型
    train_fine_tuning(net, learning_rate=5e-5, batch_size=256, num_epochs=6, 
                      weight_decay=1e-3, fc_lr_times=10)
    # 保存训练好的模型weight
    save_model_path = Path(fr'../data/cifar_resnet34_state_dict_{i+1}.sd')
    torch.save(net.state_dict(), save_model_path.as_posix())

--- 1/5 ---
training on: [device(type='cuda', index=0)], [2024-06-13 01:00:41]
epoch: 1/6, loss 0.655, train acc 0.781, test acc 0.892
epoch: 2/6, loss 0.273, train acc 0.907, test acc 0.912
epoch: 3/6, loss 0.212, train acc 0.927, test acc 0.923
epoch: 4/6, loss 0.181, train acc 0.938, test acc 0.936
epoch: 5/6, loss 0.155, train acc 0.947, test acc 0.936
epoch: 6/6, loss 0.136, train acc 0.953, test acc 0.936
*** 430.1 examples/sec on [device(type='cuda', index=0)] - [0:00:10:27], all: [0:00:21:20] ***
--- 2/5 ---
training on: [device(type='cuda', index=0)], [2024-06-13 01:22:00]
epoch: 1/6, loss 0.121, train acc 0.958, test acc 0.942
epoch: 2/6, loss 0.103, train acc 0.964, test acc 0.942
epoch: 3/6, loss 0.101, train acc 0.965, test acc 0.940
epoch: 4/6, loss 0.088, train acc 0.969, test acc 0.946
epoch: 5/6, loss 0.081, train acc 0.972, test acc 0.945
epoch: 6/6, loss 0.073, train acc 0.974, test acc 0.947
*** 433.0 examples/sec on [device(type='cuda', index=0)] - [0:00:10:23], al

In [18]:
# 调整学习率、权重衰减等参数，多次学习调整
train_fine_tuning(net, learning_rate=1e-5, batch_size=256, num_epochs=10, 
                  weight_decay=1e-3, fc_lr_times=10, train_all_data=True)

training on: [device(type='cuda', index=0)], [2024-06-13 07:59:55]
epoch: 1/10, loss 0.035, train acc 0.989, test acc 0.000
epoch: 2/10, loss 0.034, train acc 0.989, test acc 0.000
epoch: 3/10, loss 0.031, train acc 0.990, test acc 0.000
epoch: 4/10, loss 0.030, train acc 0.990, test acc 0.000
epoch: 5/10, loss 0.029, train acc 0.991, test acc 0.000
epoch: 6/10, loss 0.028, train acc 0.991, test acc 0.000
epoch: 7/10, loss 0.027, train acc 0.991, test acc 0.000
epoch: 8/10, loss 0.027, train acc 0.992, test acc 0.000
epoch: 9/10, loss 0.025, train acc 0.992, test acc 0.000
epoch: 10/10, loss 0.025, train acc 0.992, test acc 0.000
*** 433.0 examples/sec on [device(type='cuda', index=0)] - [0:00:19:14], all: [0:00:41:29] ***


In [12]:
model_i = 6
save_model_path = Path(fr'../data/cifar_resnet34_state_dict_{model_i}.sd')

In [20]:
# 保存训练好的模型weight
torch.save(net.state_dict(), save_model_path.as_posix())

In [13]:
# 加载训练好的模型
net = None
devices = utils.try_all_gpus()
# 加载模型参数state_dict
net = torchvision.models.resnet34()
# 自定义fc层，将最终的输出类别设置为当前需要的10
net.fc = nn.Linear(net.fc.in_features, 256)
net = nn.Sequential(
    OrderedDict(
        [('resnet', net),
         ('relu', nn.ReLU(inplace=True)),
         ('fc', nn.Linear(256, 10))]
    )
)

net.load_state_dict(torch.load(save_model_path.as_posix()))
# net.to(torch.device('cpu'))
net.to(devices[0])

Sequential(
  (resnet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_r

In [14]:
net.eval()
batch_size = 256
test_iter = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size)

In [15]:
y_all = torch.Tensor([], device=torch.device('cpu'))
y_hat_list = list()

In [16]:
for i, X in enumerate(test_iter):
    # X = X.to(torch.device('cpu'))
    X = X.to(devices[0])
    y_hat = net(X)
    y_hat = y_hat.to(torch.device('cpu'))
    y_hat = y_hat.argmax(axis=1)
    y_hat_list.append(y_hat)
    print(i, i * batch_size + len(y_hat), end='\r')
y_all = torch.cat([*y_hat_list], dim=0)
print(y_all.shape)

torch.Size([300000])


In [17]:
y_all[:10]

tensor([4, 3, 2, 4, 6, 3, 4, 0, 6, 1])

In [19]:
class_mapping_reverse = dict()
for k, v in class_mapping.items():
    class_mapping_reverse[str(v)] = k
class_mapping_reverse

{'0': 'airplane',
 '1': 'automobile',
 '2': 'bird',
 '3': 'cat',
 '4': 'deer',
 '5': 'dog',
 '6': 'frog',
 '7': 'horse',
 '8': 'ship',
 '9': 'truck'}

In [20]:
y_all_mapping_list = list()
for i in y_all:
    i_num = i.item()
    i_mapping = class_mapping_reverse.get(str(i_num))
    y_all_mapping_list.append(i_mapping)
y_all_mapping_list[:10]

['deer',
 'cat',
 'bird',
 'deer',
 'frog',
 'cat',
 'deer',
 'airplane',
 'frog',
 'automobile']

In [25]:
test_dataset.data_list[:10]

[WindowsPath('../data/cifar-10/test/1.png'),
 WindowsPath('../data/cifar-10/test/10.png'),
 WindowsPath('../data/cifar-10/test/100.png'),
 WindowsPath('../data/cifar-10/test/1000.png'),
 WindowsPath('../data/cifar-10/test/10000.png'),
 WindowsPath('../data/cifar-10/test/100000.png'),
 WindowsPath('../data/cifar-10/test/100001.png'),
 WindowsPath('../data/cifar-10/test/100002.png'),
 WindowsPath('../data/cifar-10/test/100003.png'),
 WindowsPath('../data/cifar-10/test/100004.png')]

In [30]:
img_id_list = list()
for img_path in test_dataset.data_list:
    img_id = img_path.stem
    img_id_list.append(int(img_id))

In [31]:
submission_data = {
    'id': img_id_list,
    'label': y_all_mapping_list
}
y_df = pd.DataFrame(submission_data)
y_df.head(5)

Unnamed: 0,id,label
0,1,deer
1,10,cat
2,100,bird
3,1000,deer
4,10000,frog


In [33]:
y_df_sort = y_df.sort_values(by=['id'], axis=0, ascending=True)
y_df_sort.head(5)

Unnamed: 0,id,label
0,1,deer
111111,2,airplane
222222,3,automobile
233334,4,ship
244445,5,airplane


In [34]:
y_df_sort.to_csv((data_dir / 'submission.csv').as_posix(), index=False)