In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils import data
import torchvision
from torchvision import transforms
from torch.utils.tensorboard import SummaryWriter
import matplotlib.pyplot as plt
%matplotlib inline
from d2l import torch as d2l
import random
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
import time
import pandas as pd
from PIL import Image
from modules import *

In [2]:
# Root folder of the dataset; later cells build image paths from it.
path = '../data/dog-breed-identification/'
labels_file, sample_file = path + 'labels.csv', path + 'sample_submission.csv'
train_csv, test_csv = pd.read_csv(labels_file), pd.read_csv(sample_file)
# Sorted unique breed names; a breed's position in this list is its class index.
label_list = train_csv['breed'].unique().tolist()
label_list.sort()
print(train_csv.shape, test_csv.shape)

(10222, 2) (10357, 121)


In [3]:
# images = []
# for i in range(len(train_csv['id'])):
#     image = (transforms.ToTensor()(Image.open(path + 'train/' + train_csv['id'][i] + '.jpg'))).to(torch.float32).flatten(1, 2)
#     images.append(image)
# flattened_image = torch.cat(images, dim=1)
# print(flattened_image.shape)                                    # torch.Size([3, 1882650608])
# print(flattened_image.mean(dim=1), flattened_image.std(dim=1))  # tensor([0.4736, 0.4504, 0.3909]) tensor([0.2655, 0.2607, 0.2650])

In [4]:
class TrainDataset(data.Dataset):
    """Augmented training view over a dataset of ``(PIL image, label)`` pairs.

    Every item is first resized with a randomly drawn ``Resize`` size, then
    randomly cropped to 224, randomly flipped, colour-jittered, converted to a
    tensor and normalised with the per-channel statistics below.
    """

    def __init__(self, dataset):
        super().__init__()
        self.dataset = dataset

        jitter = transforms.ColorJitter(brightness=0.2,
                                        contrast=0.2,
                                        saturation=0.2,
                                        hue=0.2)
        normalize = transforms.Normalize(mean=[0.4736, 0.4504, 0.3909],
                                         std=[0.2655, 0.2607, 0.2650],
                                         inplace=True)
        # Fixed part of the pipeline; the random resize happens per item in __getitem__.
        self.trans = transforms.Compose([
            transforms.RandomCrop(224),
            transforms.RandomHorizontalFlip(p=0.5),
            jitter,
            transforms.ToTensor(),
            normalize,
        ])

    def __len__(self):
        """Number of samples in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, index):
        """Return ``(augmented image tensor, label)`` for ``index``."""
        image, label = self.dataset[index]
        # Scale augmentation: a fresh Resize size in [256, 480] is drawn for every access.
        target_size = random.randint(256, 480)
        rescaled = transforms.Resize(target_size)(image)
        return self.trans(rescaled), label

In [5]:
class ValidDataset(data.Dataset):
    """Ten-crop evaluation view over a dataset of ``(PIL image, label)`` pairs.

    Each image goes through ``Resize(256)`` and ``TenCrop(224)``; the crops are
    converted to tensors, stacked and normalised, so one item is a stack of
    crops together with its label.
    """

    def __init__(self, dataset):
        super().__init__()
        self.dataset = dataset

        def stack_crops(crops):
            # ToTensor is applied crop by crop; the results are stacked along a new leading dimension.
            return torch.stack([transforms.ToTensor()(crop) for crop in crops])

        normalize = transforms.Normalize(mean=[0.4736, 0.4504, 0.3909],
                                         std=[0.2655, 0.2607, 0.2650],
                                         inplace=True)
        self.trans = transforms.Compose([
            transforms.Resize(256),
            transforms.TenCrop(224),
            transforms.Lambda(stack_crops),
            normalize,
        ])

    def __len__(self):
        """Number of samples in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, index):
        """Return ``(transformed image, label)`` for ``index``."""
        image, label = self.dataset[index]
        transformed = self.trans(image)
        return transformed, label

In [6]:
# class ValidDataset(data.Dataset):
#     def __init__(self, dataset):
#         super().__init__()
#         self.dataset = dataset
#         self.trans = transforms.Compose([
#                                          # transforms.Resize(256),
#                                          # transforms.CenterCrop(224),
#                                          transforms.ToTensor(),
#                                          transforms.Normalize(mean=[0.4736, 0.4504, 0.3909],
#                                                               std=[0.2655, 0.2607, 0.2650],
#                                                               inplace=True)])
        
#     def __len__(self):
#         return len(self.dataset)
    
#     def __getitem__(self, index):
#         image, label = self.dataset[index]
#         return self.trans(image), label

In [7]:
class TrainValidDataset(data.Dataset):
    """Labelled images from the ``train/`` folder, before any augmentation.

    Reads the module-level ``path``, ``train_csv`` and ``label_list``; item
    ``index`` corresponds to row ``index`` of ``train_csv``.
    """

    def __init__(self):
        super().__init__()

    def __len__(self):
        """Number of rows in ``train_csv``."""
        return train_csv.shape[0]

    def __getitem__(self, index):
        """Return ``(RGB PIL image, class index)`` for row ``index``."""
        # The context manager closes the file as soon as the pixels are decoded.
        # convert('RGB') forces that decode and guarantees three channels, which
        # the 3-channel Normalize used by the downstream datasets requires.
        with Image.open(path + 'train/' + train_csv['id'][index] + '.jpg') as raw:
            image = raw.convert('RGB')
        # The class index is the breed's position in the sorted label list.
        label = label_list.index(train_csv['breed'][index])
        return image, label

In [8]:
# Split the labelled images into 9200 training samples and the remainder for
# validation. The validation size is derived from the dataset length rather than
# hard-coded, so the split keeps working if labels.csv changes size.
# NOTE(review): the split is not seeded, so it differs between kernel sessions.
full_dataset = TrainValidDataset()
num_train = 9200
train_subset, valid_subset = data.random_split(full_dataset,
                                               [num_train, len(full_dataset) - num_train])
# Wrap each subset with its own transform pipeline.
train_dataset, valid_dataset = TrainDataset(train_subset), ValidDataset(valid_subset)

In [9]:
class TestDataset(data.Dataset):
    """Unlabelled test images at one resize setting, optionally mirrored.

    Reads the module-level ``path`` and ``test_csv``; item ``index``
    corresponds to row ``index`` of ``test_csv``.

    Args:
        size: value passed to ``transforms.Resize``.
        horizontal_flip: when true, every image is flipped horizontally
            (``RandomHorizontalFlip(p=1)``) before resizing.
    """

    def __init__(self, size, horizontal_flip):
        super().__init__()
        steps = [transforms.Resize(size),
                 transforms.ToTensor(),
                 transforms.Normalize(mean=[0.4736, 0.4504, 0.3909],
                                      std=[0.2655, 0.2607, 0.2650],
                                      inplace=True)]
        if horizontal_flip:
            # p=1 makes the "random" flip deterministic.
            steps.insert(0, transforms.RandomHorizontalFlip(p=1))
        self.trans = transforms.Compose(steps)

    def __len__(self):
        """Number of rows in ``test_csv``."""
        return test_csv.shape[0]

    def __getitem__(self, index):
        """Return the transformed image tensor for row ``index``."""
        # Close the file once decoded and force three channels so the
        # 3-channel Normalize above always applies.
        with Image.open(path + 'test/' + test_csv['id'][index] + '.jpg') as raw:
            image = raw.convert('RGB')
        return self.trans(image)

In [10]:
class SubmissionGenerater:
    """Test-time augmentation: averages softmax outputs over several views.

    One ``TestDataset`` is created per (size, flip) combination, i.e. two per
    entry of ``self.sizes``, each with its own unshuffled ``DataLoader``.

    Args:
        batch_size: batch size of every test ``DataLoader``.
            NOTE(review): ``TestDataset`` only resizes and does not crop, so
            images presumably keep different shapes and batches larger than 1
            may fail to collate - confirm before raising this.
    """

    def __init__(self, batch_size):
        self.sizes = [224, 256, 384, 480, 640]
        self.datasets = []
        for size in self.sizes:
            # Plain and horizontally flipped view at this size.
            self.datasets += [TestDataset(size, False), TestDataset(size, True)]
        # One dataloader per dataset; shuffle=False keeps batch order identical
        # across views so outputs can be accumulated by batch index.
        self.dataloaders = [data.DataLoader(dataset,
                                            batch_size=batch_size,
                                            shuffle=False,
                                            num_workers=8) for dataset in self.datasets]

    def generate(self, net):
        """Run ``net`` over every view and build the submission table.

        Args:
            net: model called as ``net(batch)``; it is switched to eval mode.

        Returns:
            ``(submission, output_tensor)`` where ``output_tensor`` holds the
            softmax probabilities averaged over all views (one row per test
            image) and ``submission`` is a DataFrame with an ``id`` column
            followed by one column per entry of ``label_list``.
        """
        net.eval()
        # batch index -> running sum of softmax outputs over the views seen so far
        summed = {}
        with torch.no_grad():
            # Pass over every dataloader (view) in turn.
            for view_idx, dataloader in enumerate(self.dataloaders):
                print(f'{view_idx+1:2d} dataset inferencing')
                for batch_idx, input in enumerate(dataloader):
                    probs = F.softmax(net(input.to(device)), dim=1)
                    # Accumulate this view's probabilities for the batch.
                    if batch_idx in summed:
                        summed[batch_idx] += probs
                    else:
                        summed[batch_idx] = probs
        output_tensor = torch.cat([summed[k] for k in range(len(summed))], dim=0)
        # Turn the sum over views into a mean so every row is a probability
        # distribution again (rows sum to 1 instead of the number of views).
        output_tensor = output_tensor / len(self.dataloaders)
        print(output_tensor.shape)

        # Build the table in one shot instead of one pd.Series per row.
        submission = pd.DataFrame(output_tensor.cpu().numpy().astype('float64'),
                                  columns=label_list)
        submission.insert(0, 'id', test_csv['id'].to_numpy()[:len(submission)])

        return submission, output_tensor

In [11]:
def evaluate_loss_acc(net, data_iter, criterion, device=device):
    """Compute the model's loss and accuracy on a multi-crop dataset.

    Args:
        net: model to evaluate; it is switched to eval mode.
        data_iter: iterable of ``(input, target)`` batches where ``input`` is
            5-D, unpacked as ``(batch, ncrops, c, h, w)``.
        criterion: loss called as ``criterion(output, target)``.
            NOTE(review): assumed to return the mean over the batch, as the
            default ``nn.CrossEntropyLoss()`` does - confirm if another
            reduction is used.
        device: device the batches are moved to.

    Returns:
        ``(loss, accuracy)``, both averaged per sample over the whole iterator.
    """
    net.eval()  # evaluation mode
    # summed loss, number of correct predictions, number of predictions
    metric = d2l.Accumulator(3)
    with torch.no_grad():
        for input, target in data_iter:
            input = input.to(device)
            target = target.to(device)

            # Fold the crops into the batch dimension for the forward pass,
            # then average the network outputs over each sample's crops.
            bs, ncrops, c, h, w = input.size()
            output = net(input.view(-1, c, h, w))
            output = output.view(bs, ncrops, -1).mean(dim=1)

            num_targets = target.numel()
            batch_loss = float(criterion(output, target).item())
            # Weight the batch mean by its size so a short final batch does
            # not count as much as a full one.
            metric.add(batch_loss * num_targets,
                       d2l.accuracy(output, target),
                       num_targets)
    return metric[0] / metric[2], metric[1] / metric[2]

In [12]:
def get_lr(optimizer):
    """Return the learning rate of the optimizer's first parameter group."""
    first_group = optimizer.state_dict()['param_groups'][0]
    return first_group['lr']

In [13]:
def train_ResNet(net,
                 batch_size,
                 lr,
                 num_epochs,
                 weight_decay=1e-4,
                 warm_up=None):
    """Train ``net`` on the module-level train/valid datasets with SGD.

    Args:
        net: model to train; must expose ``architecture``, ``option`` and
            ``batch_norm`` attributes, which are used in the run and file names.
        batch_size: training batch size. The validation loader uses
            ``batch_size // 10`` (at least 1) because each validation item
            carries several crops.
        lr: initial learning rate.
        num_epochs: maximum number of epochs.
        weight_decay: SGD weight decay.
        warm_up: optional validation-accuracy threshold. When given, the
            weights are re-initialised before training, training stops as soon
            as the validation accuracy reaches the threshold, and the weights
            are saved to a ``..._warmup.pth`` file. When ``None``, the net's
            current weights are kept and nothing is saved.

    Per-epoch metrics are written to TensorBoard and printed. The learning
    rate is reduced by ``ReduceLROnPlateau`` driven by the validation loss.
    """
    writer = SummaryWriter(f'runs/ResNet_ImageNet_{net.architecture}_{net.option}_bn={net.batch_norm}' + ('warmup' if warm_up is not None else ''))
    train_iter = data.DataLoader(train_dataset, batch_size=batch_size,
                                 shuffle=True, num_workers=8)
    # max(1, ...) keeps the loader valid when batch_size < 10.
    valid_iter = data.DataLoader(valid_dataset, batch_size=max(1, batch_size // 10),
                                 shuffle=False, num_workers=8)

    def init_weights(m):
        if type(m) == nn.Linear or type(m) == nn.Conv2d:
            nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in', nonlinearity='relu')
            # nn.init.normal_()

    # Only warm-up runs start from fresh weights.
    if warm_up is not None:
        net.apply(init_weights)
    optimizer = torch.optim.SGD(net.parameters(),
                                lr=lr,
                                weight_decay=weight_decay,
                                momentum=0.9)
    # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs, eta_min=1e-5, verbose=False)
    # scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=5, T_mult=2, eta_min=1e-5)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=10, threshold=0.0001, verbose=True)

    criterion = nn.CrossEntropyLoss()
    timer = d2l.Timer()
    # Epochs actually completed; differs from num_epochs after an early stop.
    epochs_run = 0
    try:
        for epoch in range(num_epochs):
            tic = time.time()
            # summed training loss, number of correct predictions, number of samples
            metric = d2l.Accumulator(3)
            net.train()
            for i, (input, target) in enumerate(train_iter):
                timer.start()
                optimizer.zero_grad()
                input, target = input.to(device), target.to(device)
                output = net(input)
                loss = criterion(output, target)
                loss.backward()
                optimizer.step()
                with torch.no_grad():
                    metric.add(loss * input.shape[0],
                               d2l.accuracy(output, target),
                               input.shape[0])
                timer.stop()
            train_loss = metric[0] / metric[2]
            train_acc = metric[1] / metric[2]
            valid_loss, valid_acc = evaluate_loss_acc(net, valid_iter, criterion, device)
            writer.add_scalar('train/loss', train_loss, global_step=epoch+1)
            writer.add_scalar('train/accuracy', train_acc, global_step=epoch+1)
            writer.add_scalar('valid/loss', valid_loss, global_step=epoch+1)
            writer.add_scalar('valid/accuracy', valid_acc, global_step=epoch+1)
            writer.add_scalar('learning rate', get_lr(optimizer), global_step=epoch+1)
            # scheduler.step()
            scheduler.step(valid_loss)
            toc = time.time()
            print(f"epoch {epoch+1:3d}, train loss: {train_loss:.4f}, train accuracy: {train_acc:.4f}, "
                  f"valid loss: {valid_loss:.4f}, valid accuracy: {valid_acc:.4f}, time: {toc-tic:.4f}")
            epochs_run = epoch + 1
            # Warm-up mode ends as soon as the accuracy threshold is reached.
            if (warm_up is not None) and valid_acc >= warm_up:
                break
    finally:
        # Flush and close the event file even when training is interrupted
        # (e.g. KeyboardInterrupt from the notebook).
        writer.close()
    if warm_up is not None:
        torch.save(net.state_dict(),
                   f'ResNet_ImageNet_{net.architecture}_{net.option}_bn={net.batch_norm}_warmup.pth')
    print(f'train loss {train_loss:.3f}, train acc {train_acc:.3f}, '
          f'valid loss {valid_loss:.3f}, valid acc {valid_acc:.3f}')
    # metric[2] is the sample count of the last epoch; scale by the epochs that
    # actually ran so an early stop does not inflate the throughput.
    print(f'{metric[2] * epochs_run / timer.sum():.1f} examples/sec '
          f'on {str(device)}')

In [14]:
# Build the network with one output per breed, move it to the selected device
# and report its parameter count.
net = ResNet_ImageNet(architecture='50', num_classes=len(label_list), option='B',
                      batch_norm=True, dropout=0.4, plain=False)
net = net.to(device)
net.print_num_params()

23,753,912 total parameters.
23,753,912 trainable parameters.


In [15]:
# Load the warm-up weights. map_location remaps the stored tensors onto the
# active device, so a checkpoint saved from CUDA also loads on the CPU fallback.
warmup_ckpt = f'ResNet_ImageNet_{net.architecture}_{net.option}_bn={net.batch_norm}_warmup.pth'
net.load_state_dict(torch.load(warmup_ckpt, map_location=device))

<All keys matched successfully>

In [16]:
# Hyper-parameters for the main training run (no warm_up threshold, so the
# weights already held by `net` are kept and training continues from them).
finetune_hparams = dict(batch_size=128,
                        lr=0.01,
                        num_epochs=150,
                        weight_decay=1e-3)
train_ResNet(net, **finetune_hparams)

epoch   1, train loss: 3.4154, train accuracy: 0.1859, valid loss: 3.3845, valid accuracy: 0.1937, time: 57.2727
epoch   2, train loss: 3.3565, train accuracy: 0.1926, valid loss: 3.1352, valid accuracy: 0.2329, time: 56.2099
epoch   3, train loss: 3.2900, train accuracy: 0.2068, valid loss: 3.4009, valid accuracy: 0.1879, time: 56.4048
epoch   4, train loss: 3.2816, train accuracy: 0.2097, valid loss: 3.2545, valid accuracy: 0.2104, time: 56.3249
epoch   5, train loss: 3.2227, train accuracy: 0.2218, valid loss: 3.1680, valid accuracy: 0.2260, time: 56.3889
epoch   6, train loss: 3.2186, train accuracy: 0.2233, valid loss: 3.2950, valid accuracy: 0.2202, time: 56.2117
epoch   7, train loss: 3.1673, train accuracy: 0.2342, valid loss: 3.2908, valid accuracy: 0.2407, time: 56.3590
epoch   8, train loss: 3.1359, train accuracy: 0.2315, valid loss: 3.0908, valid accuracy: 0.2554, time: 56.7769
epoch   9, train loss: 3.0911, train accuracy: 0.2436, valid loss: 3.2840, valid accuracy: 0.211

KeyboardInterrupt: 

In [17]:
# Persist the current weights under a name derived from the model configuration.
final_ckpt = f'ResNet_ImageNet_{net.architecture}_{net.option}_bn={net.batch_norm}.pth'
torch.save(net.state_dict(), final_ckpt)

In [18]:
# Reload the saved weights. map_location remaps the stored tensors onto the
# active device, so a checkpoint saved from CUDA also loads on the CPU fallback.
trained_ckpt = f'ResNet_ImageNet_{net.architecture}_{net.option}_bn={net.batch_norm}.pth'
net.load_state_dict(torch.load(trained_ckpt, map_location=device))

<All keys matched successfully>

In [19]:
# One image per batch for test-time inference.
TEST_BATCH_SIZE = 1
generater = SubmissionGenerater(batch_size=TEST_BATCH_SIZE)

In [20]:
# Run every test view through the network; the result is the submission table
# together with the raw per-image output tensor.
generated = generater.generate(net)
submission, output_tensor = generated

 1 dataset inferencing
 2 dataset inferencing
 3 dataset inferencing
 4 dataset inferencing
 5 dataset inferencing
 6 dataset inferencing
 7 dataset inferencing
 8 dataset inferencing
 9 dataset inferencing
10 dataset inferencing
torch.Size([10357, 120])


In [21]:
# Write the submission table without the DataFrame index column.
submission_file = f'submission_{net.architecture}_{net.option}_bn={net.batch_norm}_plain={net.plain}.csv'
submission.to_csv(submission_file, index=False)