In [1]:
# Copy the efficientnet-pytorch input dataset into /kaggle/working/ so the next cell can pip-install it from there.
cp -R ../input/efficientnet-pytorch/ /kaggle/working/

In [2]:
# Install EfficientNet-PyTorch from the local copy made in the previous cell.
# Alternative (kept for reference): install from the package index.
#!pip install efficientnet_pytorch
!pip install /kaggle/working/efficientnet-pytorch/EfficientNet-PyTorch/EfficientNet-PyTorch-master/ > /dev/null # no output
# Alternative (kept for reference): install straight from the input dataset without an index.
#!pip install efficientnet_pytorch --no-index --find-links=../input/efficientnet-pytorch/EfficientNet-PyTorch/EfficientNet-PyTorch-master/

# Install torch_optimizer from local wheels only (--no-index disables the package index).
!pip install torch_optimizer --no-index --find-links=file:///kaggle/input/torch-optimizer/torch_optimizer

[0mLooking in links: file:///kaggle/input/torch-optimizer/torch_optimizer
Processing /kaggle/input/torch-optimizer/torch_optimizer/torch_optimizer-0.1.0-py3-none-any.whl
Processing /kaggle/input/torch-optimizer/torch_optimizer/pytorch_ranger-0.1.1-py3-none-any.whl (from torch_optimizer)
Installing collected packages: pytorch-ranger, torch_optimizer
Successfully installed pytorch-ranger-0.1.1 torch_optimizer-0.1.0
[0m

In [3]:
import os
import gc
gc.enable()
import sys
import math
import json
import time
import random
from glob import glob
from datetime import datetime

import cv2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import multiprocessing
from sklearn.preprocessing import LabelEncoder

import torch
import torchvision
from torch import Tensor
from torchvision import transforms
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.nn.parameter import Parameter
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import SequentialSampler
from tqdm import tqdm

import efficientnet_pytorch

import torch_optimizer as optim
import albumentations as A

import sklearn

import warnings
warnings.filterwarnings("ignore")

In [4]:
# Global run configuration shared by the training and inference cells.
# True when the KAGGLE_WORKING_DIR environment variable is set; used to disable the tqdm bar in inference().
IN_KERNEL = os.environ.get('KAGGLE_WORKING_DIR') is not None
# Batch size used by both the train and the test DataLoader.
BATCH_SIZE = 64
# One DataLoader worker per CPU core reported by the OS.
NUM_WORKERS = multiprocessing.cpu_count()
# Cap applied to the per-epoch step count computed in train_step().
MAX_STEPS_PER_EPOCH = 15000
# Number of passes over the training loader.
NUM_EPOCHS = 10
# train_step() prints a stats line every LOG_FREQ batches.
LOG_FREQ = 10
# k passed to torch.topk in inference(): predictions kept per image.
NUM_TOP_PREDICTS = 1

In [5]:
# Training metadata; later cells read its `id` and `landmark_id` columns.
train = pd.read_csv('../input/landmark-recognition-2021/train.csv')
# Root folder of the training images (ImageDataset appends <id[0]>/<id[1]>/<id[2]>/<id>.jpg).
train_dir = '../input/landmark-recognition-2021/train/'

In [6]:
class ImageDataset(torch.utils.data.Dataset):
    """Dataset that loads landmark images listed in a DataFrame.

    Images are read from ``<image_dir>/<id[0]>/<id[1]>/<id[2]>/<id>.jpg``,
    resized to 64x64, converted to a tensor and normalised per channel.

    Args:
        dataframe: Frame with an ``id`` column; a ``landmark_id`` column is
            also required unless ``mode == 'test'``.
        image_dir: Root directory of the image tree.
        mode: ``'train'`` enables random augmentation; ``'test'`` makes
            ``__getitem__`` return the image only; any other value uses the
            deterministic pipeline and still returns the target.
    """

    def __init__(self, dataframe: pd.DataFrame, image_dir:str, mode: str):
        self.df = dataframe
        self.mode = mode
        self.image_dir = image_dir

        # Deterministic tail shared by every mode.
        to_normalized_tensor = [
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                  std=[0.229, 0.224, 0.225]),
        ]
        if self.mode == 'train':
            augmentations = [
                transforms.Resize((64,64)),
                transforms.RandomHorizontalFlip(),
                # Exactly one of the three augmentations is applied per sample.
                transforms.RandomChoice([
                    transforms.RandomResizedCrop(64),
                    transforms.ColorJitter(0.2, 0.2, 0.2, 0.2),
                    # Use the torchvision enum rather than a raw PIL integer
                    # constant, which torchvision only accepts as a
                    # deprecated fallback.
                    transforms.RandomAffine(degrees=15, translate=(0.2, 0.2),
                                            scale=(0.8, 1.2), shear=15,
                                            interpolation=transforms.InterpolationMode.BILINEAR)
                ]),
            ]
            transforms_list = augmentations + to_normalized_tensor
        else:
            # Keep this resize same as train
            transforms_list = [transforms.Resize((64,64))] + to_normalized_tensor
        self.transforms = transforms.Compose(transforms_list)

    def __getitem__(self, index: int):
        """Return ``{'image': tensor}`` plus ``'target'`` unless in test mode."""
        row = self.df.iloc[index]  # single positional lookup reused below
        image_id = row.id
        image_path = f"{self.image_dir}/{image_id[0]}/{image_id[1]}/{image_id[2]}/{image_id}.jpg"
        # Force 3-channel RGB: grayscale/CMYK/RGBA files would otherwise give
        # tensors that the 3-channel Normalize (and batch collation) rejects.
        # The context manager closes the file handle once pixels are decoded.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')
        image = self.transforms(image)

        if self.mode == 'test':
            return {'image':image}
        return {'image':image,
                'target':row.landmark_id}

    def __len__(self) -> int:
        """Number of rows in the backing DataFrame."""
        return self.df.shape[0]

In [7]:
def load_data(train, train_dir, num_samples=10, num_classes=30000):
    """Build the training DataLoader from the most frequent landmark classes.

    For each of the ``num_classes`` most frequent ``landmark_id`` values,
    ``num_samples`` image ids are drawn with replacement (fixed
    ``random_state=1``), so classes with fewer images contribute duplicates.
    Labels are then re-encoded to ``0 .. num_classes - 1``.

    Args:
        train: DataFrame with ``id`` and ``landmark_id`` columns.
        train_dir: Root directory of the training images.
        num_samples: Images drawn per selected class.
        num_classes: Number of most frequent classes to keep.

    Returns:
        Tuple ``(train_loader, label_encoder, num_classes)``.

    Raises:
        AssertionError: if fewer than ``num_classes`` distinct classes were
            selected.
    """
    print('Original train_df size:', train.shape)
    counts = train.landmark_id.value_counts()

    print(f'Select train data from top {num_classes} classes, {num_samples} samples of each class')
    x=[]
    y=[]

    # Group once instead of re-scanning the whole frame with a boolean mask
    # for every class; each group keeps the original row order, so the seeded
    # sample below picks the same ids as the mask-based selection.
    ids_by_landmark = train.groupby('landmark_id')['id']
    for landmark_id in tqdm(counts.index[:num_classes]):
        sampled_ids = ids_by_landmark.get_group(landmark_id).sample(
            n=num_samples, random_state=1, replace=True)
        x.extend(sampled_ids.values)
        y.extend([landmark_id]*num_samples)
    train=pd.DataFrame({'id':x,'landmark_id':y})
    print('Filtered train_df size:', train.shape)

    # Encode target labels with values between 0 and n_classes-1.
    label_encoder = LabelEncoder()
    label_encoder.fit(train.landmark_id.values)

    # Check that the expected number of classes was selected.
    assert len(label_encoder.classes_) == num_classes, (
        f'expected {num_classes} classes, got {len(label_encoder.classes_)}')

    train.landmark_id = label_encoder.transform(train.landmark_id)

    train_dataset = ImageDataset(train, train_dir, mode='train')

    # drop_last=True keeps every batch at exactly BATCH_SIZE samples.
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                              shuffle=True,
                              num_workers=NUM_WORKERS,
                              drop_last=True)

    return train_loader, label_encoder, num_classes

In [8]:
def radam(parameters, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0):
    """Create a ``torch_optimizer.RAdam`` optimizer.

    Args:
        parameters: Iterable of parameters (or parameter groups) to optimise.
        lr: Learning rate.
        betas: Pair of coefficients, or its string form such as
            ``"(0.9, 0.999)"`` (e.g. when read from a config file).
        eps: Numerical-stability term.
        weight_decay: Weight decay coefficient.

    Returns:
        The configured ``optim.RAdam`` instance.

    Raises:
        ValueError / SyntaxError: if ``betas`` is a string that is not a
            plain Python literal.
    """
    if isinstance(betas, str):
        # Parse with literal_eval rather than eval: a string argument must
        # only ever be a literal tuple, never executable code.
        import ast
        betas = ast.literal_eval(betas)
    return optim.RAdam(parameters,
                      lr=lr,
                      betas=betas,
                      eps=eps,
                      weight_decay=weight_decay)

In [9]:
class AverageMeter:
    ''' Tracks the most recent value of a metric and its weighted running mean '''

    def __init__(self) -> None:
        self.reset()

    def reset(self) -> None:
        ''' Clear the latest value, running mean, weighted total and sample count '''
        self.val, self.avg, self.sum = 0.0, 0.0, 0.0
        self.count = 0

    def update(self, val: float, n: int = 1) -> None:
        ''' Record `val` as the latest value, counted `n` times in the mean '''
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

In [10]:
def GAP(predicts: torch.Tensor, confs: torch.Tensor, targets: torch.Tensor) -> float:
    ''' Simplified GAP@1 metric: only one prediction per sample is supported.

    Samples are ranked by descending confidence; every correct prediction
    contributes the precision at its rank, and the total is divided by the
    number of samples.

    Args:
        predicts: 1-D tensor of predicted class ids.
        confs: 1-D tensor of confidences, same shape as `predicts`.
        targets: 1-D tensor of ground-truth class ids, same shape.

    Returns:
        The metric as a Python float; 0.0 for empty input.
    '''
    assert len(predicts.shape) == 1
    assert len(confs.shape) == 1
    assert len(targets.shape) == 1
    assert predicts.shape == confs.shape and confs.shape == targets.shape

    num_samples = targets.shape[0]
    if num_samples == 0:
        # Nothing to rank; avoids a division by zero below.
        return 0.0

    # Only the ranking induced by the confidences matters; the confidence
    # values themselves are not used afterwards, so they are not converted
    # (previously they were converted unsorted, misaligned with the reordered
    # predictions and targets).
    _, indices = torch.sort(confs, descending=True)
    indices = indices.cpu()
    predicts = predicts.cpu()[indices].numpy()
    targets = targets.cpu()[indices].numpy()

    res, true_pos = 0.0, 0

    for rank, (p, t) in enumerate(zip(predicts, targets), start=1):
        rel = int(p == t)
        true_pos += rel

        # precision@rank, counted only where the prediction is correct
        res += true_pos / rank * rel

    res /= num_samples # FIXME: incorrect, not all test images depict landmarks
    return res

In [11]:
class EfficientNetEncoderHead(nn.Module):
    """EfficientNet-b<depth> feature extractor followed by a linear classifier.

    The backbone is created by name and initialised from the first checkpoint
    file matching ``../input/efficientnet-pytorch/efficientnet-b<depth>*``.
    """

    def __init__(self, depth, num_classes):
        super().__init__()
        self.depth = depth

        # Backbone architecture, then its weights from the local checkpoint.
        self.base = efficientnet_pytorch.EfficientNet.from_name(f'efficientnet-b{self.depth}')
        weight_files = glob(f'../input/efficientnet-pytorch/efficientnet-b{self.depth}*')
        self.base.load_state_dict(torch.load(weight_files[0]))

        # Global average pooling + linear layer sized from the backbone's own FC input.
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.output_filter = self.base._fc.in_features
        self.classifier = nn.Linear(self.output_filter, num_classes)

    def forward(self, x):
        """Return class logits for a batch of images."""
        features = self.base.extract_features(x)
        # Pool to (N, C, 1, 1), then drop the two trailing singleton dims.
        pooled = torch.flatten(self.avg_pool(features), 1)
        return self.classifier(pooled)

In [12]:
def train_step(train_loader, 
          model, 
          criterion, 
          optimizer,
          epoch, 
          lr_scheduler):
    """Run one training epoch and print running statistics.

    Args:
        train_loader: Iterable of batches shaped as dicts with ``'image'``
            and ``'target'`` entries.
        model: Network to train; inputs are moved to CUDA before the forward
            pass.
        criterion: Loss called as ``criterion(output, target)``.
        optimizer: Optimizer stepped once per batch.
        epoch: Epoch number, used only for logging.
        lr_scheduler: Scheduler stepped once per batch, right after the
            optimizer.

    At most ``MAX_STEPS_PER_EPOCH`` batches are processed; a stats line is
    printed every ``LOG_FREQ`` batches.
    """
    print(f'epoch {epoch}')
    batch_time = AverageMeter()
    losses = AverageMeter()
    avg_score = AverageMeter()

    model.train()
    num_steps = min(len(train_loader), MAX_STEPS_PER_EPOCH)

    print(f'total batches: {num_steps}')

    end = time.time()

    for i, data in enumerate(train_loader):
        if i >= num_steps:
            # Enforce the cap announced above.
            break

        input_ = data['image']
        target = data['target']

        output = model(input_.cuda())
        loss = criterion(output, target.cuda())

        # Top-1 prediction and its raw score, detached from the graph, for the running metric.
        confs, predicts = torch.max(output.detach(), dim=1)
        avg_score.update(GAP(predicts, confs, target))
        losses.update(loss.item(), input_.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        lr = optimizer.param_groups[0]['lr']

        batch_time.update(time.time() - end)
        end = time.time()

        if i % LOG_FREQ == 0:
            # The learning rate gets its own tab-separated field instead of
            # being glued onto the GAP value.
            print(f'{epoch} [{i}/{num_steps}]\t'
                    f'time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    f'loss {losses.val:.4f} ({losses.avg:.4f})\t'
                    f'GAP {avg_score.val:.4f} ({avg_score.avg:.4f})\t'
                    f'lr {lr}')

    print(f' * average GAP on train {avg_score.avg:.4f}')

# Train model

In [13]:
# Training driver: builds data, model, loss, optimizer and scheduler, then runs NUM_EPOCHS epochs.
# The single-letter prints are progress markers emitted before each setup stage.
if __name__ == '__main__':
    print("A")
    # Sampled training loader plus the fitted label encoder (both reused by later cells).
    train_loader, label_encoder, num_classes = load_data(train, train_dir)
    
    print("B")
    # EfficientNet-b7 backbone with a num_classes-way linear head.
    model = EfficientNetEncoderHead(depth=7, num_classes=num_classes)
    
    print("C")
    model.cuda()
    
    print("D")
    criterion = nn.CrossEntropyLoss()
    
    print("E")
    # NOTE(review): eps=1e-3 is far larger than radam()'s default of 1e-8 — confirm this is intentional.
    optimizer = radam(model.parameters(), lr=1e-3, betas=(0.9,0.999), eps=1e-3, weight_decay=1e-4)
    
    print("F")
    # train_step() steps the scheduler once per batch, so T_max spans all batches of all epochs.
    scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=len(train_loader)*NUM_EPOCHS, eta_min=1e-6)
    
    print("G")
    for epoch in range(1, NUM_EPOCHS + 1):
        print('-' * 50)
        train_step(train_loader, model, criterion, optimizer, epoch, scheduler)

A
Original train_df size: (1580470, 2)
Select train data from top 30000 classes, 10 samples of each class


100%|██████████| 30000/30000 [01:15<00:00, 395.52it/s]


Filtered train_df size: (300000, 2)
B
C
D
E
F
G
--------------------------------------------------
epoch 1
total batches: 4687
1 [0/4687]	time 7.520 (7.520)	loss 10.3197 (10.3197)	GAP 0.0000 (0.0000)0.0009999999988779418
1 [10/4687]	time 0.482 (1.197)	loss 10.4147 (10.3468)	GAP 0.0000 (0.0000)0.0009999998642309794
1 [20/4687]	time 0.477 (0.976)	loss 10.2986 (10.3283)	GAP 0.0000 (0.0000)0.0009999995051724715
1 [30/4687]	time 0.470 (0.895)	loss 10.3274 (10.3295)	GAP 0.0000 (0.0001)0.0009999989217025793
1 [40/4687]	time 0.476 (0.861)	loss 10.3564 (10.3309)	GAP 0.0000 (0.0000)0.000999998113821565
1 [50/4687]	time 0.477 (0.834)	loss 10.3358 (10.3314)	GAP 0.0000 (0.0000)0.000999997081529792
1 [60/4687]	time 0.485 (0.814)	loss 10.3327 (10.3309)	GAP 0.0000 (0.0000)0.000999995824827723
1 [70/4687]	time 0.473 (0.801)	loss 10.3535 (10.3286)	GAP 0.0000 (0.0000)0.0009999943437159237
1 [80/4687]	time 0.466 (0.794)	loss 10.3603 (10.3296)	GAP 0.0000 (0.0000)0.0009999926381950592
1 [90/4687]	time 0.483

# Save classes of the label_encoder

In [14]:
# Persist the encoder's class array so encoded predictions can be mapped back to landmark ids later.
np.save('/kaggle/working/classes.npy', label_encoder.classes_)

# Save model weights

In [15]:
# Save only the state_dict (parameters and buffers), not the pickled model object.
torch.save(model.state_dict(), "/kaggle/working/new_weight_efficientnet.pth")

# Load label encoder

In [16]:
# Rebuild the encoder without refitting: assigning classes_ directly restores the saved mapping.
label_encoder = LabelEncoder()
label_encoder.classes_ = np.load('/kaggle/working/classes.npy')

# Load model weights

In [17]:
# Recreate the architecture with one output per saved class, then restore the trained weights.
# NOTE(review): the constructor first loads the pretrained backbone checkpoint, which is
# immediately overwritten by the load_state_dict call below.
model = EfficientNetEncoderHead(depth=7, num_classes=len(label_encoder.classes_))
model.cuda()
model.load_state_dict(torch.load("/kaggle/working/new_weight_efficientnet.pth"))

<All keys matched successfully>

In [18]:
# Root folder of the test images (same <id[0]>/<id[1]>/<id[2]>/<id>.jpg layout that ImageDataset expects).
test_dir = '../input/landmark-recognition-2021/test/'

In [19]:
class ImageDataset(torch.utils.data.Dataset):
    """Dataset that loads landmark images listed in a DataFrame.

    Images are read from ``<image_dir>/<id[0]>/<id[1]>/<id[2]>/<id>.jpg``,
    resized to 64x64, converted to a tensor and normalised per channel.

    Args:
        dataframe: Frame with an ``id`` column; a ``landmark_id`` column is
            also required unless ``mode == 'test'``.
        image_dir: Root directory of the image tree.
        mode: ``'train'`` enables random augmentation; ``'test'`` makes
            ``__getitem__`` return the image only; any other value uses the
            deterministic pipeline and still returns the target.
    """

    def __init__(self, dataframe: pd.DataFrame, image_dir:str, mode: str):
        self.df = dataframe
        self.mode = mode
        self.image_dir = image_dir

        # Deterministic tail shared by every mode.
        to_normalized_tensor = [
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                  std=[0.229, 0.224, 0.225]),
        ]
        if self.mode == 'train':
            # To train at a higher resolution than (64,64), change the size
            # in both Resize and RandomResizedCrop.
            augmentations = [
                transforms.Resize((64,64)),
                transforms.RandomHorizontalFlip(),
                # Exactly one of the three augmentations is applied per sample.
                transforms.RandomChoice([
                    transforms.RandomResizedCrop(64),
                    transforms.ColorJitter(0.2, 0.2, 0.2, 0.2),
                    # `interpolation=` replaces the deprecated `resample=`
                    # keyword, matching the dataset definition used for
                    # training earlier in this notebook.
                    transforms.RandomAffine(degrees=15, translate=(0.2, 0.2),
                                            scale=(0.8, 1.2), shear=15,
                                            interpolation=transforms.InterpolationMode.BILINEAR)
                ]),
            ]
            transforms_list = augmentations + to_normalized_tensor
        else:
            # Keep this resize same as train
            transforms_list = [transforms.Resize((64,64))] + to_normalized_tensor
        self.transforms = transforms.Compose(transforms_list)

    def __getitem__(self, index: int):
        """Return ``{'image': tensor}`` plus ``'target'`` unless in test mode."""
        row = self.df.iloc[index]  # single positional lookup reused below
        image_id = row.id
        image_path = f"{self.image_dir}/{image_id[0]}/{image_id[1]}/{image_id[2]}/{image_id}.jpg"
        # Force 3-channel RGB: grayscale/CMYK/RGBA files would otherwise give
        # tensors that the 3-channel Normalize (and batch collation) rejects.
        # The context manager closes the file handle once pixels are decoded.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')
        image = self.transforms(image)

        if self.mode == 'test':
            return {'image':image}
        return {'image':image,
                'target':row.landmark_id}

    def __len__(self) -> int:
        """Number of rows in the backing DataFrame."""
        return self.df.shape[0]

In [20]:
class EfficientNetEncoderHead(nn.Module):
    """EfficientNet-b<depth> backbone with average pooling and a linear head.

    This cell re-declares the model class for the inference section. Weights
    come from the first file matching
    ``../input/efficientnet-pytorch/efficientnet-b<depth>*``.
    """

    def __init__(self, depth, num_classes):
        super().__init__()
        self.depth = depth

        self.base = efficientnet_pytorch.EfficientNet.from_name(f'efficientnet-b{self.depth}')#tropicbird
        checkpoint_paths = glob(f'../input/efficientnet-pytorch/efficientnet-b{self.depth}*')
        self.base.load_state_dict(torch.load(checkpoint_paths[0]))

        # Pooling collapses the spatial dims; the head maps backbone features to class logits.
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.output_filter = self.base._fc.in_features
        self.classifier = nn.Linear(self.output_filter, num_classes)

    def forward(self, x):
        """Return class logits for a batch of images."""
        feature_map = self.base.extract_features(x)
        # (N, C, 1, 1) -> (N, C)
        embedding = torch.flatten(self.avg_pool(feature_map), 1)
        return self.classifier(embedding)

In [21]:
def inference(data_loader, model):
    """Run the model over a loader and collect its top-k softmax predictions.

    Args:
        data_loader: Loader whose ``dataset.mode`` says whether batches carry
            a ``'target'`` entry (every mode except ``'test'``).
        model: Network to evaluate; inputs are moved to CUDA.

    Returns:
        Tuple ``(predicts, confs, targets)``: concatenated top-k class
        indices, their softmax probabilities, and the concatenated targets
        (``None`` when no batch provided targets).
    """
    model.eval()

    batch_predicts, batch_confs, batch_targets = [], [], []

    with torch.no_grad():
        for batch in tqdm(data_loader, disable=IN_KERNEL):
            images = batch['image']
            # Test batches carry no labels.
            labels = batch['target'] if data_loader.dataset.mode != 'test' else None

            probabilities = torch.softmax(model(images.cuda()), dim=1)

            top_confs, top_classes = torch.topk(probabilities, NUM_TOP_PREDICTS)
            batch_confs.append(top_confs)
            batch_predicts.append(top_classes)

            if labels is not None:
                batch_targets.append(labels)

    predicts = torch.cat(batch_predicts)
    confs = torch.cat(batch_confs)
    targets = torch.cat(batch_targets) if batch_targets else None

    return predicts, confs, targets

In [22]:
def generate_submission(test_loader, model, label_encoder):
    """Predict on the test loader and write ``submission.csv``.

    Each row of the CSV is indexed by image ``id`` and holds a ``landmarks``
    string of space-separated ``<landmark_id> <confidence>`` pairs, with
    confidences rounded to 4 decimals.

    Args:
        test_loader: Loader over a dataset exposing a ``df`` DataFrame with
            an ``id`` column, in the same order as the predictions.
        model: Trained network passed on to ``inference``.
        label_encoder: Fitted encoder used to map encoded class indices back
            to the original landmark ids.
    """
    predicts_gpu, confs_gpu, _ = inference(test_loader, model)
    predicts, confs = predicts_gpu.cpu().numpy(), confs_gpu.cpu().numpy()

    # Map encoded class indices back to the original landmark ids with one
    # vectorised call instead of one call per image.
    labels = label_encoder.inverse_transform(predicts.ravel()).reshape(predicts.shape)
    print('labels')
    print(np.array(labels))
    print('confs')
    print(np.array(confs))

    def concat(label: np.ndarray, conf: np.ndarray):
        return ' '.join([f'{str(L)} {str(np.round(c,4))}' for L, c in zip(label, conf)])

    landmarks = [concat(label, conf) for label, conf in zip(labels, confs)]

    # assign() returns a new frame, so the dataset's own DataFrame is not
    # modified as a side effect of writing the submission.
    sub = test_loader.dataset.df.assign(landmarks=landmarks).set_index('id')
    sub.to_csv('submission.csv')

In [23]:
# Collect every file under the test folder; the image id is the file name up to its first dot.
test_filenames = [
    filename.split(".")[0]
    for _dirname, _subdirs, filenames in os.walk('../input/landmark-recognition-2021/test')
    for filename in filenames
]
# Submission skeleton: one row per image id with an empty landmarks string.
test = pd.DataFrame({"id": test_filenames, "landmarks": ""})

In [24]:
# mode='test' makes the dataset return images only (no targets).
test_dataset = ImageDataset(test, test_dir, mode='test')
# shuffle=False keeps predictions aligned with the row order of `test`.
test_loader = DataLoader(test_dataset, 
                         batch_size=BATCH_SIZE,
                         shuffle=False, num_workers=NUM_WORKERS)

# Make prediction

In [25]:
# Write submission.csv, then read it back so the notebook displays the result.
generate_submission(test_loader, model, label_encoder)
pd.read_csv("./submission.csv")

100%|██████████| 162/162 [01:12<00:00,  2.24it/s]


labels
[[114300]
 [198053]
 [ 16427]
 ...
 [119367]
 [ 68495]
 [ 86609]]
confs
[[0.4626833 ]
 [0.5769745 ]
 [0.57923186]
 ...
 [0.5873152 ]
 [0.09975833]
 [0.5853488 ]]


Unnamed: 0,id,landmarks
0,777f9efff0fc6b81,114300 0.4627
1,7774e44062fbd8bc,198053 0.577
2,777173e839e6cfa7,16427 0.5792
3,7726658184c7e337,91684 0.7993
4,77be72c73bbf4f18,2093 0.686
...,...,...
10340,dd912d66515ce5f0,171140 0.3512
10341,dd90d3737bbbcc05,168137 0.1261
10342,dd6db4a564eebf38,119367 0.5873
10343,dddcd5b7d09cd416,68495 0.0998
