In [1]:
import os

import random
from datetime import datetime
from typing import Dict, Tuple, Any
import pickle
from tqdm import tqdm

import math

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import cv2
import albumentations
from torch.utils.data import Dataset

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from torch.autograd import Variable
from torch.cuda.amp import autocast, GradScaler
from torch.utils.data import DataLoader, Dataset

import timm

In [2]:
# Root directory of the input data; image file paths are joined onto it.
DATA_DIR = '../input/'
# Checkpoint name without extension: selects the .pth file to load and names
# the saved embeddings file.
LOAD_MODEL = 'effnetb3_600_fold1_epoch1'

# Side length passed to albumentations.Resize (height and width).
IMAGE_SIZE = 600
# DataLoader batch size; also used to compute write offsets in the embedding loop.
BATCH_SIZE = 48
# Passed to DataLoader as num_workers.
NUM_WORKERS = 4
# NOTE(review): not referenced anywhere else in the visible notebook; autocast
# is applied unconditionally.
USE_AMP = True

In [None]:
class LandmarkDataset(Dataset):
    """Dataset that loads the images listed in a dataframe as CHW float tensors.

    Args:
        csv: DataFrame with a ``filepath`` column holding the path of each image.
        transform: optional callable invoked as ``transform(image=image)`` that
            returns a mapping with the result stored under ``'image'``.
    """

    def __init__(self, csv, transform=None):
        # reset_index() replaces the index with 0..n-1 and keeps the old one
        # as a column.
        self.csv = csv.reset_index()
        self.transform = transform

    def __len__(self):
        """Return the number of rows (images) in the dataframe."""
        return self.csv.shape[0]

    def __getitem__(self, index):
        """Load, optionally transform, and return the image at position ``index``.

        Returns:
            A float32 ``torch.Tensor`` in channel-first layout.

        Raises:
            FileNotFoundError: if OpenCV cannot read the file at ``filepath``.
        """
        row = self.csv.iloc[index]

        image = cv2.imread(row.filepath)
        if image is None:
            # cv2.imread reports a missing or unreadable file by returning None
            # instead of raising; fail here with the offending path rather than
            # with an opaque "NoneType is not subscriptable" further down.
            raise FileNotFoundError(
                'could not read image (missing or unreadable): {}'.format(row.filepath)
            )
        # OpenCV decodes to BGR; reverse the channel axis to get RGB.
        image = image[:, :, ::-1]

        if self.transform is not None:
            res = self.transform(image=image)
            image = res['image'].astype(np.float32)
        else:
            image = image.astype(np.float32)

        # HWC -> CHW
        image = image.transpose(2, 0, 1)

        return torch.tensor(image)


# Inference-time preprocessing: resize to a square of side IMAGE_SIZE, then
# apply albumentations' Normalize with its default parameters.
transforms = albumentations.Compose(
    [
        albumentations.Resize(height=IMAGE_SIZE, width=IMAGE_SIZE),
        albumentations.Normalize(),
    ]
)

In [None]:
class Swish(torch.autograd.Function):
    """Swish activation ``x * sigmoid(x)`` with a hand-written backward pass.

    Only the input is saved for backward; the sigmoid is recomputed there
    rather than stored.
    """

    @staticmethod
    def forward(ctx, i):
        """Return ``i * sigmoid(i)`` and save ``i`` for the backward pass."""
        result = i * torch.sigmoid(i)
        ctx.save_for_backward(i)
        return result

    @staticmethod
    def backward(ctx, grad_output):
        """Return the gradient with respect to the input.

        d/di [i * s(i)] = s(i) * (1 + i * (1 - s(i))), where s is the sigmoid.
        """
        # saved_tensors is the supported accessor; saved_variables is deprecated.
        i, = ctx.saved_tensors
        sigmoid_i = torch.sigmoid(i)
        return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i)))


class Swish_module(nn.Module):
    """``nn.Module`` wrapper that applies the custom ``Swish`` autograd function."""

    def forward(self, x):
        activated = Swish.apply(x)
        return activated


class DenseCrossEntropy(nn.Module):
    """Cross-entropy against dense (per-class weighted) targets.

    Both inputs are cast to float; the per-sample loss is the negative
    target-weighted sum of log-softmax values, averaged over all samples.
    """

    def forward(self, x, target):
        logits = x.float()
        weights = target.float()
        log_probabilities = torch.nn.functional.log_softmax(logits, dim=-1)

        per_sample = (-log_probabilities * weights).sum(-1)
        return per_sample.mean()


class ArcMarginProduct_subcenter(nn.Module):
    """Cosine-similarity head with ``k`` sub-centres per class.

    The weight holds ``out_features * k`` centre vectors of size
    ``in_features``. ``forward`` returns, for every class, the largest cosine
    similarity between the input and that class's ``k`` centres.
    """

    def __init__(self, in_features, out_features, k=3):
        super().__init__()
        self.k = k
        self.out_features = out_features
        self.weight = nn.Parameter(torch.FloatTensor(out_features * k, in_features))
        self.reset_parameters()

    def reset_parameters(self):
        """Fill the weight uniformly in [-b, b] with b = 1 / sqrt(in_features)."""
        bound = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-bound, bound)

    def forward(self, features):
        """Return a (batch, out_features) tensor of per-class maximum cosines."""
        unit_features = F.normalize(features)
        unit_centres = F.normalize(self.weight)
        similarities = F.linear(unit_features, unit_centres)
        # Consecutive groups of k rows of the weight belong to the same class.
        per_class = similarities.view(-1, self.out_features, self.k)
        return torch.max(per_class, dim=2)[0]


def gem(x, p=3, eps=1e-6):
    """Generalised-mean pooling over the last two axes of ``x``.

    Values are clamped to at least ``eps``, raised to the power ``p``, averaged
    over the full spatial extent, and the result is raised to ``1 / p``.
    """
    height, width = x.size(-2), x.size(-1)
    powered = x.clamp(min=eps).pow(p)
    pooled = F.avg_pool2d(powered, (height, width))
    return pooled.pow(1. / p)


class GeM(nn.Module):
    """Generalised-mean pooling layer wrapping :func:`gem`.

    Args:
        p: pooling exponent.
        eps: lower clamp applied to the input before it is raised to ``p``.
        p_trainable: when True, ``p`` is stored as a 1-element ``Parameter``;
            otherwise it is kept as the plain number that was passed in.
    """

    def __init__(self, p=3, eps=1e-6, p_trainable=True):
        super(GeM,self).__init__()
        if p_trainable:
            self.p = Parameter(torch.ones(1)*p)
        else:
            self.p = p
        self.eps = eps

    def forward(self, x):
        """Pool ``x`` over its last two axes with the current ``p`` and ``eps``."""
        return gem(x, p=self.p, eps=self.eps)

    def __repr__(self):
        # self.p is a tensor only when p_trainable=True; with a fixed p it is a
        # plain number, which has no `.data` attribute.
        p_value = self.p.item() if isinstance(self.p, torch.Tensor) else self.p
        return self.__class__.__name__ + '(' + 'p=' + '{:.4f}'.format(p_value) + ', ' + 'eps=' + str(self.eps) + ')'


class EffnetB3_Landmark(nn.Module):
    """EfficientNet-B3 backbone with GeM pooling and a 512-d embedding neck.

    ``forward`` returns the output of ``self.feat``; the sub-center cosine head
    ``metric_classify`` is constructed but not called in ``forward`` (the call
    is commented out).

    Args:
        out_dim: number of classes for the ``metric_classify`` head.
        load_pretrained: NOTE(review): currently unused; the backbone is always
            created with ``pretrained=False``.
    """

    def __init__(self, out_dim, load_pretrained=True):
        super().__init__()

        # Architecture only; no pretrained weights are requested here.
        self.backbone = timm.create_model('tf_efficientnet_b3_ns', pretrained=False)
        # Neck: backbone features -> 512-d, followed by batch norm and Swish.
        self.feat = nn.Sequential(
            nn.Linear(self.backbone.num_features, 512, bias=True),
            nn.BatchNorm1d(512),
            Swish_module()
        )
        # Swap the backbone's pooling for GeM and replace its classifier with
        # an identity.
        self.backbone.global_pool = GeM()
        self.backbone.classifier = nn.Identity()
        
        # self.swish = Swish_module()
        # Not used by forward(); presumably kept so the module's state_dict
        # keys match the training checkpoint (TODO confirm).
        self.metric_classify = ArcMarginProduct_subcenter(512, out_dim)


    def extract(self, x):
        """Run the backbone and take element ``[0, 0]`` of the last two axes.

        Presumably the GeM-pooled output is (batch, channels, 1, 1), so this
        reduces it to (batch, channels). TODO confirm against timm's forward.
        """
        return self.backbone(x)[:, :, 0, 0]

    @autocast()
    def forward(self, x):
        """Return ``self.feat(self.extract(x))``, computed under autocast."""
        x = self.extract(x)
        # logits_m = self.metric_classify(self.feat(x))
        # return logits_m
        return self.feat(x)

In [None]:
# Number of classes of the metric head; it has to match the checkpoint for
# load_state_dict to succeed.
out_dim = 81313

# Load onto the CPU first: without map_location, torch.load restores every
# tensor to the device it was saved from, which fails when that device is not
# present and otherwise holds a second copy of the weights on the GPU.
# NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
load = torch.load('./model_checkpoints/{}.pth'.format(LOAD_MODEL), map_location='cpu')
# Checkpoints saved from an nn.DataParallel model prefix every key with
# 'module.'; strip it so the keys match the bare model.
model_only_weight = {
    k[len('module.'):] if k.startswith('module.') else k: v
    for k, v in load['model_state_dict'].items()
}

model = EffnetB3_Landmark(out_dim=out_dim).cuda()
model.load_state_dict(model_only_weight)
model = nn.DataParallel(model)

# Inference mode: freezes batch-norm statistics.
model = model.eval()

In [10]:
# get dataframe
# df = pd.read_csv('../input/train.csv')
# NOTE(review): `df` is not defined by this cell (the read above is commented
# out), so it relies on a frame loaded elsewhere in the session.
# Per-class value count ** -0.25, ordered by landmark_id.
tmp = np.sqrt(1 / np.sqrt(df['landmark_id'].value_counts().sort_index().values))
# Min-max rescale to [0.05, 0.50]; classes with fewer samples get larger values.
margins = (tmp - tmp.min()) / (tmp.max() - tmp.min()) * 0.45 + 0.05

In [15]:
# Inspect the median of the per-class margins.
np.median(margins)

0.33721154991697344

In [16]:
# Inspect the mean of the per-class margins.
margins.mean()

0.3342744605698179

In [9]:
# Scratch check of the margin formula on a single scalar (a count of 2).
tmp = np.sqrt(1 / np.sqrt(2))
print(tmp)
# With a single value min == max, so the rescale divides 0 by 0 and yields nan
# (see the output below).
(tmp - tmp.min()) / (tmp.max() - tmp.min()) * 0.45 + 0.05

0.8408964152537145


  (tmp - tmp.min()) / (tmp.max() - tmp.min()) * 0.45 + 0.05


nan

In [3]:
# Load the full training table and derive each image's path on disk: images
# are sharded into directories named after the first three characters of the id.
df = pd.read_csv('../input/train_full.csv')
df['filepath'] = [
    os.path.join(DATA_DIR, 'gldv2_full', image_id[0], image_id[1], image_id[2], f'{image_id}.jpg')
    for image_id in df['id']
]


In [5]:
# Collect the distinct landmark ids present in train.csv.
df_small = pd.read_csv('../input/train.csv')
lm_set = {landmark_id for landmark_id in df_small['landmark_id']}

In [6]:
# Flag rows whose landmark id also appears in train.csv. Series.isin performs
# the same membership test as a per-row lambda, but vectorised.
df['filtered_landmark'] = df['landmark_id'].isin(lm_set)

In [8]:
# Remove the columns that are not wanted in the exported file.
# NOTE(review): in-place and not re-runnable; a second execution fails because
# the columns are already gone.
df.drop(columns=['url', 'filepath'], inplace=True)

In [10]:
# Keep only the flagged rows, as an independent copy.
df = df[df['filtered_landmark'] == True].copy()

In [13]:
# Persist the filtered training rows.
# NOTE(review): index=False is not passed, so the index is presumably written
# as an extra leading column; confirm that is intended.
df.to_csv('../input/train_full_filtered_withLabels.csv')

In [14]:
# Rebinds `df` to the recognition solution table; the frame from the previous
# cells is no longer reachable through this name.
df = pd.read_csv('../input/recognition_solution_v2.1.csv')

In [15]:
# Keep only rows without a landmark label. .copy() makes an independent (deep)
# copy; the previous .copy(0) passed deep=False and produced a shallow copy.
df = df.loc[df['landmarks'].isna()].copy()

In [17]:
# Persist the rows that have no landmark label.
# NOTE(review): index=False is not passed, so the index is presumably written
# as an extra leading column; confirm that is intended.
df.to_csv('../input/nonLandmarks.csv')

In [None]:
# Reload the full training table with image paths and wrap it for batched
# inference. shuffle=False keeps batches in row order, which the embedding
# loop relies on when writing results by position.
df = pd.read_csv('../input/train_full.csv')
df['filepath'] = [
    os.path.join(DATA_DIR, 'gldv2_full', image_id[0], image_id[1], image_id[2], f'{image_id}.jpg')
    for image_id in df['id']
]

dataset = LandmarkDataset(df, transform=transforms)
dataloader = DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=True,
)

In [16]:
# get dataframe
# Rebinds `df` to the recognition solution table. The filepath column is not
# added here (the line below is disabled); a separate cell adds it.
df = pd.read_csv('../input/recognition_solution_v2.1.csv')
# df['filepath'] = df['id'].apply(lambda x: os.path.join(DATA_DIR, 'test_2019', x[0], x[1], x[2], f'{x}.jpg'))

In [5]:
# (rows, columns) of the current frame.
df.shape

(117577, 4)

In [7]:
# Verify that every referenced image exists on disk. The previous loop tested
# iloc[0] on each iteration, so only the first path was ever checked.
for filepath in tqdm(df['filepath'], total=df.shape[0]):
    assert os.path.exists(filepath), f'missing image file: {filepath}'

100%|██████████| 117577/117577 [00:00<00:00, 205144.26it/s]


In [14]:
# (rows, columns) of the current frame.
df.shape

(117577, 4)

In [17]:
# Keep only rows without a landmark label. .copy() detaches the result from
# the original frame so that columns can be added to it later without pandas
# treating it as a slice (SettingWithCopyWarning).
df = df.loc[df['landmarks'].isna()].copy()
df.shape

(115605, 3)

In [18]:
# Derive each test image's path: files are sharded into directories named
# after the first three characters of the id.
df['filepath'] = [
    os.path.join(DATA_DIR, 'test_2019', image_id[0], image_id[1], image_id[2], f'{image_id}.jpg')
    for image_id in df['id']
]

In [20]:
# Display the frame of unlabeled rows together with the derived filepath column.
df

Unnamed: 0,id,landmarks,Usage,filepath
0,e324e0f3e6d9e504,,Private,../input/test_2019/e/3/2/e324e0f3e6d9e504.jpg
1,d9e17c5f3e0c47b3,,Private,../input/test_2019/d/9/e/d9e17c5f3e0c47b3.jpg
2,1a748a755ed67512,,Public,../input/test_2019/1/a/7/1a748a755ed67512.jpg
3,537bf9bdfccdafea,,Private,../input/test_2019/5/3/7/537bf9bdfccdafea.jpg
4,13f4c974274ee08b,,Private,../input/test_2019/1/3/f/13f4c974274ee08b.jpg
...,...,...,...,...
117571,a339ef58d82dcb86,,Private,../input/test_2019/a/3/3/a339ef58d82dcb86.jpg
117573,5426472625271a4d,,Public,../input/test_2019/5/4/2/5426472625271a4d.jpg
117574,7b6a585405978398,,Public,../input/test_2019/7/b/6/7b6a585405978398.jpg
117575,d885235ba249cf5d,,Public,../input/test_2019/d/8/8/d885235ba249cf5d.jpg


In [None]:
with torch.no_grad():

    # Size the output from the dataset that is actually iterated; `df` may have
    # been rebound by another cell since the dataloader was built.
    # 512 is the output width of the model's `feat` neck.
    n_samples = len(dataloader.dataset)
    embeddings = np.zeros((n_samples, 512), dtype=np.float16)

    # Running write position; this does not assume every batch holds exactly
    # BATCH_SIZE rows.
    offset = 0
    for data in tqdm(dataloader, total=len(dataloader)):

        data = data.cuda()

        with autocast():
            embedding = model(data)

        batch = embedding.detach().cpu().numpy()
        embeddings[offset:offset + batch.shape[0], :] = batch
        offset += batch.shape[0]

    # Every row of the output must have been written exactly once.
    assert offset == n_samples, f'wrote {offset} embeddings, expected {n_samples}'

In [None]:
# NOTE(review): this rebinds `embeddings` to an all-zero (100000, 512) array.
# If it runs after the extraction loop, the computed embeddings are discarded
# and the save cell below writes zeros. Looks like scratch code; confirm
# before running the notebook top to bottom.
embeddings = np.zeros((100000 , 512), dtype=np.float16)

In [None]:

# np.save does not create missing directories, so make sure the target exists.
os.makedirs('./embeddings', exist_ok=True)
# np.save appends the '.npy' extension to the given file name.
np.save("./embeddings/{}_embeddings".format(LOAD_MODEL), embeddings)