In [1]:
import math
import time
import pickle
import random
import argparse
import albumentations
import numpy as np
import pandas as pd
from tqdm import tqdm as tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset

#import cudf, cuml, cupy
#from cuml.feature_extraction.text import TfidfVectorizer
from cuml.neighbors import NearestNeighbors

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors as NearestNeighbors1

from tqdm import tqdm

from warnings import filterwarnings
filterwarnings("ignore")

In [2]:
import cv2
import numpy as np
import albumentations
import torch
from torch.utils.data import Dataset

class ShopeeDataset(Dataset):
    """Image-only dataset reading the files named in ``df['image']`` from ``path``.

    Parameters
    ----------
    df : DataFrame-like
        Must provide an ``'image'`` column of file names; its length defines
        the dataset length.
    path : str
        Prefix that is string-concatenated with each file name (so it needs a
        trailing separator).
    transform : callable, optional
        Called as ``transform(image=image)``; the ``'image'`` entry of the
        returned mapping replaces the image.
    """

    def __init__(self, df, path, transform=None):
        self.df = df
        self.path = path
        self.file_names = df['image'].values
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Load item ``idx`` and return it as a float32 tensor in channel-first order.

        Raises
        ------
        FileNotFoundError
            If OpenCV cannot read the file (missing, unreadable or corrupt).
        """
        file_name = self.file_names[idx]
        file_path = self.path + file_name
        image = cv2.imread(file_path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        if image is None:
            raise FileNotFoundError(f'Could not read image file: {file_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']
        image = image.astype(np.float32)
        # Move the channel axis (last) to the front.
        image = image.transpose(2, 0, 1)

        return torch.tensor(image).float()
    
def get_transforms(image_size, val_resize=628):
    """Build the albumentations pipelines for training and validation/inference.

    Parameters
    ----------
    image_size : int
        Side length of the square image produced by both pipelines.
    val_resize : int, optional
        Side length the validation image is resized to before the centre
        crop. Defaults to 628, the value this function previously hard-coded.

    Returns
    -------
    tuple
        ``(transforms_train, transforms_val)``.

    Raises
    ------
    ValueError
        If ``image_size`` is larger than ``val_resize``, because the centre
        crop would then be larger than the resized image.
    """
    if image_size > val_resize:
        raise ValueError(
            f'image_size ({image_size}) must not exceed val_resize ({val_resize})'
        )

    transforms_train = albumentations.Compose([
        albumentations.HorizontalFlip(p=0.5),
        albumentations.ImageCompression(quality_lower=99, quality_upper=100),
        albumentations.ShiftScaleRotate(shift_limit=0.2, scale_limit=0.2, rotate_limit=10, border_mode=0, p=0.7),
        albumentations.Resize(image_size, image_size),
        albumentations.Cutout(max_h_size=int(image_size * 0.4), max_w_size=int(image_size * 0.4), num_holes=1, p=0.5),
        albumentations.Normalize()
    ])

    # Validation: resize slightly larger than the target, then crop the centre.
    transforms_val = albumentations.Compose([
        albumentations.Resize(val_resize, val_resize),
        albumentations.CenterCrop(image_size, image_size, p=1.0),
        albumentations.Normalize()
    ])

    return transforms_train, transforms_val

from typing import Dict, Tuple, Any

def global_average_precision_score(
        y_true: Dict[Any, Any],
        y_pred: Dict[Any, Tuple[Any, float]]
) -> float:
    """
    Compute Global Average Precision score (GAP)
    Parameters
    ----------
    y_true : Dict[Any, Any]
        Dictionary with query ids and true ids for query samples
        (``None`` marks a query without a target)
    y_pred : Dict[Any, Tuple[Any, float]]
        Dictionary with query ids and predictions (predicted id, confidence
        level)
    Returns
    -------
    float
        GAP score; 0.0 when no query in ``y_true`` has a target
    """
    queries_with_target = sum(1 for true_id in y_true.values() if true_id is not None)
    # Without any target the normalisation below would divide by zero.
    if queries_with_target == 0:
        return 0.0

    # Rank predictions from most to least confident.
    ranked_queries = sorted(y_pred.keys(), key=lambda query: -y_pred[query][1])

    correct_predictions = 0
    total_score = 0.
    for rank, query in enumerate(ranked_queries, 1):
        if y_true[query] == y_pred[query][0]:
            correct_predictions += 1
            # Only correct predictions contribute their precision-at-rank.
            total_score += correct_predictions / rank

    return 1 / queries_with_target * total_score

import math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

import sys
sys.path.append('../input/timm-shopee')
import timm
from torch.nn.parameter import Parameter


class Swish(torch.autograd.Function):
    """Custom autograd function computing ``i * sigmoid(i)`` with a manual backward.

    Only the input is saved for the backward pass; the sigmoid is recomputed
    there.
    """

    @staticmethod
    def forward(ctx, i):
        result = i * torch.sigmoid(i)
        ctx.save_for_backward(i)
        return result

    @staticmethod
    def backward(ctx, grad_output):
        # ``saved_tensors`` replaces the deprecated ``saved_variables`` accessor.
        i, = ctx.saved_tensors
        sigmoid_i = torch.sigmoid(i)
        # d/di [i * s(i)] = s(i) * (1 + i * (1 - s(i)))
        return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i)))

class swish(nn.Module):
    """``nn.Module`` wrapper that applies the ``Swish`` autograd function."""

    def forward(self, x):
        # Go through Function.apply so the hand-written backward is used.
        activated = Swish.apply(x)
        return activated

class h_swish(nn.Module):
    """Hard-swish activation: ``relu6(x + 3) / 6 * x``."""

    def __init__(self, inplace=True):
        super().__init__()
        # Forwarded to relu6; it acts on the temporary ``x + 3`` only.
        self.inplace = inplace

    def forward(self, x):
        shifted = x + 3.
        gate = F.relu6(shifted, self.inplace) / 6.
        return gate * x

class CrossEntropyLossWithLabelSmoothing(nn.Module):
    """Cross entropy against label-smoothed one-hot targets.

    The true class receives ``ls_`` and every class (the true one included)
    additionally receives ``(1 - ls_) / n_dim``.
    """

    def __init__(self, n_dim, ls_=0.9):
        super().__init__()
        self.ls_ = ls_
        self.n_dim = n_dim

    def forward(self, x, target):
        """Return the batch-mean loss for logits ``x`` and integer class ``target``."""
        uniform_mass = (1 - self.ls_) / self.n_dim
        soft_target = F.one_hot(target, self.n_dim).float() * self.ls_ + uniform_mass

        log_probs = F.log_softmax(x, dim=-1)
        per_sample = (-log_probs * soft_target).sum(-1)
        return per_sample.mean()


class DenseCrossEntropy(nn.Module):
    """Cross entropy for dense (one-hot or soft) target distributions."""

    def forward(self, x, target):
        """Return the batch mean of ``-sum(log_softmax(x) * target)`` over the last axis."""
        log_probs = F.log_softmax(x.float(), dim=-1)
        dense_target = target.float()

        per_sample = (-log_probs * dense_target).sum(-1)
        return per_sample.mean()


class ArcMarginProduct_subcenter(nn.Module):
    """Cosine classifier with ``k`` sub-centres per class.

    The weight holds ``out_features * k`` rows. ``forward`` returns, for each
    class, the largest cosine similarity among its ``k`` sub-centres.
    """

    def __init__(self, in_features, out_features, k=3):
        super().__init__()
        self.k = k
        self.out_features = out_features
        self.weight = nn.Parameter(
            torch.empty(out_features * k, in_features, dtype=torch.float32)
        )
        self.reset_parameters()

    def reset_parameters(self):
        """Fill the weight uniformly in ``[-1/sqrt(in_features), 1/sqrt(in_features)]``."""
        fan_in = self.weight.size(1)
        bound = 1. / math.sqrt(fan_in)
        self.weight.data.uniform_(-bound, bound)

    def forward(self, features):
        """Return per-class cosines of shape ``(batch, out_features)``."""
        unit_features = F.normalize(features)
        unit_centres = F.normalize(self.weight)
        similarities = F.linear(unit_features, unit_centres)
        # Consecutive groups of k weight rows belong to the same class.
        grouped = similarities.view(-1, self.out_features, self.k)
        best, _ = torch.max(grouped, dim=2)
        return best


class ArcFaceLossAdaptiveMargin(nn.modules.Module):
    """ArcFace loss where each class has its own angular margin.

    Parameters
    ----------
    margins : array-like
        Per-class margins (radians), indexed by integer class label.
    s : float, optional
        Scale applied to the margin-adjusted cosines before the cross entropy.
    """

    # Lower bound for 1 - cos^2 before the square root. Rounding can push a
    # cosine to or slightly beyond +/-1, which would give sqrt a non-positive
    # argument (NaN forward, NaN/inf backward).
    _SINE_SQ_EPS = 1e-7

    def __init__(self, margins, s=30.0):
        super().__init__()
        self.crit = DenseCrossEntropy()
        self.s = s
        self.margins = margins

    def forward(self, logits, labels, out_dim):
        """Return the scalar loss.

        Parameters
        ----------
        logits : Tensor
            Cosine similarities, one row per sample and one column per class.
        labels : Tensor
            Integer class labels, one per sample.
        out_dim : int
            Number of classes (width of the one-hot encoding).
        """
        # Follow the logits' device instead of assuming CUDA is available.
        device = logits.device
        ms = np.asarray(self.margins)[labels.detach().cpu().numpy()]

        def per_sample_column(values):
            # One value per sample, shaped to broadcast across the class axis.
            return torch.from_numpy(np.asarray(values)).float().to(device).view(-1, 1)

        cos_m = per_sample_column(np.cos(ms))
        sin_m = per_sample_column(np.sin(ms))
        th = per_sample_column(np.cos(math.pi - ms))
        mm = per_sample_column(np.sin(math.pi - ms) * ms)

        labels = F.one_hot(labels, out_dim).float().to(device)
        cosine = logits.float()
        sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(min=self._SINE_SQ_EPS))
        # cos(theta + m) via the angle-addition formula.
        phi = cosine * cos_m - sine * sin_m
        # Where the cosine is not above the threshold, use the linear fallback.
        phi = torch.where(cosine > th, phi, cosine - mm)
        # Only the target class gets the margin-adjusted value.
        output = (labels * phi) + ((1.0 - labels) * cosine)
        output = output * self.s
        return self.crit(output, labels)

def gem(x, p=3, eps=1e-6):
    """Generalized-mean pooling over the last two axes of ``x``.

    Values are clamped to at least ``eps``, raised to ``p``, averaged over the
    full spatial window and raised to ``1/p``.
    """
    height, width = x.size(-2), x.size(-1)
    powered = x.clamp(min=eps).pow(p)
    pooled = F.avg_pool2d(powered, (height, width))
    return pooled.pow(1./p)

class GeM(nn.Module):
    """Generalized-mean pooling layer that delegates to ``gem``.

    Parameters
    ----------
    p : float, optional
        Pooling exponent.
    eps : float, optional
        Lower clamp passed through to ``gem``.
    p_trainable : bool, optional
        When true, ``p`` is stored as a one-element ``Parameter``; otherwise
        it is kept as the plain value that was passed in.
    """

    def __init__(self, p=3, eps=1e-6, p_trainable=True):
        super(GeM,self).__init__()
        if p_trainable:
            self.p = Parameter(torch.ones(1)*p)
        else:
            self.p = p
        self.eps = eps

    def forward(self, x):
        return gem(x, p=self.p, eps=self.eps)

    def __repr__(self):
        # ``p`` is a tensor only when trainable; a plain number has no ``.data``.
        if torch.is_tensor(self.p):
            p_value = self.p.data.tolist()[0]
        else:
            p_value = float(self.p)
        return self.__class__.__name__ + '(' + 'p=' + '{:.4f}'.format(p_value) + ', ' + 'eps=' + str(self.eps) + ')'

In [3]:
class Model_Shopee(nn.Module):
    """timm backbone followed by GeM pooling, a 512-d neck and a sub-centre cosine head."""

    def __init__(self, model_name, out_dim):
        super().__init__()

        # NOTE(review): reset_classifier(0, '') presumably strips timm's
        # classifier and built-in pooling - confirm against the timm docs.
        backbone = timm.create_model(model_name, pretrained=False)
        backbone.reset_classifier(0, '')
        self.net = backbone
        self.embedding_size = 512

        self.global_pool = GeM()

        neck_layers = [
            nn.Linear(self.net.num_features, self.embedding_size, bias=True),
            nn.BatchNorm1d(self.embedding_size),
            torch.nn.PReLU(),
        ]
        self.neck = nn.Sequential(*neck_layers)

        self.metric_classify = ArcMarginProduct_subcenter(self.embedding_size, out_dim)

    def extract(self, x):
        """Return the backbone's ``forward_features`` output for ``x``."""
        return self.net.forward_features(x)

    def forward(self, x):
        """Return ``(L2-normalised embedding, per-class cosine logits)``."""
        feature_map = self.extract(x)
        pooled = self.global_pool(feature_map)
        # Keep element (0, 0) of the two trailing axes left by the pooling.
        pooled = pooled[:, :, 0, 0]
        embedding = self.neck(pooled)
        logits_m = self.metric_classify(embedding)

        return F.normalize(embedding), logits_m

In [4]:
# Inference configuration: model class, input resolution, backbone name,
# checkpoint path and DataLoader settings used by the cells below.
ModelClass = Model_Shopee
image_size = 560
model_name = 'resnet200d'
load_from = '../input/checkpoint-test/r200_SD_560_b8_f0_10ep_ftlr4e-6_0.2.pth'
batch_size = 64
num_workers = 4

# Switches the cross-validation code paths (read_dataset, get_neighbors and
# the CV report cell) on or off.
GET_CV=False


test = pd.read_csv('../input/shopee-product-matching/test.csv')
# NOTE(review): GET_CV is already False above, so this conditional cannot
# change it; it only matters if the flag above is set to True.
if len(test)>3: GET_CV = False

transforms_train, transforms_val = get_transforms(image_size)
print(GET_CV)

False


In [5]:
def getMetric(col):
    """Return a row-wise F1 scorer comparing ``row[col]`` against ``row.target``.

    The returned function computes ``2 * |intersection| / (|target| + |pred|)``.
    """
    def f1score(row):
        predicted = row[col]
        overlap = np.intersect1d(row.target, predicted)
        return 2 * len(overlap) / (len(row.target) + len(predicted))
    return f1score

CHECK_SUB=False
def read_dataset():
    """Load the frame to predict on and the matching image directory.

    With ``GET_CV`` set, reads train.csv and adds a ``matches`` column holding
    the space-joined posting_ids of each row's label_group; with ``CHECK_SUB``
    also set, the frame is stacked on itself and re-indexed. Otherwise reads
    test.csv unchanged.

    Returns
    -------
    tuple
        ``(df, image_paths)``.
    """
    if not GET_CV:
        test_frame = pd.read_csv('../input/shopee-product-matching/test.csv')
        return test_frame, '../input/shopee-product-matching/test_images/'

    df = pd.read_csv('../input/shopee-product-matching/train.csv')
    group_members = df.groupby(['label_group'])['posting_id'].unique().to_dict()
    df['matches'] = df['label_group'].map(group_members).apply(lambda ids: ' '.join(ids))
    if CHECK_SUB:
        # Doubles the frame by concatenating it with itself.
        df = pd.concat([df, df], axis = 0).reset_index(drop = True)
    return df, '../input/shopee-product-matching/train_images/'

In [6]:
# Number of classes passed to the model's classification head.
# NOTE(review): presumably the number of label groups in train.csv - confirm.
out_dim=11014
# Rebinds `test` to whatever read_dataset selects (train or test frame).
test,IMG_PATH=read_dataset()
print(IMG_PATH)

../input/shopee-product-matching/test_images/


In [7]:
# Number of neighbours requested from the nearest-neighbour searches.
KNN = 50
if len(test)==3:
    # Three-row frame: keep the original special case of two neighbours.
    KNN = 2
else:
    # A neighbour search cannot return more neighbours than there are rows.
    KNN = min(KNN, len(test))
print('KNN dim is ',KNN)

KNN dim is  2


In [8]:
# Function to get our f1 score
import gc
def f1_score(y_true, y_pred):
    """Row-wise F1 between two Series of whitespace-separated id strings.

    Each string is split into a set of ids; the result is a numpy array of
    ``2 * |true & pred| / (|pred| + |true|)``, one value per row.
    """
    true_sets = y_true.apply(lambda text: set(text.split()))
    pred_sets = y_pred.apply(lambda text: set(text.split()))
    overlap = np.array([len(pair[0] & pair[1]) for pair in zip(true_sets, pred_sets)])
    pred_sizes = pred_sets.apply(lambda ids: len(ids)).values
    true_sizes = true_sets.apply(lambda ids: len(ids)).values
    return 2 * overlap / (pred_sizes + true_sizes)


# Default distance cut-offs for the final predictions. They are stricter than
# a CV-optimal value would be because the real test set is larger and contains
# different label groups.
_IMAGE_DISTANCE_THRESHOLD = 0.95
_TEXT_DISTANCE_THRESHOLD = 0.25


def _neighbor_posting_ids(test, distances, indices, k, threshold):
    """Return the posting_ids of row ``k``'s neighbours closer than ``threshold``."""
    idx = np.where(distances[k,] < threshold)[0]
    ids = indices[k,idx]
    return test['posting_id'].iloc[ids].values


def get_neighbors(test, embeddings, KNN = 50, image = True, threshold = None):
    """Find each row's nearest neighbours and keep those within a distance cut-off.

    Parameters
    ----------
    test : DataFrame
        Must have a ``posting_id`` column (and ``matches`` when ``GET_CV`` is
        set). In CV mode the columns ``pred_matches`` and ``f1`` are written
        to it in place.
    embeddings : array-like
        One embedding row per row of ``test``.
    KNN : int, optional
        Number of neighbours requested per row.
    image : bool, optional
        True uses ``NearestNeighbors`` with its default metric; False uses
        ``NearestNeighbors1`` with the cosine metric.
    threshold : float, optional
        Distance cut-off for the returned predictions. Defaults to 0.95 for
        image embeddings and 0.25 for text embeddings.

    Returns
    -------
    tuple
        ``(test, predictions)`` where ``predictions`` is a list holding one
        array of posting_ids per row.
    """
    if image:
        model = NearestNeighbors(n_neighbors = KNN)
        model.fit(embeddings)
    else:
        model = NearestNeighbors1(n_neighbors = KNN, metric = 'cosine').fit(embeddings)
    distances, indices = model.kneighbors(embeddings)

    if threshold is None:
        threshold = _IMAGE_DISTANCE_THRESHOLD if image else _TEXT_DISTANCE_THRESHOLD

    n_rows = embeddings.shape[0]

    # Iterate through different thresholds to maximize cv, run this in interactive mode.
    # The sweep is only reported; the final predictions below use `threshold`.
    if GET_CV:
        if image:
            thresholds = list(np.arange(0.5, 1.1, 0.05))
        else:
            thresholds = list(np.arange(0.15, 0.6, 0.05))
        scores = []
        for candidate in thresholds:
            predictions = []
            for k in range(n_rows):
                posting_ids = ' '.join(_neighbor_posting_ids(test, distances, indices, k, candidate))
                predictions.append(posting_ids)
            test['pred_matches'] = predictions
            test['f1'] = f1_score(test['matches'], test['pred_matches'])
            score = test['f1'].mean()
            print(f'Our f1 score for threshold {candidate} is {score}')
            scores.append(score)
        thresholds_scores = pd.DataFrame({'thresholds': thresholds, 'scores': scores})
        max_score = thresholds_scores[thresholds_scores['scores'] == thresholds_scores['scores'].max()]
        best_threshold = max_score['thresholds'].values[0]
        best_score = max_score['scores'].values[0]
        print(f'Our best score is {best_score} and has a threshold {best_threshold}')
        rows = range(n_rows)
    else:
        # Only the inference path shows a progress bar, as before.
        rows = tqdm(range(n_rows))

    # Final predictions (previously duplicated in both branches).
    predictions = []
    for k in rows:
        predictions.append(_neighbor_posting_ids(test, distances, indices, k, threshold))

    del model, distances, indices
    gc.collect()
    return test, predictions

In [9]:
# Build binary TF-IDF features from the product titles.
print('Computing text embeddings...')
# binary=True: term counts are treated as present/absent before IDF weighting;
# the vocabulary is capped at 21500 features.
model_text = TfidfVectorizer(stop_words=None, 
                        binary=True, 
                        max_features=21500)
text_embeddings = model_text.fit_transform(test.title)
#text_embeddings=text_embeddings.get()
print('text embeddings shape',text_embeddings.shape)

# The fitted vectorizer is not used again; release it.
del model_text
gc.collect()

'''
preds = []
CHUNK = 1024*4

print('Finding similar titles...')
CTS = len(test)//CHUNK
if len(test)%CHUNK!=0: CTS += 1
for j in range( CTS ):
    
    a = j*CHUNK
    b = (j+1)*CHUNK
    b = min(b,len(test))
    print('chunk',a,'to',b)
    
    #COSINE SIMILARITY DISTANCE
    cts = cupy.matmul(text_embeddings, text_embeddings[a:b].T).T
    
    for k in range(b-a):
        IDX = cupy.where(cts[k,]>0.75)[0]
        o = test.iloc[cupy.asnumpy(IDX)].posting_id.values
        preds.append(o)
        
test['preds_text'] = preds
test.head()
'''

Computing text embeddings...
text embeddings shape (3, 28)


"\npreds = []\nCHUNK = 1024*4\n\nprint('Finding similar titles...')\nCTS = len(test)//CHUNK\nif len(test)%CHUNK!=0: CTS += 1\nfor j in range( CTS ):\n    \n    a = j*CHUNK\n    b = (j+1)*CHUNK\n    b = min(b,len(test))\n    print('chunk',a,'to',b)\n    \n    #COSINE SIMILARITY DISTANCE\n    cts = cupy.matmul(text_embeddings, text_embeddings[a:b].T).T\n    \n    for k in range(b-a):\n        IDX = cupy.where(cts[k,]>0.75)[0]\n        o = test.iloc[cupy.asnumpy(IDX)].posting_id.values\n        preds.append(o)\n        \ntest['preds_text'] = preds\ntest.head()\n"

In [10]:
# Text matches: image=False selects the cosine-metric neighbour search and the
# text distance cut-off inside get_neighbors.
test, predictions = get_neighbors(test, text_embeddings, KNN, image = False)
test['preds_text'] = predictions
# The embeddings are not needed once the matches are stored.
del text_embeddings
gc.collect()

100%|██████████| 3/3 [00:00<00:00, 4607.44it/s]


0

In [11]:
# Disabled image+text variant, kept for reference:
# tokenizer = DistilBertTokenizer.from_pretrained('../input/bert-pretrained/distilbert-base-uncased/distilbert-base-uncased/')
# dataset_test = ShopeeImageTextDataset(test, IMG_PATH, transform=transforms_val, tokenizer=tokenizer)

# Image-only dataset using the validation (resize + centre-crop) transforms.
dataset_test = ShopeeDataset(test, IMG_PATH, transform=transforms_val)

# No shuffle argument is passed; the embeddings are later paired with the rows
# of `test` by position, so batches must come back in dataset order.
test_loader = torch.utils.data.DataLoader(dataset_test, batch_size=batch_size, num_workers=num_workers)

In [12]:
# Build the model, move it to the GPU and load the trained weights.
model = ModelClass(model_name, out_dim=out_dim)
model = model.cuda()
checkpoint = torch.load(load_from,  map_location='cpu')
state_dict = checkpoint['model_state_dict']
# Drop the 'module.' prefix (7 characters) from keys that carry it.
state_dict = {k[7:] if k.startswith('module.') else k: state_dict[k] for k in state_dict.keys()}    
# NOTE(review): strict=False ignores key mismatches. The recorded output of
# this cell lists unexpected keys (SCSE.*, global_pool1.p, neck1.*), i.e. the
# checkpoint holds modules this model class does not define - confirm that
# dropping them is intended.
model.load_state_dict(state_dict, strict=False)

_IncompatibleKeys(missing_keys=[], unexpected_keys=['SCSE.channel_excitation.0.weight', 'SCSE.channel_excitation.0.bias', 'SCSE.channel_excitation.2.weight', 'SCSE.channel_excitation.2.bias', 'SCSE.spatial_se.0.weight', 'SCSE.spatial_se.0.bias', 'global_pool1.p', 'neck1.0.weight', 'neck1.0.bias', 'neck1.1.weight', 'neck1.1.bias', 'neck1.1.running_mean', 'neck1.1.running_var', 'neck1.1.num_batches_tracked', 'neck1.2.weight'])

In [13]:
embeds = []

# Switch to evaluation mode before inference. Otherwise the BatchNorm layers
# normalise with per-batch statistics (and update their running statistics),
# so an image's embedding would depend on which other images share its batch.
model.eval()

with torch.no_grad():
    for data in tqdm(test_loader):
        data = data.cuda()
        # The model returns (normalised embedding, logits); only the embedding is kept.
        feat, _ = model(data)
        image_embeddings = feat.detach().cpu().numpy()
        embeds.append(image_embeddings)

# with torch.no_grad():
#     for (data, input_ids, attention_mask) in tqdm(test_loader):
#         data, input_ids, attention_mask = data.cuda(), input_ids.cuda(), attention_mask.cuda()
#         feat, _ = model(data, input_ids, attention_mask)
#         image_embeddings = feat.detach().cpu().numpy()
#         embeds.append(image_embeddings)
        
# Stack the per-batch arrays into one (n_rows, embedding_dim) array.
image_embeddings = np.concatenate(embeds)
print('image embeddings shape',image_embeddings.shape)

100%|██████████| 1/1 [00:01<00:00,  1.26s/it]

image embeddings shape (3, 512)





In [14]:
# Image matches: image=True selects the NearestNeighbors imported from cuml and
# the image distance cut-off inside get_neighbors.
test, image_predictions = get_neighbors(test, image_embeddings, KNN, image = True)
test['preds_image'] = image_predictions
# The embeddings are not needed once the matches are stored.
del image_embeddings
gc.collect()

100%|██████████| 3/3 [00:00<00:00, 2382.68it/s]


0

In [15]:
# tmp = test.groupby('image_phash').posting_id.agg('unique').to_dict()
# test['preds_phash'] = test.image_phash.map(tmp)
# test.head()

In [16]:
def combine_for_sub(row):
    """Return the sorted unique image and text matches of ``row`` as one space-joined string."""
    merged = np.concatenate([row.preds_image, row.preds_text])
    unique_ids = np.unique(merged)
    return ' '.join(unique_ids)

def combine_for_cv(row):
    """Return the sorted unique image and text matches of ``row`` as an array."""
    merged = np.concatenate([row.preds_image, row.preds_text])
    unique_ids = np.unique(merged)
    return unique_ids

In [17]:
# Cross-validation report; runs only when GET_CV is truthy.
if GET_CV:
    # Ground truth per row: every posting_id sharing the row's label_group.
    tmp = test.groupby('label_group').posting_id.agg('unique').to_dict()
    test['target'] = test.label_group.map(tmp)
    # 'oof' is the union of the image and text predictions.
    test['oof'] = test.apply(combine_for_cv,axis=1)
    test['f1'] = test.apply(getMetric('oof'),axis=1)
    print('CV Score =', test.f1.mean() )
    print('===========================================\n')
    
    # Per-modality scores, for comparison against the combined score above.
    print("CV for image :", round(test.apply(getMetric('preds_image'),axis=1).mean(), 3))
    print("CV for text  :", round(test.apply(getMetric('preds_text'),axis=1).mean(), 3))
#     print("CV for phash :", round(test.apply(getMetric('preds_phash'),axis=1).mean(), 3))

In [18]:
# Write the submission: one space-joined list of matched posting_ids per row.
# Note: this overwrites any existing 'matches' column (read_dataset creates a
# ground-truth one when GET_CV is set).
test['matches'] = test.apply(combine_for_sub,axis=1)
test[['posting_id','matches']].to_csv('submission.csv',index=False)
# Read the file back to check what was actually written.
sub = pd.read_csv('submission.csv')
sub.head()

Unnamed: 0,posting_id,matches
0,test_2255846744,test_2255846744
1,test_3588702337,test_3588702337
2,test_4015706929,test_4015706929
