Precomputing embeddings of the training dataset to accelerate the submission

In [None]:
import operator
import gc
import pathlib
import shutil
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import backend as K
from scipy import spatial
import cv2
import math
from tqdm.notebook import tqdm as tqdm
from functools import partial

In [None]:
# Let tf.data choose parallelism; used for TFRecord reads, map calls and prefetch.
AUTOTUNE = tf.data.experimental.AUTOTUNE
# Not referenced by any cell visible in this notebook.
GCS_PATH = "gs://landmark-recognition-2020"
# Batch size applied in get_dataset and used as the write stride in generate_embeddings.
BATCH_SIZE = 64
# [height, width] that decode_image reshapes to and that the model input expects.
IMAGE_SIZE = [736, 736]
# The three values below are not referenced by any cell visible in this notebook.
NUM_TO_RERANK = 1
NUM_PUBLIC_TEST_IMAGES = 10345
NUM_TRAIN_IMAGES = 1580470
# Width of the one-hot label input and number of ArcMarginProduct classes.
NUMBER_OF_CLASSES = 81313
# Column count of the embeddings array; must match the Dense(512) layer in get_model.
NUM_EMBEDDING_DIMENSIONS = 512
# NOTE(review): despite the *_DIR name this points at a CSV file, and it is not
# referenced by any cell visible in this notebook.
DATASET_DIR = '/content/drive/MyDrive/kaggle/google-landmark-recognition-2021/train.csv'
# The two image directories below are not referenced by any visible cell.
TEST_IMAGE_DIR = '/content/test'
TRAIN_IMAGE_DIR = '../input/landmark-recognition-2021/train'

In [None]:
def decode_image(image):
    '''
    Decode JPEG bytes into a 3-channel image tensor.

    Args:
        image: Scalar string tensor holding the encoded JPEG.

    Returns:
        The decoded image reshaped to ``[*IMAGE_SIZE, 3]``. The reshape
        only succeeds when the stored image already has exactly that many
        pixels; no resizing is performed here.
    '''
    target_shape = [*IMAGE_SIZE, 3]
    pixels = tf.image.decode_jpeg(image, channels=3)
    return tf.reshape(pixels, target_shape)

def read_tfrecord(example, labeled):
    '''
    Parse one serialized TFRecord example into model-ready tensors.

    Args:
        example: Scalar string tensor containing a serialized example.
        labeled: Python bool. When true the record must also carry an
            int64 ``landmark_id`` feature.

    Returns:
        ``(image, one_hot_label)`` when ``labeled`` is true, otherwise
        ``(image, image_id)`` where ``image_id`` is the raw string feature.
    '''
    # Every record carries the encoded image and its id; labelled records
    # additionally carry the landmark class.
    tfrecord_format = {
        "image": tf.io.FixedLenFeature([], tf.string),
        "image_id": tf.io.FixedLenFeature([], tf.string),
    }
    if labeled:
        tfrecord_format['landmark_id'] = tf.io.FixedLenFeature([], tf.int64)

    example = tf.io.parse_single_example(example, tfrecord_format)
    image = decode_image(example['image'])

    if labeled:
        label = tf.cast(example['landmark_id'], tf.int32)
        # The original referenced an undefined N_CATEGORIES here, which
        # raised NameError; the class count lives in NUMBER_OF_CLASSES.
        label = tf.one_hot(label, NUMBER_OF_CLASSES)
        return image, label

    return image, example['image_id']

def load_dataset(filenames, labeled=True, ordered=False):
    '''
    Build an unbatched dataset of parsed records from TFRecord files.

    Args:
        filenames: TFRecord path(s) handed to ``tf.data.TFRecordDataset``.
        labeled: Forwarded to ``read_tfrecord`` to select the record schema.
        ordered: When false, deterministic ordering is switched off so
            elements are emitted as soon as they are available.

    Returns:
        A ``tf.data.Dataset`` whose elements are whatever ``read_tfrecord``
        returns for the chosen ``labeled`` mode.
    '''
    options = tf.data.Options()
    if not ordered:
        # Trade reproducible ordering for throughput.
        options.experimental_deterministic = False

    parse_record = partial(read_tfrecord, labeled=labeled)

    # num_parallel_reads interleaves reads across the input files.
    records = tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTOTUNE)
    return records.with_options(options).map(
        parse_record, num_parallel_calls=AUTOTUNE)

def get_dataset(filepaths, labeled=True, ordered=False):
    '''
    Return a batched, prefetching dataset over the given TFRecord files.

    Args:
        filepaths: TFRecord path(s), forwarded to ``load_dataset``.
        labeled: Forwarded to ``load_dataset``.
        ordered: Forwarded to ``load_dataset``.

    Returns:
        The parsed dataset grouped into batches of ``BATCH_SIZE`` (the
        final batch may be smaller) with prefetching enabled.
    '''
    return (
        load_dataset(filepaths, labeled=labeled, ordered=ordered)
        .batch(BATCH_SIZE)
        .prefetch(AUTOTUNE)
    )

In [None]:
class ArcMarginProduct(tf.keras.layers.Layer):
    '''
    Additive angular margin logits layer (large margin arc distance).

    Called on ``[features, one_hot_labels]``; returns cosine logits scaled
    by ``s`` in which the entries selected by the labels have the angular
    margin ``m`` applied.

    Reference:
        https://arxiv.org/pdf/1801.07698.pdf
        https://github.com/lyakaap/Landmark2019-1st-and-3rd-Place-Solution/
            blob/master/src/modeling/metric_learning.py
    '''

    def __init__(self, n_classes, s=30, m=0.50, easy_margin=False,
                 ls_eps=0.0, **kwargs):
        '''
        Args:
            n_classes: Number of columns of the class-weight matrix.
            s: Scale multiplied into the returned logits.
            m: Angular margin in radians.
            easy_margin: Selects which fallback rule ``call`` uses when the
                margin is not applied directly.
            ls_eps: Label-smoothing strength; smoothing is skipped unless
                it is greater than zero.
            **kwargs: Passed through to ``tf.keras.layers.Layer``.
        '''
        super().__init__(**kwargs)

        # Hyper-parameters; these are exactly what get_config serialises.
        self.n_classes = n_classes
        self.s = s
        self.m = m
        self.easy_margin = easy_margin
        self.ls_eps = ls_eps

        # Margin-derived constants, computed once.
        self.cos_m = tf.math.cos(m)
        self.sin_m = tf.math.sin(m)
        self.th = tf.math.cos(math.pi - m)
        self.mm = tf.math.sin(math.pi - m) * m

    def get_config(self):
        '''Return the base layer config extended with this layer's hyper-parameters.'''
        config = super().get_config().copy()
        for key in ('n_classes', 's', 'm', 'ls_eps', 'easy_margin'):
            config[key] = getattr(self, key)
        return config

    def build(self, input_shape):
        '''Create the class-weight matrix ``W`` of shape (feature_dim, n_classes).'''
        # input_shape is a pair; only the first entry (features) sizes W.
        feature_shape = input_shape[0]
        super().build(feature_shape)

        self.W = self.add_weight(
            name='W',
            shape=(int(feature_shape[-1]), self.n_classes),
            initializer='glorot_uniform',
            dtype='float32',
            trainable=True,
            regularizer=None)

    def call(self, inputs):
        '''
        Compute the margin-adjusted, scaled cosine logits.

        Args:
            inputs: Pair ``(features, labels)``. ``labels`` is cast to the
                dtype of the cosine matrix and used as a per-class weight,
                so it is expected to be one-hot encoded.

        Returns:
            Tensor of logits multiplied by ``s``.
        '''
        features, labels = inputs

        # Cosine between each L2-normalised feature row and weight column.
        unit_features = tf.math.l2_normalize(features, axis=1)
        unit_weights = tf.math.l2_normalize(self.W, axis=0)
        cosine = tf.matmul(unit_features, unit_weights)

        # NOTE(review): if rounding ever pushes |cosine| above 1 the sqrt
        # argument goes negative and yields NaN -- confirm whether a clip
        # is wanted here.
        sine = tf.math.sqrt(1.0 - tf.math.pow(cosine, 2))

        # cos(theta + m) through the angle-addition identity.
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = tf.where(cosine > 0, phi, cosine)
        else:
            phi = tf.where(cosine > self.th, phi, cosine - self.mm)

        target_weights = tf.cast(labels, dtype=cosine.dtype)
        if self.ls_eps > 0:
            target_weights = (
                (1 - self.ls_eps) * target_weights
                + self.ls_eps / self.n_classes
            )

        # Blend: margin logits where the label weight is set, plain cosine elsewhere.
        blended = (target_weights * phi) + ((1.0 - target_weights) * cosine)
        return blended * self.s

In [None]:
def get_model():
    '''
    Build and compile the ArcFace-style training model.

    The graph is: image -> ResNetV2 preprocessing -> ResNet152V2 backbone
    (no pretrained weights, no classification top) -> global average
    pooling -> dropout(0.3) -> Dense(512) -> ArcMarginProduct (which also
    takes the one-hot label input) -> softmax.

    Returns:
        A compiled ``tf.keras`` model with inputs ``[image, label]`` and a
        single softmax output over ``NUMBER_OF_CLASSES`` classes.
    '''
    arc_head = ArcMarginProduct(
        n_classes=NUMBER_OF_CLASSES, s=64, m=0.05,
        name='head/arc_margin', dtype='float32')
    preprocess = tf.keras.layers.Lambda(
        tf.keras.applications.resnet_v2.preprocess_input,
        input_shape=[*IMAGE_SIZE, 3])
    backbone = tf.keras.applications.resnet_v2.ResNet152V2(
        weights=None, include_top=False)

    image = tf.keras.Input(shape=(*IMAGE_SIZE, 3), name='inp1')
    label = tf.keras.Input(shape=(NUMBER_OF_CLASSES,), name='inp2')

    feature_map = backbone(preprocess(image))
    pooled = tf.keras.layers.GlobalAveragePooling2D()(feature_map)
    dropped = tf.keras.layers.Dropout(0.3)(pooled)
    # 512-d embedding; this is the tensor fed to the margin head.
    embedding = tf.keras.layers.Dense(512)(dropped)
    logits = arc_head([embedding, label])
    probabilities = tf.keras.layers.Softmax(dtype='float32')(logits)

    model = tf.keras.models.Model(
        inputs=[image, label], outputs=[probabilities])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
        loss=[tf.keras.losses.CategoricalCrossentropy()],
        metrics=[tf.keras.metrics.CategoricalAccuracy()])

    return model

In [None]:
# Rebuild the full training graph so the checkpoint weights can be loaded into it.
model = get_model()
model.load_weights('/content/drive/MyDrive/kaggle/google-landmark-recognition-2021/resnetv2/best_weights.h5')
# Keep only the image -> embedding path. The embedding is the first input of
# the named margin head (the Dense(512) output). Selecting it through the
# layer name replaces the positional `model.layers[-4]`, which only pointed at
# the Dense layer because of how Keras happens to order layers.
embedding_tensor = model.get_layer('head/arc_margin').input[0]
model = tf.keras.models.Model(inputs = model.input[0], outputs = embedding_tensor)

In [None]:
def generate_embeddings(filepaths, model, size):
    '''
    Run the embedding model over unlabeled TFRecords and collect the results.

    Args:
        filepaths: TFRecord path(s) forwarded to ``get_dataset``.
        model: Object whose ``predict(batch, verbose=0)`` returns one row of
            ``NUM_EMBEDDING_DIMENSIONS`` values per input image.
        size: Maximum number of records to embed; also the capacity of the
            preallocated output buffers.

    Returns:
        ``(ids, embeddings)``: a ``<U16`` array of image ids and the matching
        embedding matrix, row-aligned. Both hold ``size`` rows when the
        dataset provides at least that many records, and fewer (only the
        rows actually written) when it provides less.
    '''
    dataset = get_dataset(filepaths, labeled=False, ordered=False)
    # NOTE(review): '<U16' silently truncates ids longer than 16 characters.
    ids = np.empty((size,), dtype='<U16')
    embeddings = np.empty((size, NUM_EMBEDDING_DIMENSIONS))
    num_batches = math.ceil(size / BATCH_SIZE)

    written = 0
    # zip with range caps the number of batches pulled from the dataset.
    for _, batch in tqdm(zip(range(num_batches), dataset), total=num_batches):
        image, idnum = batch
        # verbose=0: tqdm already reports progress for the outer loop.
        prediction = model.predict(image, verbose=0)  # (batch, 512)
        batch_ids = idnum.numpy().astype('str')

        # Write at a running offset using the real batch length, clamped to
        # the remaining capacity, instead of assuming full BATCH_SIZE batches.
        count = min(len(batch_ids), size - written)
        ids[written:written + count] = batch_ids[:count]
        embeddings[written:written + count] = prediction[:count]
        written += count

        # Drop references and collect eagerly to limit memory growth from
        # calling predict repeatedly in a loop.
        del image, idnum, batch, prediction, batch_ids
        gc.collect()

    # Trim so rows never filled (np.empty garbage) are not returned when the
    # dataset holds fewer than `size` records.
    return ids[:written], embeddings[:written]

In [None]:
# NOTE(review): `filepaths` and `size` are not defined in any cell visible in
# this notebook -- confirm they are assigned before this cell is run.
# generate_embeddings returns (ids, embeddings), so `train_ids` holds the image
# ids and, despite its name, `ids` holds the embedding matrix.
train_ids, ids = generate_embeddings(filepaths, model, size)

In [None]:
# Write both arrays back-to-back into one file: first `train_ids`, then `ids`.
# Reading them back takes two consecutive np.load calls on the same open file
# handle, in the same order.
with open('/content/drive/MyDrive/kaggle/google-landmark-recognition-2021/embeddings.npy', 'wb') as out_file:
    for array in (train_ids, ids):
        np.save(out_file, array)