In [1]:
import matplotlib.pyplot as plt
import os
# Hide all CUDA devices so the notebook runs on CPU. This is set before the
# project modules below are imported (presumably they import TensorFlow, which
# reads the variable when it initialises -- TODO confirm).
os.environ["CUDA_VISIBLE_DEVICES"]="-1"
import sys
# Put the parent directory on the import path (presumably where the
# `datasets` and `infi_tf` modules live -- verify against the repo layout).
sys.path.append("..")
from datasets.hollywood2 import build_hollywood2_image_dataset, build_hollywood2_image_pair_dataset
from infi_tf import load_infi_reuse
import numpy as np
from sklearn.neighbors import NearestNeighbors

In [2]:
# Load the InFi-Reuse model from the saved checkpoint and print its layers.
# NOTE(review): this path is relative to the working directory, while the
# dataset paths used later start with "../" -- confirm both resolve from the same cwd.
infi = load_infi_reuse("weights/hollywood2_gc_reuse/20230618-204304-Epoch20.h5")
infi.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input-1 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
input-2 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
model (Functional)              (None, 200)          24486       input-1[0][0]                    
                                                                 input-2[0][0]                    
__________________________________________________________________________________________________
tf.math.subtract (TFOpLambda)   (None, 200)          0           model[0][0]                

In [3]:
# use the embedding module to compute features
# In the summary printed for `infi`, the first two layers are the InputLayers
# and the third ("model") is the nested network that both inputs feed into and
# that outputs 200 values per image, so index 2 selects it.
emb_model = infi.layers[2]
emb_model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
activation (Activation)         (None, 224, 224, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
separable_conv2d (SeparableConv (None, 224, 224, 32) 155         activation[0][0]                 
__________________________________________________________________________________________________
layer_normalization (LayerNorma (None, 224, 224, 32) 64          separable_conv2d[0][0]           
______________________________________________________________________________________________

In [4]:
# compute embeddings
# Build the dataset of test frames, batch it in groups of 10, and run the
# embedding sub-model over it to get one embedding per frame.
x_test = build_hollywood2_image_dataset(list_path="../datasets/Hollywood2/list_test.txt",
                                        root_path="../datasets/Hollywood2/frames/").batch(10)
emb_test = emb_model.predict(x_test)

In [5]:
# Sanity check: expect (number of test frames, embedding size).
emb_test.shape

(884, 200)

Then you can use the 200-dim embeddings for K-Nearest-Neighbor retrieval.

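Note that InFi-Reuse uses a learnable similarity function, i.e., the last dense layer, so the distance defined below is computed from that layer's weight and bias (`clf_w` and `clf_b`) rather than from a fixed metric such as Euclidean distance.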

In [19]:
# define customized distance function to use sklearn KNN
def mydist(x, y, w=None, b=None):
    """Learned InFi-Reuse distance between two embeddings.

    Computes ``1 - sigmoid(dot(|x - y|, w) + b)``, i.e. one minus the
    similarity score produced by a sigmoid over a linear function of the
    element-wise absolute difference.

    Parameters
    ----------
    x, y : array-like
        Embeddings of identical shape (scikit-learn passes two 1-D vectors).
    w : array-like, optional
        Weight applied to ``|x - y|``. When omitted, the notebook-level
        global ``clf_w`` is used (the original behaviour).
    b : array-like or float, optional
        Bias added to the weighted difference. When omitted, the
        notebook-level global ``clf_b`` is used (the original behaviour).

    Returns
    -------
    float or numpy.ndarray
        Distance in [0, 1]; its shape is whatever ``np.dot(|x - y|, w) + b``
        produces (e.g. a 1-element array for a ``(dim, 1)`` weight).

    Raises
    ------
    NameError
        If ``w`` / ``b`` are omitted and ``clf_w`` / ``clf_b`` have not been
        defined in the kernel.

    Notes
    -----
    ``w`` and ``b`` can be supplied explicitly through
    ``NearestNeighbors(metric=mydist, metric_params={"w": ..., "b": ...})``,
    which avoids depending on kernel-global state.
    NOTE(review): scikit-learn documents that a callable metric must return
    one value; with a ``(dim, 1)`` weight this returns a 1-element array --
    confirm that coercion is acceptable for the installed versions.
    """
    # Fall back to the notebook globals only when no explicit parameters are
    # given; reading a global needs no `global` statement.
    if w is None:
        w = clf_w
    if b is None:
        b = clf_b

    logit = np.dot(np.abs(x - y), w) + b
    sim = 1/(1+np.exp(-logit))  # sigmoid -> similarity in (0, 1)
    return 1-sim

In [10]:
# checking consistency
# Take one image pair (batch size 1, labels not loaded) from the pair list and
# score it with the full model; `x_elem` and `infi_out` are reused by the
# following cells.
x_temp = build_hollywood2_image_pair_dataset(list_path="../datasets/Hollywood2/list_pair_gc_test.txt",
                                             root_path="../datasets/Hollywood2/frames/", load_label=False).batch(1)
x_elem = next(iter(x_temp))
infi_out = infi.predict(x_elem)
infi_out

array([[0.5299884]], dtype=float32)

In [23]:
# Re-run the full model on the same pair and convert its output to a
# distance (1 - output) so it can be compared with mydist().
infi_out = infi.predict(x_elem)
1-infi_out

array([[0.4700116]], dtype=float32)

In [24]:
# Embed the two images of the pair one after the other ([0] picks the single
# row of each size-1 batch), then apply the learned distance to the embeddings.
emb0, emb1 = (emb_model.predict(x_elem[idx])[0] for idx in (0, 1))
mydist(emb0, emb1)

array([0.4700116], dtype=float32)

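We can see that the two results are identical: `1 - infi.predict(...)` on the image pair and `mydist(...)` on the two separately computed embeddings both give 0.4700116.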

In [26]:
# Treat the first 100 test embeddings as the cache to retrieve from.
cached_emb = emb_test[:100]

# `mydist` is a learned dissimilarity, not a true metric: mydist(x, x) equals
# 1 - sigmoid(clf_b), which is generally non-zero, and nothing enforces the
# triangle inequality. Tree indexes such as the ball tree assume a true
# metric, so request exhaustive search explicitly instead of leaving the
# choice to algorithm="auto".
nbrs = NearestNeighbors(n_neighbors=10, metric=mydist, algorithm="brute")
nbrs.fit(cached_emb)

NearestNeighbors(metric=<function mydist at 0x0000029FB71E53A8>, n_neighbors=10)

In [31]:
# Query with a single embedding that is not in the cache; the 101:102 slice
# (rather than plain index 101) keeps the input 2-D, one row per query.
new_emb = emb_test[101:102]
# Displays a (distances, indices) pair; the indices are row positions in `cached_emb`.
nbrs.kneighbors(new_emb)

(array([[0.25247723, 0.26649195, 0.27525902, 0.28641641, 0.28749996,
         0.2923674 , 0.29436862, 0.29524779, 0.31441182, 0.31470031]]),
 array([[98, 80, 94, 32,  4, 18, 15, 24, 51, 26]], dtype=int64))