In [None]:
from datautils import get_loader
import torch
import torch.nn as nn
from torchvision.models import resnet18, ResNet18_Weights
from sklearn.manifold import TSNE
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
_device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Notebook-wide settings.
# Image size and batching options are forwarded to get_loader below.
height, width = 224, 224
batch_size, num_workers = 16, 0
pin_memory = shuffle = drop_last = True
# Perplexity passed to the t-SNE projection further down.
perplexity = 30

In [None]:
# models
# ImageNet-pretrained ResNet-18 used as a fixed feature extractor: the
# classification head (`fc`) is removed and the remaining layers are chained.
model = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
del model.fc  # public equivalent of removing the entry from the private `_modules` dict
encoder = nn.Sequential(*model.children())
# eval() makes BatchNorm use its stored running statistics instead of per-batch
# statistics, so an image's embedding does not depend on which batch it lands in.
encoder = encoder.to(_device).eval()
loader = get_loader(height, width, batch_size, num_workers,
                    pin_memory=pin_memory, shuffle=shuffle, drop_last=drop_last)

In [None]:
# get embeddings
# Encode both images of every pair and collect the per-sample metadata that the
# loader yields alongside them (two captions and three kinds of tags).
e1s, e2s = [], []
c1s, c2s = [], []
tags, sc_tags, co_tags = [], [], []

# Inference only: eval() keeps BatchNorm on its running statistics, and
# no_grad() skips autograd bookkeeping. Without no_grad() the encoder output
# requires grad and `.numpy()` raises a RuntimeError.
encoder.eval()
with torch.no_grad():
    for i1, i2, c1, c2, tag, sc_tag, co_tag in loader:
        # sent to gpu
        i1 = i1.to(_device)
        i2 = i2.to(_device)
        # record embeddings; flatten(1) keeps the batch axis even for a
        # single-sample batch, where squeeze() would drop it and break the
        # concatenation below
        e1s.append(encoder(i1).flatten(1).cpu().numpy())
        e2s.append(encoder(i2).flatten(1).cpu().numpy())
        # record tags and captions (each is assumed to be a per-sample sequence
        # as produced by the loader's collate step; verify against get_loader)
        c1s += c1
        c2s += c2
        tags += tag
        sc_tags += sc_tag
        co_tags += co_tag

# Stack the per-batch arrays into one (n_samples, n_features) array per image set.
e1s = np.concatenate(e1s, 0)
e2s = np.concatenate(e2s, 0)

In [None]:
def cosine_similarity(V1, V2):
    """Row-wise cosine similarity between two equally shaped 2-D arrays.

    Parameters
    ----------
    V1, V2 : array-like of shape (n_samples, n_features)
        Row ``i`` of ``V1`` is compared with row ``i`` of ``V2``.

    Returns
    -------
    numpy.ndarray of shape (n_samples,)
        Cosine similarity of each row pair, clipped to ``[-1, 1]`` to absorb
        floating-point overshoot. A pair containing an all-zero row has no
        defined direction and yields ``nan``.

    Raises
    ------
    ValueError
        If the inputs are not 2-D or their shapes differ.
    """
    V1 = np.asarray(V1)
    V2 = np.asarray(V2)
    if V1.ndim != 2 or V1.shape != V2.shape:
        raise ValueError(
            f"expected two 2-D arrays of equal shape, got {V1.shape} and {V2.shape}"
        )
    # Only matching rows are needed, so compute the row-wise dot products
    # directly instead of building the full (n_samples, n_samples) product.
    dots = np.sum(V1 * V2, axis=1)
    norms = np.linalg.norm(V1, axis=1) * np.linalg.norm(V2, axis=1)
    # Zero-norm rows divide 0 by 0; return nan for them without a warning.
    with np.errstate(divide='ignore', invalid='ignore'):
        similarity = dots / norms
    return np.clip(similarity, -1, 1)

In [None]:
# Similarity between the two embedding sets, tabulated next to the secondary
# and collapsed tags collected while encoding.
D = cosine_similarity(e1s, e2s)
df = pd.DataFrame(dict(dist=D, secondary_tags=sc_tags, collapsed_tags=co_tags))

In [None]:
# Distribution of pair similarity per secondary tag, one box per collapsed tag.
# `split` and `inner` are violin-plot options; boxplot forwards unknown keywords
# to matplotlib, which rejects them, so only the options boxplot supports are kept.
sns.boxplot(data=df, x='secondary_tags', y='dist', hue='collapsed_tags', gap=.1);

In [None]:
# Re-index the similarities by all three tag levels, then average them per
# level value (one mean per distinct tag / secondary tag / collapsed tag).
arrays = [tags, sc_tags, co_tags]
index = pd.MultiIndex.from_arrays(
    arrays,
    names=('tag', 'secondary_tag', 'collapsed_tag'),
)
df = pd.DataFrame({'distance': D}, index=index)
md_sc = df['distance'].groupby(level='secondary_tag').mean().to_numpy()
md_t = df['distance'].groupby(level='tag').mean().to_numpy()
md_co = df['distance'].groupby(level='collapsed_tag').mean().to_numpy()

In [None]:
# t-sne
# Project each embedding set to 2-D. t-SNE is stochastic, so a fixed seed is
# used to make the layouts reproducible across kernel restarts.
tsne_seed = 0
tsne = TSNE(n_components=2, perplexity=perplexity, random_state=tsne_seed)
# fit_transform refits from scratch on each call: the two projections live in
# independent coordinate systems and are not directly comparable.
e1s_tsne = tsne.fit_transform(e1s)  # (n_samples, 2)
e2s_tsne = tsne.fit_transform(e2s)  # (n_samples, 2)

In [None]:
# grad cam