exam the similarity metric difference
1. representation cosine
2. EL2N
3. Grad Norm

In [2]:
import os, sys
import json
import torch
import numpy as np

from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
from scipy.spatial import distance
from sklearn.neighbors import NearestNeighbors
from pynndescent import NNDescent
from scipy import stats

sys.path.append("..")
from singleVis.SingleVisualizationModel import VisModel
from singleVis.data import NormalDataProvider

In [3]:
VIS_METHOD = "DVI" # DeepVisualInsight
CONTENT_PATH = "/home/xianglin/projects/DVI_data/resnet18_mnist"
GPU_ID = "0"

In [4]:
sys.path.append(CONTENT_PATH)
with open(os.path.join(CONTENT_PATH, "config.json"), "r") as f:
    config = json.load(f)
config = config[VIS_METHOD]

In [5]:
SETTING = config["SETTING"]
CLASSES = config["CLASSES"]
DATASET = config["DATASET"]
PREPROCESS = config["VISUALIZATION"]["PREPROCESS"]

# Training parameter (subject model)
TRAINING_PARAMETER = config["TRAINING"]
NET = TRAINING_PARAMETER["NET"]
LEN = TRAINING_PARAMETER["train_num"]
EPOCH_START = config["EPOCH_START"]
EPOCH_END = config["EPOCH_END"]
EPOCH_PERIOD = config["EPOCH_PERIOD"]

# Training parameter (visualization model)
VISUALIZATION_PARAMETER = config["VISUALIZATION"]
LAMBDA1 = VISUALIZATION_PARAMETER["LAMBDA1"]
B_N_EPOCHS = VISUALIZATION_PARAMETER["BOUNDARY"]["B_N_EPOCHS"]
L_BOUND = VISUALIZATION_PARAMETER["BOUNDARY"]["L_BOUND"]
ENCODER_DIMS = VISUALIZATION_PARAMETER["ENCODER_DIMS"]
DECODER_DIMS = VISUALIZATION_PARAMETER["DECODER_DIMS"]
S_N_EPOCHS = VISUALIZATION_PARAMETER["S_N_EPOCHS"]
N_NEIGHBORS = VISUALIZATION_PARAMETER["N_NEIGHBORS"]
PATIENT = VISUALIZATION_PARAMETER["PATIENT"]
MAX_EPOCH = VISUALIZATION_PARAMETER["MAX_EPOCH"]

VIS_MODEL_NAME = VISUALIZATION_PARAMETER["VIS_MODEL_NAME"]
EVALUATION_NAME = VISUALIZATION_PARAMETER["EVALUATION_NAME"]

# Define hyperparameters
DEVICE = torch.device("cuda:{}".format(GPU_ID) if torch.cuda.is_available() else "cpu")

In [6]:
import Model.model as subject_model
net = eval("subject_model.{}()".format(NET))

In [7]:
# Define data_provider
data_provider = NormalDataProvider(CONTENT_PATH, net, EPOCH_START, EPOCH_END, EPOCH_PERIOD, device=DEVICE, classes=CLASSES, epoch_name="Epoch", verbose=1)
if PREPROCESS:
    data_provider._meta_data()
    if B_N_EPOCHS >0:
        data_provider._estimate_boundary(LEN//10, l_bound=L_BOUND)

Finish initialization...


In [8]:
def nn(train_data):
    # number of trees in random projection forest
    n_trees = min(64, 5 + int(round((train_data.shape[0]) ** 0.5 / 20.0)))
    # max number of nearest neighbor iters to perform
    n_iters = max(5, int(round(np.log2(train_data.shape[0]))))
    # distance metric
    metric = "cosine"
    # get nearest neighbors
    nnd = NNDescent(
        train_data,
        n_neighbors=2,
        metric=metric,
        n_trees=n_trees,
        n_iters=n_iters,
        max_candidates=60,
        verbose=False
    )
    knn_indices, _ = nnd.neighbor_graph
    return knn_indices[:, 1]

In [55]:
# compute EL2N
prev_e = 1
next_e = 15
prev_data = data_provider.train_representation(prev_e)
next_data = data_provider.train_representation(next_e)
train_labels = data_provider.train_labels(prev_e)
prev_pw = data_provider.get_pred(next_e, prev_data)
next_pw = data_provider.get_pred(next_e, next_data)
y = np.eye(np.max(train_labels)+1)[train_labels]

100%|██████████| 300/300 [00:00<00:00, 7878.35it/s]
100%|██████████| 300/300 [00:00<00:00, 7769.44it/s]


In [59]:
next_el2n = next_pw-y
indices = nn(next_el2n)
closest_el2n = next_el2n[indices]
print("Closest sample in spatial (el2n)")
closest_el2n_sim = np.array([distance.cosine(closest_el2n[i], next_el2n[i]) for i in range(len(prev_data))])
# closest_el2n_sim = np.array([distance.euclidean(closest_el2n[i], next_el2n[i]) for i in range(len(prev_data))])
closest_el2n_sim.mean(), closest_el2n_sim.max(), closest_el2n_sim.min(), np.sum(closest_el2n_sim<closest_el2n_sim.mean())

Closest sample in spatial (el2n)


(0.0005739769775291844, 0.09793074125567369, 8.052173656292183e-06, 48744)

In [60]:
indices = nn(next_data)
closest_repr = next_data[indices]
print("Closest sample in spatial (repr)")
closest_repr_sim = np.array([distance.cosine(closest_repr[i], next_data[i]) for i in range(len(prev_data))])
# closest_repr_sim = np.array([distance.euclidean(closest_repr[i], next_data[i]) for i in range(len(prev_data))])
closest_repr_sim.mean(), closest_repr_sim.max(), closest_repr_sim.min(), np.sum(closest_repr_sim<closest_repr_sim.mean())

Closest sample in spatial (repr)


(0.008070171812156472, 0.3080023724526937, 0.0007488635185154413, 39615)

In [61]:
stats.spearmanr(closest_repr_sim, closest_el2n_sim)

SpearmanrResult(correlation=0.8271142389924493, pvalue=0.0)

In [65]:
prev_el2n = prev_pw-y
next_el2n = next_pw-y
print("Temporal EL2N")
el2n_sim = np.array([distance.cosine(prev_el2n[i], next_el2n[i]) for i in range(len(prev_data))])
el2n_sim.mean(), el2n_sim.max(), el2n_sim.min(), np.sum(el2n_sim<el2n_sim.mean())

Temporal EL2N


(0.1456928245389202, 1.672087202494822, 0.002040858975412174, 41572)

In [66]:
print("Temporal repr")
repr_sim = np.array([distance.cosine(prev_data[i], next_data[i]) for i in range(len(prev_data))])
repr_sim.mean(), repr_sim.max(), repr_sim.min(), np.sum(repr_sim<repr_sim.mean())

Temporal repr


(0.3463747952278759, 0.7402724352700767, 0.17314195437540048, 34115)

In [67]:
stats.spearmanr(repr_sim, el2n_sim)

SpearmanrResult(correlation=0.8815384769125941, pvalue=0.0)