Examine how the following similarity metrics differ:
1. representation cosine
2. EL2N
3. Grad Norm

In [1]:
import os, sys
import json
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim

from scipy.spatial import distance
from scipy import stats
from scipy.special import softmax

sys.path.append("..")
from singleVis.data import NormalDataProvider
from singleVis.utils import find_neighbor_preserving_rate

In [2]:
VIS_METHOD = "DVI" # DeepVisualInsight
# The data directory is machine-specific. It can be overridden without editing
# the notebook by setting the DVI_CONTENT_PATH environment variable; the
# original hard-coded location is kept as the default.
CONTENT_PATH = os.environ.get("DVI_CONTENT_PATH", "/home/xianglin/projects/DVI_data/resnet18_mnist")
# CUDA device index, kept as a string because it is formatted into "cuda:{}".
GPU_ID = "0"

In [3]:
# Put the content directory on the import path so modules stored there
# (e.g. `Model.model`, imported further down) can be found.
sys.path.append(CONTENT_PATH)

# Parse config.json and keep only the section belonging to VIS_METHOD.
config_path = os.path.join(CONTENT_PATH, "config.json")
with open(config_path, "r") as config_file:
    config = json.load(config_file)[VIS_METHOD]

In [4]:
# Sub-sections of the selected configuration
TRAINING_PARAMETER = config["TRAINING"]
VISUALIZATION_PARAMETER = config["VISUALIZATION"]

# General experiment settings
SETTING = config["SETTING"]
CLASSES = config["CLASSES"]
DATASET = config["DATASET"]
EPOCH_START = config["EPOCH_START"]
EPOCH_END = config["EPOCH_END"]
EPOCH_PERIOD = config["EPOCH_PERIOD"]

# Training parameter (subject model)
NET = TRAINING_PARAMETER["NET"]
LEN = TRAINING_PARAMETER["train_num"]

# Training parameter (visualization model)
PREPROCESS = VISUALIZATION_PARAMETER["PREPROCESS"]
LAMBDA1 = VISUALIZATION_PARAMETER["LAMBDA1"]
B_N_EPOCHS = VISUALIZATION_PARAMETER["BOUNDARY"]["B_N_EPOCHS"]
L_BOUND = VISUALIZATION_PARAMETER["BOUNDARY"]["L_BOUND"]
ENCODER_DIMS = VISUALIZATION_PARAMETER["ENCODER_DIMS"]
DECODER_DIMS = VISUALIZATION_PARAMETER["DECODER_DIMS"]
S_N_EPOCHS = VISUALIZATION_PARAMETER["S_N_EPOCHS"]
N_NEIGHBORS = VISUALIZATION_PARAMETER["N_NEIGHBORS"]
PATIENT = VISUALIZATION_PARAMETER["PATIENT"]
MAX_EPOCH = VISUALIZATION_PARAMETER["MAX_EPOCH"]

# Names used for saved visualization models / evaluation files
VIS_MODEL_NAME = VISUALIZATION_PARAMETER["VIS_MODEL_NAME"]
EVALUATION_NAME = VISUALIZATION_PARAMETER["EVALUATION_NAME"]

# Compute device: the configured GPU when CUDA is available, otherwise the CPU
if torch.cuda.is_available():
    DEVICE = torch.device("cuda:{}".format(GPU_ID))
else:
    DEVICE = torch.device("cpu")

In [5]:
# Imported here rather than in the top import cell: `Model` is presumably only
# importable once CONTENT_PATH has been appended to sys.path (done above).
import Model.model as subject_model

# Look the architecture up by name instead of eval()-ing a string assembled
# from config.json: eval would execute arbitrary code placed in the config.
# NOTE(review): assumes NET is a plain attribute name of Model.model
# (e.g. "resnet18"), not a dotted path or a call expression.
net = getattr(subject_model, NET)()

In [6]:
# Build the data provider for the subject model's checkpoints
data_provider = NormalDataProvider(
    CONTENT_PATH,
    net,
    EPOCH_START,
    EPOCH_END,
    EPOCH_PERIOD,
    device=DEVICE,
    classes=CLASSES,
    epoch_name="Epoch",
    verbose=1,
)

# Optional preprocessing, switched on by the config.
if PREPROCESS:
    data_provider._meta_data()
# Boundary estimation only runs when preprocessing is on AND boundary epochs
# are configured; it is given one tenth of the training-set size.
if PREPROCESS and B_N_EPOCHS > 0:
    data_provider._estimate_boundary(LEN // 10, l_bound=L_BOUND)

Finish initialization...


# Define semantic change

In [43]:
# Pair of consecutive checkpoints (epochs) compared in the rest of the notebook
prev_e, next_e = 14, 15

In [44]:
def gradient_diff(prev_e, next_e, x, target, data_provider, criterion, optimizer, device=None):
    """Compare the loss gradients of one batch at two training checkpoints.

    The model for each epoch is obtained from ``data_provider.model_function``,
    the loss on ``(x, target)`` is back-propagated, and the per-parameter
    gradients of the two checkpoints are compared with cosine similarity.

    Unlike the earlier version, this function never calls ``optimizer.step()``:
    stepping would modify the checkpoint weights that are being measured.
    Gradients are cleared on the model itself, so the result does not depend
    on which parameters ``optimizer`` happens to wrap.

    Args:
        prev_e: Epoch identifier of the earlier checkpoint.
        next_e: Epoch identifier of the later checkpoint.
        x: Input tensor fed to the model.
        target: Target tensor passed to ``criterion``.
        data_provider: Object exposing ``model_function(epoch)`` that returns
            a ``torch.nn.Module``.
        criterion: Loss callable ``criterion(output, target)``.
        optimizer: Unused; accepted only so existing call sites keep working.
        device: Device to run on. Defaults to the notebook-level ``DEVICE``.

    Returns:
        A tuple ``(cos_dist, loss_delta, per_param_dists)`` where ``cos_dist``
        is one minus the mean per-parameter cosine similarity, ``loss_delta``
        is ``loss(next_e) - loss(prev_e)``, and ``per_param_dists`` is a NumPy
        array of per-parameter cosine distances in ``model.parameters()``
        order (parameters that received no gradient are skipped).

    Raises:
        ValueError: If no parameter received a gradient, or the two
            checkpoints yield a different number of gradients.
    """
    if device is None:
        device = DEVICE

    # Make sure the batch lives on the same device as the model.
    x = x.to(device)
    target = target.to(device)

    def _loss_and_grads(epoch):
        """Return (loss value, cloned per-parameter gradients) at `epoch`."""
        model = data_provider.model_function(epoch).to(device)
        # Clear stale gradients on the model that is actually differentiated;
        # the provider may hand back the same module object on every call.
        model.zero_grad()
        loss = criterion(model(x), target)
        loss.backward()
        # Clone so the values survive if the same module is reused/reloaded.
        grads = [p.grad.detach().clone() for p in model.parameters() if p.grad is not None]
        return loss.item(), grads

    loss_t, grads_t = _loss_and_grads(prev_e)
    loss_t1, grads_t1 = _loss_and_grads(next_e)

    if not grads_t or len(grads_t) != len(grads_t1):
        raise ValueError(
            "gradient lists are empty or differ in length: {} vs {}".format(len(grads_t), len(grads_t1))
        )

    # Cosine similarity between the flattened gradients of each parameter
    cos = nn.CosineSimilarity(dim=0)
    cos_sim_values = [
        cos(g_t.flatten(), g_t1.flatten()).item()
        for g_t, g_t1 in zip(grads_t, grads_t1)
    ]

    # Average over parameters, then convert similarity to distance
    avg_cos_sim = sum(cos_sim_values) / len(cos_sim_values)
    cos_dist = 1 - avg_cos_sim

    return cos_dist, loss_t1 - loss_t, 1 - np.array(cos_sim_values)

In [45]:
def estimate(prev_el2n,next_el2n,prev_data, next_data, idx):
    """Cosine distance between two outer-product gradient estimates of one sample.

    For sample ``idx`` the error vector (``*_el2n[idx]``) is multiplied as a
    column with the representation (``*_data[idx]``) as a row, giving an
    (error_dim x repr_dim) matrix that is flattened. If the classifier's last
    layer is linear on this representation and the loss is softmax
    cross-entropy, that matrix is the gradient w.r.t. the last layer's weights.

    Returns the SciPy cosine distance between the previous and next estimates.
    """
    def _flat_outer(err_vec, repr_vec):
        # column vector (k, 1) times row vector (1, d) -> (k, d), then flatten
        return np.dot(err_vec[:, np.newaxis], repr_vec[np.newaxis, :]).flatten()

    before = _flat_outer(prev_el2n[idx], prev_data[idx])
    after = _flat_outer(next_el2n[idx], next_data[idx])
    return distance.cosine(before, after)


In [46]:
criterion = nn.CrossEntropyLoss()
# gradient_diff takes an optimizer argument, so one is built on the subject model.
optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)

training_data = data_provider._training_data()
targets = data_provider.train_labels(prev_e)

# Draw the evaluation subset from a seeded generator so that the sampled
# indices -- and therefore every correlation reported below -- are
# reproducible under Restart & Run All.
SAMPLE_SEED = 0
# Never ask for more samples than exist (sampling is without replacement).
test_len = min(500, len(training_data))
idxs = np.random.default_rng(SAMPLE_SEED).choice(len(training_data), test_len, replace=False)

dists = np.zeros(test_len)           # mean per-parameter gradient cosine distance
repr_gradient = np.zeros(test_len)   # cosine distance of one specific parameter (see below)
for i in range(test_len):
    # Slice (rather than index) to keep a leading batch dimension of size 1.
    x = training_data[idxs[i]:idxs[i]+1]
    y = torch.from_numpy(targets[idxs[i]:idxs[i]+1]).to(DEVICE)
    dist, loss, all_dists = gradient_diff(prev_e, next_e, x, y, data_provider, criterion, optimizer)
    dists[i] = dist
    # Second-to-last parameter tensor; presumably the final layer's weight
    # matrix (the last one being its bias) -- TODO confirm against the architecture.
    repr_gradient[i] = all_dists[-2]

In [47]:
# Pearson correlation between the all-parameter mean gradient distance and the
# gradient distance of the single parameter tensor stored in `repr_gradient`.
stats.pearsonr(dists, repr_gradient)

PearsonRResult(statistic=0.11986999722237833, pvalue=0.007288922535017365)

In [48]:
# compute EL2N
prev_data = data_provider.train_representation(prev_e)
next_data = data_provider.train_representation(next_e)
train_labels = data_provider.train_labels(prev_e)
prev_pw = data_provider.get_pred(next_e, prev_data)
next_pw = data_provider.get_pred(next_e, next_data)
y = np.eye(np.max(train_labels)+1)[train_labels]

prev_pw = softmax(prev_pw, axis=1)
next_pw = softmax(next_pw, axis=1)
prev_el2n = prev_pw-y
next_el2n = next_pw-y

100%|██████████| 300/300 [00:00<00:00, 7308.04it/s]
100%|██████████| 300/300 [00:00<00:00, 7416.59it/s]


In [49]:
# Outer-product gradient-estimate distance for every sampled training index,
# then its Pearson correlation with the measured gradient distance.
estimated_gradient_dists = np.array(
    [estimate(prev_el2n, next_el2n, prev_data, next_data, sample_idx) for sample_idx in idxs]
)
stats.pearsonr(dists, estimated_gradient_dists)

PearsonRResult(statistic=0.11643468987979023, pvalue=0.009163021666833407)

In [50]:
print("Temporal repr")
# Per-sample distance between the representation at prev_e and at next_e,
# measured once with cosine distance and once with Euclidean distance.
repr_dists = np.array([distance.cosine(prev_data[j], next_data[j]) for j in idxs])
repr_dists_eu = np.array([distance.euclidean(prev_data[j], next_data[j]) for j in idxs])

Temporal repr


In [51]:
# Correlation of each representation distance (cosine, then Euclidean)
# with the measured gradient distance.
(
    stats.pearsonr(repr_dists, dists),
    stats.pearsonr(repr_dists_eu, dists),
)

(PearsonRResult(statistic=0.13683122187522068, pvalue=0.0021664183521433993),
 PearsonRResult(statistic=0.26184099581678755, pvalue=2.77627024892479e-09))

In [52]:
# Neighbor-preserving rate (experiment currently disabled; kept for reference)
# npr_eu = find_neighbor_preserving_rate(prev_data[idxs], next_data[idxs], n_neighbors=15, metric="euclidean")
# npr_cosine = find_neighbor_preserving_rate(prev_data[idxs], next_data[idxs], n_neighbors=15, metric="cosine")
# stats.spearmanr(npr_eu, dists), stats.spearmanr(npr_cosine, dists)