Try to skip missing epochs and use predictions to estimate the reference.

# Load module

In [1]:
import os, sys
import json
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim

from scipy.spatial import distance

sys.path.append("..")
from singleVis.data import NormalDataProvider
from singleVis.projector import DVIProjector
from singleVis.vis_models import vis_models as vmodels

2023-09-03 18:28:06.817115: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Experiment selection. CONTENT_PATH and GPU_ID are machine-specific, so they
# can be overridden through environment variables without editing the notebook;
# the defaults are the values that were originally hard-coded here.
VIS_METHOD = "tDVI" # DeepVisualInsight; used as the key into config.json
CONTENT_PATH = os.environ.get("DVI_CONTENT_PATH", "/home/xianglin/projects/DVI_data/resnet18_mnist")
GPU_ID = os.environ.get("DVI_GPU_ID", "2")

In [3]:
# Put CONTENT_PATH on the import path, then read the section of config.json
# that belongs to the selected visualization method.
sys.path.append(CONTENT_PATH)
config_path = os.path.join(CONTENT_PATH, "config.json")
with open(config_path, "r") as config_file:
    config = json.load(config_file)[VIS_METHOD]

In [4]:
# General experiment settings
SETTING = config["SETTING"]
CLASSES = config["CLASSES"]
DATASET = config["DATASET"]
PREPROCESS = config["VISUALIZATION"]["PREPROCESS"]

# Subject model: training settings and the epoch range stored in the config
TRAINING_PARAMETER = config["TRAINING"]
NET = TRAINING_PARAMETER["NET"]
LEN = TRAINING_PARAMETER["train_num"]
EPOCH_START, EPOCH_END, EPOCH_PERIOD, EPOCH_NAME = (
    config[key] for key in ("EPOCH_START", "EPOCH_END", "EPOCH_PERIOD", "EPOCH_NAME")
)

# Visualization model: architecture, boundary and training settings
VISUALIZATION_PARAMETER = config["VISUALIZATION"]
VIS_MODEL = VISUALIZATION_PARAMETER['VIS_MODEL']
LAMBDA = VISUALIZATION_PARAMETER["LAMBDA"]
boundary_parameter = VISUALIZATION_PARAMETER["BOUNDARY"]
B_N_EPOCHS = boundary_parameter["B_N_EPOCHS"]
L_BOUND = boundary_parameter["L_BOUND"]
ENCODER_DIMS, DECODER_DIMS, S_N_EPOCHS, N_NEIGHBORS, PATIENT, MAX_EPOCH = (
    VISUALIZATION_PARAMETER[key]
    for key in ("ENCODER_DIMS", "DECODER_DIMS", "S_N_EPOCHS", "N_NEIGHBORS", "PATIENT", "MAX_EPOCH")
)

# Compute device: the configured GPU when CUDA is available, otherwise the CPU
DEVICE = torch.device(f"cuda:{GPU_ID}" if torch.cuda.is_available() else "cpu")

In [5]:
import Model.model as subject_model

# NET comes from config.json. Look the constructor up with getattr instead of
# eval() so that a config value can never be executed as arbitrary code.
# NOTE(review): assumes NET is a plain attribute name of Model.model — confirm.
net = getattr(subject_model, NET)()

In [6]:
# VIS_MODEL_NAME = "tDVI_cnAE_normal"
# EVALUATION_NAME = "evaluation_tDVI_cnAE_normal"

In [7]:
# Define data_provider
data_provider = NormalDataProvider(CONTENT_PATH, net, EPOCH_START, EPOCH_END, EPOCH_PERIOD, device=DEVICE, classes=CLASSES, epoch_name=EPOCH_NAME, verbose=1)
if PREPROCESS:
    data_provider._meta_data()
    if B_N_EPOCHS >0:
        data_provider._estimate_boundary(LEN//10, l_bound=L_BOUND)

Finish initialization...


# estimate change

In [8]:
# Visualization model under inspection; this replaces the VIS_MODEL value read
# from the config.
# Alternative: VIS_MODEL_NAME = "tDVI_baseAE" with VIS_MODEL = "baseAE"
VIS_MODEL = "cnAE"
VIS_MODEL_NAME = "tDVI_cnAE"

In [9]:
# Alternative: VIS_MODEL_NAME = "tDVI_cnAE_different-margins"
# Instantiate the selected visualization model and hand it to the projector.
vis_model_cls = vmodels[VIS_MODEL]
model = vis_model_cls(ENCODER_DIMS, DECODER_DIMS)
projector = DVIProjector(
    vis_model=model,
    content_path=CONTENT_PATH,
    vis_model_name=VIS_MODEL_NAME,
    epoch_name=EPOCH_NAME,
    device=DEVICE,
)

In [10]:
def uncertainty_measure(prev_data, next_data, eps=1e-12):
    """Quantify how far the feature distribution moved between two snapshots.

    Both inputs are reduced to a per-feature mean and standard deviation along
    axis 0; four scalar summaries of the change are derived from those.

    Args:
        prev_data: array-like with samples along axis 0 (earlier snapshot).
        next_data: array-like with samples along axis 0 (later snapshot).
        eps: lower bound applied to every denominator, so that constant
            features (zero standard deviation) yield finite values instead of
            NaN/inf or a singular covariance matrix. Denominators larger than
            ``eps`` are left untouched.

    Returns:
        Tuple ``(uncertainty1, uncertainty2, uncertainty3, uncertainty4)``:
            1. mean over features of ``|mean shift| / sqrt(std_t^2 + std_tk^2)``
            2. mean over features of the Euclidean distance in (mean, std) space
            3. mean over features of
               ``sqrt(mean shift^2 / std_t + std shift^2 / std_t)``
            4. Mahalanobis distance between the two mean vectors under the
               diagonal covariance ``diag(std_t^2)``
    """
    prev_data = np.asarray(prev_data)
    next_data = np.asarray(next_data)

    mean_t = prev_data.mean(axis=0)
    std_t = prev_data.std(axis=0)
    mean_tk = next_data.mean(axis=0)
    std_tk = next_data.std(axis=0)

    mean_shift = mean_tk - mean_t
    std_shift = std_tk - std_t
    # Guarded denominators: identical to the raw values unless they are ~0.
    safe_std_t = np.maximum(std_t, eps)
    pooled_std = np.maximum(np.sqrt(std_t ** 2 + std_tk ** 2), eps)

    # Method 2: Sum of Z-scores (averaged over all features)
    uncertainty1 = np.mean(np.abs(mean_shift) / pooled_std)

    # Method 3: Euclidean Distance in Mean-Std Space (averaged over all features)
    uncertainty2 = np.mean(np.sqrt(mean_shift ** 2 + std_shift ** 2))

    # Method 4: Mahalanobis Distance (simplified, averaged over all features)
    # NOTE(review): this normalises by std_t rather than the variance; kept as
    # in the original so previously recorded numbers stay comparable.
    uncertainty3 = np.mean(np.sqrt(mean_shift ** 2 / safe_std_t + std_shift ** 2 / safe_std_t))

    # Mahalanobis distance assuming uncorrelated features. With a diagonal
    # covariance the inverse is just 1 / variance, so no d x d matrix has to be
    # built or inverted.
    uncertainty4 = np.sqrt(np.sum(mean_shift ** 2 / safe_std_t ** 2))
    return uncertainty1, uncertainty2, uncertainty3, uncertainty4


In [11]:
# Uncertainty between every pair of consecutive epochs (i, i + 1).
# NOTE(review): the 1..200 epoch range is hard-coded; presumably it should
# follow EPOCH_START / EPOCH_END / EPOCH_PERIOD from the config — confirm.
FIRST_EPOCH, LAST_EPOCH = 1, 200
n_pairs = LAST_EPOCH - FIRST_EPOCH
u1, u2, u3, u4 = (np.zeros(n_pairs) for _ in range(4))

# Each representation is fetched once and reused as the "previous" snapshot of
# the following pair, instead of being fetched twice.
next_data = data_provider.train_representation(FIRST_EPOCH)
for i in range(FIRST_EPOCH, LAST_EPOCH):
    prev_data = next_data
    next_data = data_provider.train_representation(i + 1)
    k = i - FIRST_EPOCH
    u1[k], u2[k], u3[k], u4[k] = uncertainty_measure(prev_data, next_data)

KeyboardInterrupt: 

In [None]:
from scipy import stats

# Correlate each of the first three measures with the Mahalanobis distance (u4)
tuple(stats.pearsonr(measure, u4) for measure in (u1, u2, u3))

(PearsonRResult(statistic=0.9984572330083197, pvalue=1.1366100395814112e-60),
 PearsonRResult(statistic=0.923518720312815, pvalue=3.4389914303013525e-21),
 PearsonRResult(statistic=0.9783368519778206, pvalue=8.41800761245911e-34))

In [None]:
# Epoch pair to compare. Nothing earlier in the notebook defines prev_e /
# next_e; the values 1 and 5 are taken from the load messages recorded in this
# cell's output.
prev_e, next_e = 1, 5
# NOTE(review): assumes the training representations are the intended inputs — confirm.
prev_data = data_provider.train_representation(prev_e)
next_data = data_provider.train_representation(next_e)

prev_low = projector.batch_project(prev_e, prev_data)
next_low = projector.batch_project(next_e, next_data)
# Later-epoch data pushed through the earlier epoch's projection
estimated_next_low = projector.batch_project(prev_e, next_data)

Successfully load the DVI visualization model for iteration 1
Successfully load the DVI visualization model for iteration 5
Successfully load the DVI visualization model for iteration 1


In [None]:
# Per-sample change between the two snapshots: cosine distance in the
# high-dimensional space, Euclidean displacement in the projected space.
high_change = np.array([
    distance.cosine(before, after)
    for before, after in zip(prev_data, next_data)
])
low_displacement = next_low - prev_low
low_change = np.linalg.norm(low_displacement, axis=1)
estimated_low_displacement = estimated_next_low - prev_low
estimated_low_change = np.linalg.norm(estimated_low_displacement, axis=1)

In [None]:
from scipy import stats

# Rank correlation of the high-dimensional change with each low-dimensional one
for label, low in (("True", low_change), ("Estimated High", estimated_low_change)):
    print(label, stats.spearmanr(high_change, low))

True SignificanceResult(statistic=0.5902571305668802, pvalue=0.0)
Estimated High SignificanceResult(statistic=0.578765585897823, pvalue=0.0)


# eval temporal

In [8]:
from singleVis.eval.evaluator import Evaluator
from singleVis.eval.evaluate import *

In [9]:
# Visualization model to evaluate, its projector, and a euclidean-metric evaluator
VIS_MODEL = "cnAE"
VIS_MODEL_NAME = "tDVI_cnAE_estimation"
vis_model_cls = vmodels[VIS_MODEL]
model = vis_model_cls(ENCODER_DIMS, DECODER_DIMS)
projector = DVIProjector(
    vis_model=model,
    content_path=CONTENT_PATH,
    vis_model_name=VIS_MODEL_NAME,
    epoch_name=EPOCH_NAME,
    device=DEVICE,
)
evaluator = Evaluator(data_provider, projector, metric="euclidean")

In [10]:
print("standard")
# print(evaluator.eval_nn_test(1,15),evaluator.eval_nn_test(7,15), evaluator.eval_nn_test(15,15))
# Temporal nn preservation on the test set at epochs 1, 7 and 15
print(*(evaluator.eval_temporal_nn_test(e, 3) for e in (1, 7, 15)))

standard
Temporal nn preserving (test) for 1-th epoch 3: 2.296	 std:0.615
Temporal nn preserving (test) for 7-th epoch 3: 1.595	 std:0.764
Temporal nn preserving (test) for 15-th epoch 3: 2.397	 std:0.650
2.2964 1.595 2.3966


In [12]:
print("lightnorm")
# Temporal nn preservation on the test set at epochs 1, 7 and 15
print(*(evaluator.eval_temporal_nn_test(e, 3) for e in (1, 7, 15)));

lightnorm
Temporal nn preserving (test) for 1-th epoch 3: 2.189	 std:0.794
Temporal nn preserving (test) for 7-th epoch 3: 1.171	 std:0.826
Temporal nn preserving (test) for 15-th epoch 3: 2.277	 std:0.720
2.1887 1.1706 2.2775


In [14]:
print("normal")
# print(evaluator.eval_nn_test(1,15),evaluator.eval_nn_test(7,15), evaluator.eval_nn_test(15,15))
# Temporal nn preservation on the test set at epochs 1, 7 and 15
print(*(evaluator.eval_temporal_nn_test(e, 3) for e in (1, 7, 15)));

normal
Temporal nn preserving (test) for 1-th epoch 3: 0.551	 std:0.766
Temporal nn preserving (test) for 7-th epoch 3: 0.658	 std:0.663
Temporal nn preserving (test) for 15-th epoch 3: 0.839	 std:0.679
0.5509 0.658 0.8386


In [16]:
print("skip")
# Temporal nn preservation on the test set at epochs 1, 7 and 15
print(*(evaluator.eval_temporal_nn_test(e, 3) for e in (1, 7, 15)))

skip
Temporal nn preserving (test) for 1-th epoch 3: 0.638	 std:0.677
Temporal nn preserving (test) for 7-th epoch 3: 0.592	 std:0.659
Temporal nn preserving (test) for 15-th epoch 3: 0.852	 std:0.651
0.6384 0.5919 0.8518


In [18]:
print("full skip")
# Temporal nn preservation on the test set at epochs 1, 7 and 15
print(*(evaluator.eval_temporal_nn_test(e, 3) for e in (1, 7, 15)));

full skip
Temporal nn preserving (test) for 1-th epoch 3: 1.640	 std:0.780
Temporal nn preserving (test) for 7-th epoch 3: 0.793	 std:0.709
Temporal nn preserving (test) for 15-th epoch 3: 1.288	 std:0.841
1.6396 0.793 1.2885


In [21]:
print("full estimation")
# Temporal nn preservation on the test set at epochs 1, 7 and 15
print(*(evaluator.eval_temporal_nn_test(e, 3) for e in (1, 7, 15)));

full estimation
Temporal nn preserving (test) for 1-th epoch 3: 2.319	 std:0.757
Temporal nn preserving (test) for 7-th epoch 3: 1.286	 std:0.828
Temporal nn preserving (test) for 15-th epoch 3: 2.435	 std:0.669
2.3191 1.2861 2.4346


In [23]:
print("estimation")
# Temporal nn preservation on the test set at epochs 1, 7 and 15
print(*(evaluator.eval_temporal_nn_test(e, 3) for e in (1, 7, 15)));

estimation
Temporal nn preserving (test) for 1-th epoch 3: 0.641	 std:0.723
Temporal nn preserving (test) for 7-th epoch 3: 0.721	 std:0.664
Temporal nn preserving (test) for 15-th epoch 3: 0.627	 std:0.635
0.6407 0.7212 0.6275


In [10]:
print("estimation")
# Temporal nn preservation on the test set at epochs 1, 7 and 15
print(*(evaluator.eval_temporal_nn_test(e, 3) for e in (1, 7, 15)));

estimation
/home/xianglin/projects/DVI_data/resnet18_mnist/Model/Epoch_1/tDVI_cnAE_estimation.pth
/home/xianglin/projects/DVI_data/resnet18_mnist/Model/Epoch_1/tDVI_cnAE_estimation.pth
/home/xianglin/projects/DVI_data/resnet18_mnist/Model/Epoch_2/tDVI_cnAE_estimation.pth
/home/xianglin/projects/DVI_data/resnet18_mnist/Model/Epoch_3/tDVI_cnAE_estimation.pth
/home/xianglin/projects/DVI_data/resnet18_mnist/Model/Epoch_4/tDVI_cnAE_estimation.pth
/home/xianglin/projects/DVI_data/resnet18_mnist/Model/Epoch_5/tDVI_cnAE_estimation.pth
/home/xianglin/projects/DVI_data/resnet18_mnist/Model/Epoch_6/tDVI_cnAE_estimation.pth
/home/xianglin/projects/DVI_data/resnet18_mnist/Model/Epoch_7/tDVI_cnAE_estimation.pth
/home/xianglin/projects/DVI_data/resnet18_mnist/Model/Epoch_8/tDVI_cnAE_estimation.pth
/home/xianglin/projects/DVI_data/resnet18_mnist/Model/Epoch_9/tDVI_cnAE_estimation.pth
/home/xianglin/projects/DVI_data/resnet18_mnist/Model/Epoch_10/tDVI_cnAE_estimation.pth
/home/xianglin/projects/DVI_dat

In [25]:
print("temporal-test")
# adjust temporal loss weight
# Temporal nn preservation on the test set at epochs 1, 7 and 15
print(*(evaluator.eval_temporal_nn_test(e, 3) for e in (1, 7, 15)));

temporal-test
Temporal nn preserving (test) for 1-th epoch 3: 1.749	 std:0.785
Temporal nn preserving (test) for 7-th epoch 3: 0.743	 std:0.709
Temporal nn preserving (test) for 15-th epoch 3: 1.214	 std:0.821
1.7489 0.7433 1.2137


In [10]:
print("temporal-test")
# add weights
# Temporal nn preservation on the test set at epochs 1, 7 and 15
print(*(evaluator.eval_temporal_nn_test(e, 3) for e in (1, 7, 15)));

temporal-test
Temporal nn preserving (test) for 1-th epoch 3: 2.081	 std:0.758
Temporal nn preserving (test) for 7-th epoch 3: 1.141	 std:0.813
Temporal nn preserving (test) for 15-th epoch 3: 2.330	 std:0.688
2.0808 1.1413 2.3298


In [11]:
print("different-margins")
# expand margin
# Temporal nn preservation on the test set at epochs 1, 7 and 15
print(*(evaluator.eval_temporal_nn_test(e, 3) for e in (1, 7, 15)))

different-margins
Temporal nn preserving (test) for 1-th epoch 3: 2.073	 std:0.791
Temporal nn preserving (test) for 7-th epoch 3: 0.873	 std:0.791
Temporal nn preserving (test) for 15-th epoch 3: 1.928	 std:0.705
2.0729 0.8727 1.9283


In [10]:
print("different-margins")
# Temporal nn preservation on the test set at epochs 1, 7 and 15
print(*(evaluator.eval_temporal_nn_test(e, 3) for e in (1, 7, 15)))

different-margins
Temporal nn preserving (test) for 1-th epoch 3: 2.157	 std:0.721
Temporal nn preserving (test) for 7-th epoch 3: 0.724	 std:0.731
Temporal nn preserving (test) for 15-th epoch 3: 2.077	 std:0.831
2.1569 0.7241 2.0771


## difference between cosine and euclidean

In [18]:
# Reference epoch and number of nearest epochs compared in the cell below
epoch, n_neighbors = 15, 3

In [20]:
# Temporal neighbour preservation for `epoch`, measured with cosine distance in
# the high-dimensional space and euclidean distance in the projection.
epoch_num = (data_provider.e - data_provider.s) // data_provider.p + 1
l = data_provider.test_num
high_dists = np.zeros((l, epoch_num))
low_dists = np.zeros((l, epoch_num))

curr_data = data_provider.test_representation(epoch)
curr_embedding = projector.batch_project(epoch, curr_data)

# Column t holds the distances between the t-th epoch and the reference epoch
# (presumably one value per test sample — confirm against evaluate_embedding_distance).
for t in range(epoch_num):
    iteration = t * data_provider.p + data_provider.s
    data = data_provider.test_representation(iteration)
    embedding = projector.batch_project(iteration, data)

    high_dists[:, t] = evaluate_embedding_distance(data, curr_data, metric="cosine", one_target=False)
    low_dists[:, t] = evaluate_embedding_distance(embedding, curr_embedding, metric="euclidean", one_target=False)

# np.argsort sorts ascending, so each row lists epoch indices from the smallest
# distance to the largest.
high_orders = np.argsort(high_dists, axis=1)
low_orders = np.argsort(low_dists, axis=1)

# Skip rank 0 (presumably the reference epoch itself at distance 0 — confirm)
# and keep the next n_neighbors epochs.
high_rankings = high_orders[:, 1:n_neighbors+1]
low_rankings = low_orders[:, 1:n_neighbors+1]

# Number of nearest epochs shared by both spaces, per sample. The bound is the
# row count `l` rather than the loop variable `data` left over from above.
corr = np.zeros(l)
for i in range(l):
    corr[i] = len(np.intersect1d(high_rankings[i], low_rankings[i]))
print(corr.mean())

2.4612
