# Load modules

In [1]:
import os, sys
import json
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim

from scipy.spatial import distance
from scipy import stats

sys.path.append("..")
from singleVis.data import NormalDataProvider
from singleVis.projector import DVIProjector
from singleVis.vis_models import vis_models as vmodels

2023-09-19 10:35:27.636044: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Key of the visualization method whose section is read from config.json.
VIS_METHOD = "tDVI" # DeepVisualInsight
# Folder that holds config.json and the subject model's `Model` package.
# The DVI_CONTENT_PATH environment variable overrides the default below, so the
# notebook can run on another machine without editing this cell.
CONTENT_PATH = os.environ.get("DVI_CONTENT_PATH", "/home/xianglin/projects/DVI_data/resnet18_mnist")
# CONTENT_PATH = "/home/xianglin/projects/DVI_data/BadNet_MNIST_noise"
# GPU index; only referenced by the commented-out CUDA device line in the configuration cell.
GPU_ID = "0"

In [3]:
# Put CONTENT_PATH on the import path so the `Model` package stored there can be imported.
# The membership check keeps re-runs of this cell from piling up duplicate entries.
if CONTENT_PATH not in sys.path:
    sys.path.append(CONTENT_PATH)
with open(os.path.join(CONTENT_PATH, "config.json"), "r") as f:
    config = json.load(f)
# Keep only the section that belongs to the selected visualization method.
config = config[VIS_METHOD]

In [4]:
# --- General settings ---------------------------------------------------------
SETTING, CLASSES, DATASET = (config[key] for key in ("SETTING", "CLASSES", "DATASET"))

# --- Training parameters (subject model) --------------------------------------
TRAINING_PARAMETER = config["TRAINING"]
NET, LEN = TRAINING_PARAMETER["NET"], TRAINING_PARAMETER["train_num"]
EPOCH_START, EPOCH_END, EPOCH_PERIOD, EPOCH_NAME = (
    config[key] for key in ("EPOCH_START", "EPOCH_END", "EPOCH_PERIOD", "EPOCH_NAME")
)

# --- Training parameters (visualization model) --------------------------------
VISUALIZATION_PARAMETER = config["VISUALIZATION"]
PREPROCESS = VISUALIZATION_PARAMETER["PREPROCESS"]
VIS_MODEL = VISUALIZATION_PARAMETER['VIS_MODEL']
LAMBDA = VISUALIZATION_PARAMETER["LAMBDA"]
B_N_EPOCHS, L_BOUND = (
    VISUALIZATION_PARAMETER["BOUNDARY"][key] for key in ("B_N_EPOCHS", "L_BOUND")
)
ENCODER_DIMS, DECODER_DIMS = (
    VISUALIZATION_PARAMETER[key] for key in ("ENCODER_DIMS", "DECODER_DIMS")
)
S_N_EPOCHS, N_NEIGHBORS, PATIENT, MAX_EPOCH = (
    VISUALIZATION_PARAMETER[key]
    for key in ("S_N_EPOCHS", "N_NEIGHBORS", "PATIENT", "MAX_EPOCH")
)

# --- Device ---------------------------------------------------------------------
# The notebook is pinned to the CPU; the CUDA variant is kept for reference.
# DEVICE = torch.device("cuda:{}".format(GPU_ID) if torch.cuda.is_available() else "cpu")
DEVICE = torch.device("cpu")

In [5]:
# `Model` is imported from CONTENT_PATH, which an earlier cell puts on sys.path.
import Model.model as subject_model
# Look the network class up by name instead of eval()-ing a string built from the
# config, so config content is never executed as code. An unknown name raises
# AttributeError.
net = getattr(subject_model, NET)()

In [6]:
# Build the data provider for the subject model stored under CONTENT_PATH.
data_provider = NormalDataProvider(
    CONTENT_PATH,
    net,
    EPOCH_START,
    EPOCH_END,
    EPOCH_PERIOD,
    device=DEVICE,
    classes=CLASSES,
    epoch_name=EPOCH_NAME,
    verbose=1,
)
# Optional preprocessing, switched on by the PREPROCESS flag from the config.
if PREPROCESS:
    data_provider._meta_data()
    if B_N_EPOCHS > 0:
        # Boundary estimation is requested for a tenth of the training-set size.
        data_provider._estimate_boundary(LEN // 10, l_bound=L_BOUND)

Finish initialization...


# Uncertainty estimation

In [20]:
def uncertainty_measure(prev_data, next_data, eps=5E-4):
    """Score how much the per-feature statistics differ between two data sets.

    For every feature (statistics are taken along axis 0) a z-score-like value

        z = |mean_next - mean_prev| / (sqrt(std_prev**2 + std_next**2) + eps)

    is computed, and the mean of ``z`` over all features is returned. The
    measure is symmetric in its two data arguments.

    Args:
        prev_data: array-like reduced along axis 0 (rows are presumably
            samples and columns features).
        next_data: array-like with the same feature layout as ``prev_data``.
        eps: small constant added to the denominator so that features whose
            spread is zero in both inputs do not cause a division by zero.
            Defaults to the value this notebook always used (5e-4).

    Returns:
        The mean z-score over all features, as a NumPy floating-point scalar.
    """
    # Accept anything NumPy can convert (lists, arrays, ...).
    prev_data = np.asarray(prev_data)
    next_data = np.asarray(next_data)

    mean_t = prev_data.mean(axis=0)
    std_t = prev_data.std(axis=0)
    mean_tk = next_data.mean(axis=0)
    std_tk = next_data.std(axis=0)

    # Method 2: z-score of the mean shift, averaged over all features.
    z_score = np.abs(mean_tk - mean_t) / (np.sqrt(std_t ** 2 + std_tk ** 2) + eps)
    uncertainty1 = np.mean(z_score)

    # Alternative measures explored earlier, kept for reference only:
    # # Method 3: Euclidean distance in mean-std space (averaged over all features)
    # uncertainty2 = np.mean(np.sqrt((mean_tk - mean_t)**2 + (std_tk - std_t)**2))
    # # Method 4: simplified Mahalanobis distance (averaged over all features)
    # uncertainty3 = np.mean(np.sqrt((mean_tk - mean_t)**2 / (std_t+5E-4) + (std_tk - std_t)**2 / (std_t+5E-4)))
    # # Mahalanobis distance with a diagonal covariance (variables assumed uncorrelated)
    # covariance_matrix_t = np.diag(std_t**2)
    # uncertainty4 = distance.mahalanobis(mean_t, mean_tk, np.linalg.inv(covariance_matrix_t))
    # return uncertainty1, uncertainty2, uncertainty3, uncertainty4
    return uncertainty1


In [21]:
# Uncertainty of each of the first `n_epochs` epochs relative to one fixed reference epoch.
n_epochs = 20
ref_epoch = 1

u1 = np.zeros(n_epochs)
# u2-u4 are placeholders for the alternative measures. uncertainty_measure only
# returns the first measure, so they stay all-zero; the names are kept because a
# later cell refers to them.
u2 = np.zeros(n_epochs)
u3 = np.zeros(n_epochs)
u4 = np.zeros(n_epochs)

# The reference representation does not change inside the loop, so load it once
# (assumes train_representation returns the same data on every call — TODO confirm).
ref_data = data_provider.train_representation(ref_epoch)
for epoch in range(1, n_epochs + 1):
    epoch_data = data_provider.train_representation(epoch)
    u1[epoch - 1] = uncertainty_measure(epoch_data, ref_data)

In [10]:
# Pearson correlation of each measure with u4 (`stats` comes from the import cell at the top).
# NOTE(review): the loop cell above only fills u1, so u2-u4 are all-zero there —
# confirm they are populated before trusting these correlations.
tuple(stats.pearsonr(measure, u4) for measure in (u1, u2, u3))

(PearsonRResult(statistic=0.9991827160274641, pvalue=1.5024838909406798e-68),
 PearsonRResult(statistic=0.9803757553415657, pvalue=1.6454616619760446e-35),
 PearsonRResult(statistic=0.995817163320422, pvalue=1.5100968983748353e-51))

## Check continuity

In [7]:
# Override the visualization model chosen in the config for this experiment.
VIS_MODEL = "cnAE"
VIS_MODEL_NAME = "tDVI_cnAE_sequence"
model = vmodels[VIS_MODEL](ENCODER_DIMS, DECODER_DIMS)
projector = DVIProjector(
    vis_model=model,
    content_path=CONTENT_PATH,
    vis_model_name=VIS_MODEL_NAME,
    epoch_name=EPOCH_NAME,
    device=DEVICE,
)

In [22]:
# Epoch pair to compare.
prev_e, next_e = 1, 200

# Training-set representations at both epochs, loaded in (prev, next) order.
prev_data, next_data = (
    data_provider.train_representation(epoch) for epoch in (prev_e, next_e)
)

In [23]:
# Project each representation with its own epoch index
# (presumably batch_project(epoch, data) uses the visualization model stored for
# `epoch` — TODO confirm against the projector implementation).
prev_low = projector.batch_project(prev_e, prev_data)
next_low = projector.batch_project(next_e, next_data)
# Project the later representation with the earlier epoch index: an "estimated"
# embedding of next_data, which the cells below compare against prev_low.
estimated_next_low = projector.batch_project(prev_e, next_data)

In [24]:
# Per-axis extent (max - min) of both embeddings, displayed as a pair.
tuple(np.ptp(embedding, axis=0) for embedding in (prev_low, next_low))

(array([ 7.289774, 14.076332], dtype=float32),
 array([35.418564, 39.14397 ], dtype=float32))

In [11]:
# Per-sample change in the high-dimensional space: cosine distance between the
# two representations of the same sample.
high_change = np.array(
    [distance.cosine(prev_data[row], next_data[row]) for row in range(len(prev_data))]
)

# Per-sample displacement in the embedding: true next-epoch projection vs. the
# estimated one, both measured from prev_low.
low_change = np.linalg.norm(next_low - prev_low, axis=1)
estimated_low_change = np.linalg.norm(estimated_next_low - prev_low, axis=1)

In [26]:
# Extent of the next-epoch embedding plus summary statistics of the per-sample displacement.
# np.ptp is used because the ndarray.ptp method was removed in NumPy 2.0.
np.ptp(next_low, axis=0), low_change.mean(), low_change.max(), low_change.min()

(array([35.418564, 39.14397 ], dtype=float32),
 0.29030046,
 7.110312,
 0.0008733035)

In [27]:
# Same summary for the estimated embedding.
# np.ptp is used because the ndarray.ptp method was removed in NumPy 2.0.
np.ptp(estimated_next_low, axis=0), estimated_low_change.mean(), estimated_low_change.max(), estimated_low_change.min()

(array([34.960976, 38.18923 ], dtype=float32),
 0.14219756,
 2.8431973,
 0.00032067572)

In [23]:
from sklearn.neighbors import NearestNeighbors

# Mean distance from each point of prev_low to its second-closest match in the
# same set (column 1 of the k=2 query).
nbrs = NearestNeighbors(n_neighbors=2, algorithm='ball_tree')
nbrs.fit(prev_low)
distances, _ = nbrs.kneighbors(prev_low)
distances[:, 1].mean()

0.019268101446479047

In [24]:
# Same spacing statistic for the estimated next-epoch embedding.
nbrs = NearestNeighbors(n_neighbors=2, algorithm='ball_tree')
nbrs.fit(estimated_next_low)
distances, _ = nbrs.kneighbors(estimated_next_low)
distances[:, 1].mean()

0.015055993343098498

In [None]:
# Rank correlation between the high-dimensional change and each embedding displacement.
for label, change in (("True", low_change), ("Estimated High", estimated_low_change)):
    print(label, stats.spearmanr(high_change, change))

## Check skip-epoch similarity

In [34]:
# Compare every epoch i in 5..50 with the epoch `skip` steps before it.
skip = 4
for i in range(5, 51):
    j = i - skip
    prev_data = data_provider.train_representation(j)
    next_data = data_provider.train_representation(i)
    prev_low = projector.batch_project(j, prev_data)
    # Per-sample cosine distance between the two epochs' representations.
    high_change = np.array([distance.cosine(prev_data[k], next_data[k]) for k in range(len(prev_data))])
    print(j,i, high_change.mean())
    # np.ptp instead of the ndarray.ptp method, which was removed in NumPy 2.0.
    print(np.ptp(prev_low, axis=0))

/home/xianglin/projects/DVI_data/resnet18_fmnist/Model/Epoch_1/tDVI_cnAE_estimation.pth
1 5 0.2230496001760165
[25.074665 18.206139]
/home/xianglin/projects/DVI_data/resnet18_fmnist/Model/Epoch_2/tDVI_cnAE_estimation.pth
2 6 0.17494861816664536
[24.953505 18.175526]
/home/xianglin/projects/DVI_data/resnet18_fmnist/Model/Epoch_3/tDVI_cnAE_estimation.pth
3 7 0.1724971355120341
[24.931423 17.5709  ]
/home/xianglin/projects/DVI_data/resnet18_fmnist/Model/Epoch_4/tDVI_cnAE_estimation.pth
4 8 0.17659039916197458
[24.510033 17.410112]
/home/xianglin/projects/DVI_data/resnet18_fmnist/Model/Epoch_5/tDVI_cnAE_estimation.pth
5 9 0.17508498757067922
[24.400787 17.268433]
/home/xianglin/projects/DVI_data/resnet18_fmnist/Model/Epoch_6/tDVI_cnAE_estimation.pth
6 10 0.1817902295342199
[24.333866 17.286922]
/home/xianglin/projects/DVI_data/resnet18_fmnist/Model/Epoch_7/tDVI_cnAE_estimation.pth
7 11 0.17609081199030124
[24.303978 17.085386]
/home/xianglin/projects/DVI_data/resnet18_fmnist/Model/Epoch_8/

# Temporal evaluation

In [7]:
from singleVis.eval.evaluator import Evaluator
from singleVis.eval.evaluate import *

In [17]:
from singleVis.projector import TimeVisProjector
from singleVis.SingleVisualizationModel import VisModel

# Layer widths: 512 -> five layers of 256 -> 2; the decoder mirrors the encoder.
# NOTE: this rebinds the ENCODER_DIMS / DECODER_DIMS values read from the config.
ENCODER_DIMS = [512] + [256] * 5 + [2]
DECODER_DIMS = ENCODER_DIMS[::-1]
# ENCODER_DIMS = [512,256,2]
# DECODER_DIMS = [2,256,512]
VIS_MODEL_NAME = "timevis"
model = VisModel(ENCODER_DIMS, DECODER_DIMS)
projector = TimeVisProjector(
    vis_model=model,
    content_path=CONTENT_PATH,
    vis_model_name=VIS_MODEL_NAME,
    device=DEVICE,
)
evaluator = Evaluator(data_provider, projector, metric="euclidean")

In [16]:
from singleVis.projector import tfDVIProjector
from singleVis.eval.evaluator import Evaluator

# Alternative setup: evaluate the tfDVI projector identified by `flag`.
flag = "_temporal_id_withoutB"
projector = tfDVIProjector(CONTENT_PATH, flag=flag)
evaluator = Evaluator(
    data_provider,
    projector,
    metric="euclidean",
)

In [14]:
# Alternative setup: evaluate the DVI projector with the "full_skip" model name.
VIS_MODEL = "cnAE"
VIS_MODEL_NAME = "tDVI_cnAE_full_skip"
model = vmodels[VIS_MODEL](ENCODER_DIMS, DECODER_DIMS)
projector = DVIProjector(
    vis_model=model,
    content_path=CONTENT_PATH,
    vis_model_name=VIS_MODEL_NAME,
    epoch_name=EPOCH_NAME,
    device=DEVICE,
)
evaluator = Evaluator(data_provider, projector, metric="euclidean")

In [16]:
# Temporal nearest-neighbour preservation on the test set with whichever evaluator was
# built last (arguments are presumably epoch and neighbour count — TODO confirm).
evaluator.eval_temporal_nn_test(15, 3)

Temporal nn preserving (test) for 15-th epoch 3: 1.288	 std:0.841


1.2885