Try to skip missing epoch and use prediction to estimate reference

# Load module

In [1]:
import os, sys
import json
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim

from scipy.spatial import distance

sys.path.append("..")
from singleVis.data import NormalDataProvider
from singleVis.projector import DVIProjector
from singleVis.vis_models import vis_models as vmodels

2023-08-30 21:21:26.968266: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
VIS_METHOD = "tDVI" # DeepVisualInsight
CONTENT_PATH = "/home/xianglin/projects/DVI_data/resnet18_cifar10"
GPU_ID = "0"

In [3]:
sys.path.append(CONTENT_PATH)
with open(os.path.join(CONTENT_PATH, "config.json"), "r") as f:
    config = json.load(f)
config = config[VIS_METHOD]

In [4]:
SETTING = config["SETTING"]
CLASSES = config["CLASSES"]
DATASET = config["DATASET"]
PREPROCESS = config["VISUALIZATION"]["PREPROCESS"]

# Training parameter (subject model)
TRAINING_PARAMETER = config["TRAINING"]
NET = TRAINING_PARAMETER["NET"]
LEN = TRAINING_PARAMETER["train_num"]
EPOCH_START = config["EPOCH_START"]
EPOCH_END = config["EPOCH_END"]
EPOCH_PERIOD = config["EPOCH_PERIOD"]
EPOCH_NAME = config["EPOCH_NAME"]

# Training parameter (visualization model)
VISUALIZATION_PARAMETER = config["VISUALIZATION"]
VIS_MODEL = VISUALIZATION_PARAMETER['VIS_MODEL']
LAMBDA1 = VISUALIZATION_PARAMETER["LAMBDA1"]
B_N_EPOCHS = VISUALIZATION_PARAMETER["BOUNDARY"]["B_N_EPOCHS"]
L_BOUND = VISUALIZATION_PARAMETER["BOUNDARY"]["L_BOUND"]
ENCODER_DIMS = VISUALIZATION_PARAMETER["ENCODER_DIMS"]
DECODER_DIMS = VISUALIZATION_PARAMETER["DECODER_DIMS"]
S_N_EPOCHS = VISUALIZATION_PARAMETER["S_N_EPOCHS"]
N_NEIGHBORS = VISUALIZATION_PARAMETER["N_NEIGHBORS"]
PATIENT = VISUALIZATION_PARAMETER["PATIENT"]
MAX_EPOCH = VISUALIZATION_PARAMETER["MAX_EPOCH"]

# Define hyperparameters
DEVICE = torch.device("cuda:{}".format(GPU_ID) if torch.cuda.is_available() else "cpu")

In [5]:
import Model.model as subject_model
net = eval("subject_model.{}()".format(NET))

In [125]:
VIS_MODEL_NAME = "tDVI_cnAE"
EVALUATION_NAME = "evaluation_tDVI_cnAE"

In [126]:
# Define data_provider
data_provider = NormalDataProvider(CONTENT_PATH, net, EPOCH_START, EPOCH_END, EPOCH_PERIOD, device=DEVICE, classes=CLASSES, epoch_name=EPOCH_NAME, verbose=1)
if PREPROCESS:
    data_provider._meta_data()
    if B_N_EPOCHS >0:
        data_provider._estimate_boundary(LEN//10, l_bound=L_BOUND)

Finish initialization...


# estimate change

In [174]:
VIS_MODEL_NAME = "tDVI_baseAE"
VIS_MODEL = "baseAE"
VIS_MODEL_NAME = "tDVI_cnAE"
VIS_MODEL = "cnAE"

In [175]:
model = vmodels[VIS_MODEL](ENCODER_DIMS, DECODER_DIMS)
projector = DVIProjector(vis_model=model, content_path=CONTENT_PATH, vis_model_name=VIS_MODEL_NAME, epoch_name=EPOCH_NAME, device=DEVICE)

In [224]:
prev_e = 1
next_e = 5

In [225]:
prev_data = data_provider.train_representation(prev_e)
next_data = data_provider.train_representation(next_e)

In [226]:
# process
m = prev_data.mean(axis=0)
var = prev_data.std(axis=0)
next_data_norm = (next_data - next_data.mean(axis=0))/next_data.std(axis=0)*var+m

In [234]:
(next_data-next_data_norm).mean()

-0.12560144

In [227]:
prev_low = projector.batch_project(prev_e, prev_data)
next_low = projector.batch_project(next_e, next_data)
estimated_next_low = projector.batch_project(prev_e, next_data)
estimated_next_low_norm = projector.batch_project(prev_e, next_data_norm)

/home/xianglin/projects/DVI_data/resnet18_cifar10/Model/Epoch_1/tDVI_cnAE.pth
Successfully load the DVI visualization model for iteration 1
/home/xianglin/projects/DVI_data/resnet18_cifar10/Model/Epoch_5/tDVI_cnAE.pth
Successfully load the DVI visualization model for iteration 5
/home/xianglin/projects/DVI_data/resnet18_cifar10/Model/Epoch_1/tDVI_cnAE.pth
Successfully load the DVI visualization model for iteration 1
/home/xianglin/projects/DVI_data/resnet18_cifar10/Model/Epoch_1/tDVI_cnAE.pth
Successfully load the DVI visualization model for iteration 1


In [229]:
from sklearn.neighbors import NearestNeighbors
nbrs = NearestNeighbors(n_neighbors=15, algorithm='ball_tree').fit(next_low)
distances, _ = nbrs.kneighbors(next_low)
distances[:, -1].mean()

0.0848063634106427

In [230]:
estimated_low_error = np.linalg.norm(next_low-estimated_next_low, axis=1)
estimated_low_norm_error = np.linalg.norm(next_low-estimated_next_low_norm, axis=1)
estimated_low_error.mean(), estimated_low_norm_error.mean()

(2.1195915, 1.6328115)

In [243]:
estimated_low_error

array([0.4551504, 1.0872269, 0.9516876, ..., 1.443281 , 0.69669  ,
       0.3775957], dtype=float32)

In [242]:
next_low.max(axis=0)-next_low.min(axis=0)

array([ 7.4031343, 16.665407 ], dtype=float32)

In [231]:
low_change = np.linalg.norm(prev_low-next_low, axis=1)
estimated_low_change = np.linalg.norm(prev_low-estimated_next_low, axis=1)
estimated_low_norm_change = np.linalg.norm(prev_low-estimated_next_low_norm, axis=1)
low_change.mean(), estimated_low_change.mean(), estimated_low_norm_change.mean()

(3.1808534, 2.4982865, 2.3169856)

In [232]:
high_change = np.array([distance.cosine(prev_data[i], next_data[i]) for i in range(len(prev_data))])

In [233]:
from scipy import stats
print("True", stats.spearmanr(high_change, low_change))
print("Estimated Low", stats.spearmanr(low_change, estimated_low_change))
print("Estimated High", stats.spearmanr(high_change, estimated_low_change))

print("Estimated Norm Low", stats.spearmanr(low_change, estimated_low_norm_change))
print("Estimated Norm High", stats.spearmanr(high_change, estimated_low_norm_change))

True SignificanceResult(statistic=0.4194222502218448, pvalue=0.0)
Estimated Low SignificanceResult(statistic=0.5185372764163582, pvalue=0.0)
Estimated High SignificanceResult(statistic=0.30948839957859076, pvalue=0.0)
Estimated Norm Low SignificanceResult(statistic=0.7043444633656466, pvalue=0.0)
Estimated Norm High SignificanceResult(statistic=0.3864832898003551, pvalue=0.0)


In [236]:
from singleVis.eval.evaluate import *
from singleVis.eval.evaluator import Evaluator
evaluator = Evaluator(data_provider, projector, metric="euclidean")

In [237]:
val = evaluate_proj_nn_perseverance_knn(next_data, estimated_next_low, n_neighbors=15, metric="euclidean")     
val

Wed Aug 30 22:11:51 2023 Building RP forest with 16 trees
Wed Aug 30 22:11:54 2023 NN descent for 16 iterations
	 1  /  16
	 2  /  16
	 3  /  16
	 4  /  16
	 5  /  16
	 6  /  16
	Stopping threshold met -- exiting after 6 iterations
Wed Aug 30 22:13:19 2023 Building RP forest with 16 trees
Wed Aug 30 22:13:19 2023 NN descent for 16 iterations
	 1  /  16
	 2  /  16
	Stopping threshold met -- exiting after 2 iterations


In [238]:
evaluator.eval_nn_train(next_e, 15)

/home/xianglin/projects/DVI_data/resnet18_cifar10/Model/Epoch_5/tDVI_cnAE.pth
Successfully load the DVI visualization model for iteration 5
Wed Aug 30 22:13:22 2023 Building RP forest with 16 trees
Wed Aug 30 22:13:22 2023 NN descent for 16 iterations
	 1  /  16
	 2  /  16
	 3  /  16
	 4  /  16
	 5  /  16
	 6  /  16
	Stopping threshold met -- exiting after 6 iterations
Wed Aug 30 22:13:24 2023 Building RP forest with 16 trees
Wed Aug 30 22:13:24 2023 NN descent for 16 iterations
	 1  /  16
	 2  /  16
	Stopping threshold met -- exiting after 2 iterations
#train# nn preserving: 1.31/15 in epoch 5


1.30736