In [1]:
import os, sys
import numpy as np
import torch
import json

sys.path.append("..")
from singleVis.SingleVisualizationModel import SingleVisualizationModel
from singleVis.data import NormalDataProvider
from singleVis.eval.evaluator import Evaluator
from singleVis.projector import Projector

In [2]:
# Selectors for the noisy-label run analysed below; they are substituted into
# the data paths built in the following cells.
dataset, noise_type, noise_rate = "fmnist", "symmetric", "5"

In [3]:
def _read_label_array(path_template):
    """Load one JSON label file for the selected run and return it as an ndarray.

    `path_template` has three `{}` slots that are filled with `noise_type`,
    `dataset` and `noise_rate`, in that order.
    """
    label_file = path_template.format(noise_type, dataset, noise_rate)
    with open(label_file, "r") as handle:
        return np.array(json.load(handle))


# Labels before and after noise injection for the same run.
clean_label = _read_label_array("/home/xianglin/projects/DVI_data/noisy/{}/{}/{}/clean_label.json")
noisy_label = _read_label_array("/home/xianglin/projects/DVI_data/noisy/{}/{}/{}/noisy_label.json")

In [4]:
# Root directory of the selected run. It is put on sys.path so that the run's
# own `config` module can be imported on the next line.
_run_key = (noise_type, dataset, noise_rate)
CONTENT_PATH = "/home/xianglin/projects/DVI_data/noisy/{}/{}/{}/".format(*_run_key)
sys.path.append(CONTENT_PATH)
from config import config

In [5]:
# ---------------------------------------------------------------------------
# Unpack the per-run config and build the data provider, the visualization
# model and the projector used by the rest of the notebook.
# ---------------------------------------------------------------------------
GPU_ID = 1
SETTING = config["SETTING"]
CLASSES = config["CLASSES"]
DATASET = config["DATASET"]
PREPROCESS = config["VISUALIZATION"]["PREPROCESS"]
EPOCH_START = config["EPOCH_START"]
EPOCH_END = config["EPOCH_END"]
EPOCH_PERIOD = config["EPOCH_PERIOD"]
# Number of checkpoints in [EPOCH_START, EPOCH_END] at stride EPOCH_PERIOD.
# (Name is misspelled but kept because later cells reference it.)
TOTOAL_EPOCH = (EPOCH_END-EPOCH_START)//EPOCH_PERIOD+1

# Training parameter (subject model)
TRAINING_PARAMETER = config["TRAINING"]
NET = TRAINING_PARAMETER["NET"]
LEN = TRAINING_PARAMETER["train_num"]

# Training parameter (visualization model)
VISUALIZATION_PARAMETER = config["VISUALIZATION"]
LAMBDA = VISUALIZATION_PARAMETER["LAMBDA"]
S_LAMBDA = VISUALIZATION_PARAMETER["S_LAMBDA"]
B_N_EPOCHS = VISUALIZATION_PARAMETER["BOUNDARY"]["B_N_EPOCHS"]
L_BOUND = VISUALIZATION_PARAMETER["BOUNDARY"]["L_BOUND"]
INIT_NUM = VISUALIZATION_PARAMETER["INIT_NUM"]
ALPHA = VISUALIZATION_PARAMETER["ALPHA"]
BETA = VISUALIZATION_PARAMETER["BETA"]
MAX_HAUSDORFF = VISUALIZATION_PARAMETER["MAX_HAUSDORFF"]
HIDDEN_LAYER = VISUALIZATION_PARAMETER["HIDDEN_LAYER"]
S_N_EPOCHS = VISUALIZATION_PARAMETER["S_N_EPOCHS"]
T_N_EPOCHS = VISUALIZATION_PARAMETER["T_N_EPOCHS"]
N_NEIGHBORS = VISUALIZATION_PARAMETER["N_NEIGHBORS"]
PATIENT = VISUALIZATION_PARAMETER["PATIENT"]
MAX_EPOCH = VISUALIZATION_PARAMETER["MAX_EPOCH"]
SEGMENTS = VISUALIZATION_PARAMETER["SEGMENTS"]
RESUME_SEG = VISUALIZATION_PARAMETER["RESUME_SEG"]

# Use the requested GPU when CUDA is available, otherwise fall back to CPU.
DEVICE = torch.device("cuda:{}".format(GPU_ID) if torch.cuda.is_available() else "cpu")

# The run directory must be importable for `Model.model`; avoid stacking a
# duplicate sys.path entry when it has already been added.
content_path = CONTENT_PATH
if content_path not in sys.path:
    sys.path.append(content_path)

import Model.model as subject_model

# Resolve the architecture named in the config by attribute lookup instead of
# eval(), so a config string is never executed as arbitrary code. Dotted names
# (e.g. "pkg.Net") are still resolved one attribute at a time.
_net_factory = subject_model
for _attr_name in NET.split("."):
    _net_factory = getattr(_net_factory, _attr_name)
net = _net_factory()

# NOTE(review): this tuple is not used anywhere in this cell (the provider
# receives CLASSES from the config) and the names look like CIFAR-10 classes
# while `dataset` is "fmnist". Kept only so existing references do not break.
classes = ("airplane", "car", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck")
data_provider = NormalDataProvider(CONTENT_PATH, net, EPOCH_START, EPOCH_END, EPOCH_PERIOD, split=-1, device=DEVICE, classes=CLASSES,verbose=1)
if PREPROCESS:
    data_provider.initialize(LEN//10, l_bound=L_BOUND)

# 512 is the representation width fed to the visualization model; the same
# literal is used when stacking representations in a later cell.
model = SingleVisualizationModel(input_dims=512, output_dims=2, units=256, hidden_layer=HIDDEN_LAYER)
projector = Projector(vis_model=model, content_path=CONTENT_PATH, segments=SEGMENTS, device=DEVICE)

Finish initialization...


In [6]:
# Stack the training-set representations of every checkpoint into one array
# of shape (TOTOAL_EPOCH, LEN, 512).
# NOTE(review): checkpoints are addressed as 1..TOTOAL_EPOCH. If
# train_representation expects real epoch numbers this only matches when
# EPOCH_START == 1 and EPOCH_PERIOD == 1 — confirm.
samples = np.zeros((TOTOAL_EPOCH, LEN, 512))
for slot in range(TOTOAL_EPOCH):
    samples[slot] = data_provider.train_representation(slot + 1)

In [7]:
# Project each checkpoint's representations to 2-D, then reorder the axes so
# the result is indexed as (sample, checkpoint, xy) — one trajectory per sample.
_per_epoch_2d = np.zeros((TOTOAL_EPOCH, LEN, 2))
for slot in range(TOTOAL_EPOCH):
    epoch_id = slot + 1
    _per_epoch_2d[slot] = projector.batch_project(epoch_id, samples[slot])
embeddings_2d = np.transpose(_per_epoch_2d, (1, 0, 2))

Successfully load the visualization model for range (1,4)...
Same range as current visualization model...
Same range as current visualization model...
Successfully load the visualization model for range (4,8)...
Same range as current visualization model...
Same range as current visualization model...
Same range as current visualization model...
Successfully load the visualization model for range (8,50)...
Same range as current visualization model...
Same range as current visualization model...
Same range as current visualization model...
Same range as current visualization model...
Same range as current visualization model...
Same range as current visualization model...
Same range as current visualization model...
Same range as current visualization model...
Same range as current visualization model...
Same range as current visualization model...
Same range as current visualization model...
Same range as current visualization model...
Same range as current visualization model...
Same r

In [8]:
# Cache the trajectory embeddings next to the run's model files.
path = os.path.join(CONTENT_PATH, "Model", "trajectory_embeddings.npy")
np.save(file=path, arr=embeddings_2d)

In [9]:
# Reload the cached trajectory embeddings and replace `samples` with the
# representations of checkpoint TOTOAL_EPOCH only.
path = os.path.join(CONTENT_PATH, "Model", "trajectory_embeddings.npy")
embeddings_2d = np.load(file=path)
samples = data_provider.train_representation(TOTOAL_EPOCH)

In [10]:
# Sanity check: display the shapes of the last-checkpoint representations and
# of the per-sample 2-D trajectories.
samples.shape, embeddings_2d.shape

((60000, 512), (60000, 50, 2))

In [11]:
from scipy.special import softmax

# Per-sample uncertainty at checkpoint TOTOAL_EPOCH: one minus the largest
# softmax probability over the model's outputs for that sample.
samples = data_provider.train_representation(TOTOAL_EPOCH)
pred = data_provider.get_pred(TOTOAL_EPOCH, samples)
_class_probs = softmax(pred, axis=1)
confidence = np.max(_class_probs, axis=1)
uncertainty = 1 - confidence

100%|██████████| 300/300 [00:00<00:00, 8437.15it/s]


In [12]:
# Indices of samples whose noisy label differs from the clean label.
# np.flatnonzero always yields a 1-D index array; the previous
# argwhere(...).squeeze() collapsed to a 0-d array when exactly one label
# differed, which breaks len()/shape-based bookkeeping downstream.
noise_idxs = np.flatnonzero(clean_label != noisy_label)
noise_idxs.shape

(3003,)

In [13]:
from singleVis.trajectory_manager import (
    TrajectoryManager,
    FeedbackTrajectoryManager,
    Recommender,
)

# Recommender driven by per-sample uncertainty and the 2-D trajectories.
# Earlier experiments used TrajectoryManager / FeedbackTrajectoryManager with
# `samples` as the first argument and period=100 instead.
# NOTE(review): a previous inline note here read "20 period" — intent unclear,
# confirm the intended period value.
tm = Recommender(
    uncertainty,
    embeddings_2d,
    30,
    period=50,
    metric="a",
)

In [14]:
# Run the recommender's clustering step before any batches are sampled.
# NOTE(review): presumably groups the trajectories held by `tm`; the exact
# behavior lives in singleVis.trajectory_manager — confirm there.
tm.clustered()

In [15]:
# Batch size per round: passed to the recommender's sampling calls and used as
# the draw size of the random baseline; also printed as the denominator.
test_len = 50

In [16]:
# Initial round: no feedback yet, so both feedback sets start empty.
correct = np.empty(0, dtype=np.int32)
wrong = np.empty(0, dtype=np.int32)

selected, _ = tm.sample_batch_init(correct, wrong, test_len)

# Split the recommended indices into truly noisy ones (c) and the rest (w).
c = np.intersect1d(selected, noise_idxs)
w = np.setdiff1d(selected, c)
print(len(c), test_len)

# Record the outcome as feedback for the following rounds.
correct = np.concatenate([correct, c])
wrong = np.concatenate([wrong, w])

3 50


In [17]:
# kernel regression
# Ten feedback rounds: each round asks the recommender for `test_len` indices
# given the feedback so far, scores them against the known noisy indices, and
# appends the outcome to the feedback sets.
n_rounds = 10
for _round in range(n_rounds):
    selected, _ = tm.sample_batch(correct, wrong, test_len)
    c = np.intersect1d(selected, noise_idxs)
    w = np.setdiff1d(selected, c)
    print(len(c), test_len)
    correct = np.concatenate([correct, c])
    wrong = np.concatenate([wrong, w])

50 50
50 50
49 50
50 50
50 50
49 50
48 50
48 50
50 50
48 50


In [18]:
# random
# Baseline: draw `test_len` indices uniformly without replacement from the
# not-yet-drawn pool for ten rounds and count how many are truly noisy.
# A locally seeded generator keeps the baseline numbers reproducible across
# kernel restarts without touching NumPy's global random state.
baseline_rng = np.random.RandomState(0)
pool = np.arange(LEN)
for _ in range(10):
    selected = baseline_rng.choice(pool, size=test_len, replace=False)
    c = np.intersect1d(selected, noise_idxs)
    # `w` is not used by the baseline; kept for parity with the cells above.
    w = np.setdiff1d(selected, c)
    print(len(c), test_len)
    # Remove this round's draws so later rounds never repeat an index.
    pool = np.setdiff1d(pool, selected)


2 50
2 50
5 50
2 50
1 50
4 50
4 50
4 50
2 50
3 50
