In [20]:
import os, sys
import numpy as np
import torch
import json

sys.path.append("..")
from singleVis.SingleVisualizationModel import VisModel
from singleVis.data import NormalDataProvider
from singleVis.eval.evaluator import Evaluator
from singleVis.projector import Projector, tfDVIProjector, TimeVisProjector, DVIProjector

In [21]:
# tensorflow
# Expose GPUs 0-3 to this process through the CUDA_VISIBLE_DEVICES environment variable.
# NOTE(review): presumably required by the tensorflow-backed projector; the variable
# normally has to be set before any CUDA context is created — confirm nothing above
# has already initialised the GPU.
visible_device = "0,1,2,3"
os.environ["CUDA_VISIBLE_DEVICES"] = visible_device

In [22]:
# Experiment selector: these three values are substituted into the data paths
# that the following cells build with str.format.
dataset = "mnist"
noise_type = "symmetric"
# Kept as a string; in the visible cells it is only used as a path component.
noise_rate = "5"

In [23]:
# Read the clean labels and the noisy labels of the selected experiment and
# keep both as numpy arrays so they can be compared element-wise.
path = "/home/xianglin/projects/DVI_data/noisy/{}/{}/{}/clean_label.json".format(noise_type, dataset, noise_rate)
with open(path, "r") as f:
    clean_label = np.array(json.load(f))

path = "/home/xianglin/projects/DVI_data/noisy/{}/{}/{}/noisy_label.json".format(noise_type, dataset, noise_rate)
with open(path, "r") as f:
    noisy_label = np.array(json.load(f))

In [24]:
# Visualization method under evaluation. It selects the matching section of
# config.json and the projector branch in the setup cell, which handles
# "tfDVI", "TimeVis", "DeepDebugger" and "tDVI".
VIS_METHOD = "tDVI"
# VIS_METHOD = "TimeVis"

In [25]:
# Root folder of the selected experiment. It is also appended to sys.path so
# that modules stored inside it can be imported by later cells.
CONTENT_PATH = "/home/xianglin/projects/DVI_data/noisy/{}/{}/{}/".format(noise_type, dataset, noise_rate)
sys.path.append(CONTENT_PATH)

# Load config.json and keep only the section of the chosen visualization method.
with open(os.path.join(CONTENT_PATH, "config.json"), "r") as f:
    config = json.load(f)[VIS_METHOD]

In [26]:
# Experiment settings read from the method-specific section of config.json.
CLASSES = config["CLASSES"]
GPU_ID = config["GPU"]
EPOCH_START = config["EPOCH_START"]
EPOCH_END = config["EPOCH_END"]
EPOCH_PERIOD = config["EPOCH_PERIOD"]
EPOCH_NAME = config["EPOCH_NAME"]


# Training parameter (subject model)
TRAINING_PARAMETER = config["TRAINING"]
NET = TRAINING_PARAMETER["NET"]
LEN = TRAINING_PARAMETER["train_num"]
# Training parameter (visualization model)
VISUALIZATION_PARAMETER = config["VISUALIZATION"]
ENCODER_DIMS = VISUALIZATION_PARAMETER["ENCODER_DIMS"]
DECODER_DIMS = VISUALIZATION_PARAMETER["DECODER_DIMS"]
VIS_MODEL_NAME = VISUALIZATION_PARAMETER["VIS_MODEL_NAME"]

# Number of checkpoints between EPOCH_START and EPOCH_END (inclusive) at a
# stride of EPOCH_PERIOD. The misspelled name is kept because other cells use it.
TOTOAL_EPOCH = (EPOCH_END-EPOCH_START)//EPOCH_PERIOD + 1

# define hyperparameters
DEVICE = torch.device("cuda:{}".format(GPU_ID) if torch.cuda.is_available() else "cpu")

# NOTE(review): presumably resolved through the CONTENT_PATH entry appended to
# sys.path, so this import has to stay after that cell.
import Model.model as subject_model
# net = resnet18()
# Look the architecture up by name instead of eval()-ing a string built from the
# config file; an unknown name now fails with a plain AttributeError.
net = getattr(subject_model, NET)()

# NOTE(review): epoch_name is the literal "Epoch" here while the tDVI projector
# below receives EPOCH_NAME from the config — confirm the two are meant to agree.
data_provider = NormalDataProvider(CONTENT_PATH, net, EPOCH_START, EPOCH_END, EPOCH_PERIOD, device=DEVICE, classes=CLASSES, epoch_name="Epoch", verbose=1)

# Build the projector that matches the selected visualization method.
if VIS_METHOD == "tfDVI":
    # Define Projector
    flag = "_temporal_id_withoutB"
    projector = tfDVIProjector(CONTENT_PATH, flag=flag)
elif VIS_METHOD == "TimeVis":
    model = VisModel(ENCODER_DIMS, DECODER_DIMS)
    projector = TimeVisProjector(vis_model=model, content_path=CONTENT_PATH, vis_model_name=VIS_MODEL_NAME, device=DEVICE)
elif VIS_METHOD == "DeepDebugger":
    model = VisModel(ENCODER_DIMS, DECODER_DIMS)
    SEGMENTS = VISUALIZATION_PARAMETER["SEGMENTS"]
    projector = Projector(vis_model=model, content_path=CONTENT_PATH, segments=SEGMENTS, device=DEVICE)
elif VIS_METHOD == "tDVI":
    model = VisModel(ENCODER_DIMS, DECODER_DIMS)
    projector = DVIProjector(vis_model=model, content_path=CONTENT_PATH, vis_model_name=VIS_MODEL_NAME, epoch_name=EPOCH_NAME, device=DEVICE)
else:
    # Without this branch an unknown method silently leaves `projector`
    # undefined, or bound to a stale object from an earlier run of the cell.
    raise ValueError("Unsupported VIS_METHOD: {!r}".format(VIS_METHOD))


Finish initialization...


In [18]:
# Training-set representations of every saved checkpoint:
# samples[k] belongs to epoch EPOCH_START + k*EPOCH_PERIOD.
# NOTE(review): the feature width 512 is hard-coded — confirm it matches the
# output of data_provider.train_representation for the configured network.
samples = np.zeros((TOTOAL_EPOCH, LEN, 512))
# The stop is EPOCH_END+1 rather than EPOCH_END+EPOCH_PERIOD: when the epoch span
# is not a multiple of the period, the latter yields one extra epoch past
# EPOCH_END and an index that overruns the TOTOAL_EPOCH rows allocated above.
for i in range(EPOCH_START, EPOCH_END+1, EPOCH_PERIOD):
    e = (i-EPOCH_START)//EPOCH_PERIOD
    samples[e] = data_provider.train_representation(i)

In [19]:
# Project every checkpoint's representations to 2-D with that checkpoint's
# visualization model; filled as (checkpoint, sample, 2).
embeddings_2d = np.zeros((TOTOAL_EPOCH, LEN, 2))
# The stop is EPOCH_END+1 rather than EPOCH_END+EPOCH_PERIOD so the loop never
# visits an epoch past EPOCH_END or writes beyond the TOTOAL_EPOCH rows when the
# epoch span is not a multiple of the period.
for i in range(EPOCH_START, EPOCH_END+1, EPOCH_PERIOD):
    e = (i-EPOCH_START)//EPOCH_PERIOD
    embeddings_2d[e] = projector.batch_project(i, samples[e])
# Swap the first two axes so each sample's trajectory is contiguous:
# the result is (sample, checkpoint, 2).
embeddings_2d = np.transpose(embeddings_2d, [1,0,2])

/home/xianglin/projects/DVI_data/noisy/symmetric/mnist/5/Model/Epoch_1/tDVI.pth
Successfully load the DVI visualization model for iteration 1
/home/xianglin/projects/DVI_data/noisy/symmetric/mnist/5/Model/Epoch_2/tDVI.pth
Successfully load the DVI visualization model for iteration 2
/home/xianglin/projects/DVI_data/noisy/symmetric/mnist/5/Model/Epoch_3/tDVI.pth
Successfully load the DVI visualization model for iteration 3
/home/xianglin/projects/DVI_data/noisy/symmetric/mnist/5/Model/Epoch_4/tDVI.pth
Successfully load the DVI visualization model for iteration 4
/home/xianglin/projects/DVI_data/noisy/symmetric/mnist/5/Model/Epoch_5/tDVI.pth
Successfully load the DVI visualization model for iteration 5
/home/xianglin/projects/DVI_data/noisy/symmetric/mnist/5/Model/Epoch_6/tDVI.pth
Successfully load the DVI visualization model for iteration 6
/home/xianglin/projects/DVI_data/noisy/symmetric/mnist/5/Model/Epoch_7/tDVI.pth
Successfully load the DVI visualization model for iteration 7
/home/

In [20]:
# Cache the projected trajectories under <CONTENT_PATH>/Model, one file per
# visualization method, so they can be reloaded without re-projecting.
path = os.path.join(CONTENT_PATH, "Model","{}_trajectory_embeddings.npy".format(VIS_METHOD))
np.save(path,embeddings_2d)

In [21]:
# Reload the cached trajectories of the current VIS_METHOD from
# <CONTENT_PATH>/Model; this overwrites any embeddings_2d already in memory.
path = os.path.join(CONTENT_PATH, "Model","{}_trajectory_embeddings.npy".format(VIS_METHOD))
embeddings_2d = np.load(path)

In [22]:
from scipy.special import softmax
# uncertainties[k] holds, for every training sample, 1 - (largest softmax
# probability) at checkpoint k.
uncertainties = np.zeros((TOTOAL_EPOCH, LEN))
# The stop is EPOCH_END+1 rather than EPOCH_END+EPOCH_PERIOD so the loop never
# visits an epoch past EPOCH_END or writes beyond the TOTOAL_EPOCH rows when the
# epoch span is not a multiple of the period.
for epoch in range(EPOCH_START, EPOCH_END+1, EPOCH_PERIOD):
    # A dedicated name keeps the per-checkpoint `samples` array built earlier
    # intact, so the projection cell can still be re-run after this one.
    epoch_samples = data_provider.train_representation(epoch)
    pred = data_provider.get_pred(epoch, epoch_samples)
    confidence = np.amax(softmax(pred, axis=1), axis=1)
    uncertainty = 1-confidence
    uncertainties[(epoch-EPOCH_START)//EPOCH_PERIOD] = uncertainty
# `uncertainty` stays bound to the last checkpoint's values after the loop; the
# cell that builds the Recommender reads it.
# TODO trajectory of uncertainty

100%|██████████| 300/300 [00:00<00:00, 7730.72it/s]
100%|██████████| 300/300 [00:00<00:00, 8519.24it/s]
100%|██████████| 300/300 [00:00<00:00, 7747.57it/s]
100%|██████████| 300/300 [00:00<00:00, 8493.65it/s]
100%|██████████| 300/300 [00:00<00:00, 7726.93it/s]
100%|██████████| 300/300 [00:00<00:00, 8466.39it/s]
100%|██████████| 300/300 [00:00<00:00, 7704.22it/s]
100%|██████████| 300/300 [00:00<00:00, 8468.84it/s]
100%|██████████| 300/300 [00:00<00:00, 7725.64it/s]
100%|██████████| 300/300 [00:00<00:00, 8489.06it/s]
100%|██████████| 300/300 [00:00<00:00, 7716.50it/s]
100%|██████████| 300/300 [00:00<00:00, 8503.35it/s]
100%|██████████| 300/300 [00:00<00:00, 7706.91it/s]
100%|██████████| 300/300 [00:00<00:00, 8488.37it/s]
100%|██████████| 300/300 [00:00<00:00, 7720.00it/s]


In [8]:
# Indices of the training samples whose given (noisy) label differs from the
# clean label. np.flatnonzero always returns a 1-D index array, whereas
# argwhere(...).squeeze() collapses to a 0-d array when exactly one label differs.
noise_idxs = np.flatnonzero(clean_label != noisy_label)
noise_idxs.shape

(2986,)

In [24]:
from singleVis.trajectory_manager import Recommender
import pickle
# Build a Recommender from the uncertainty values and the 2-D trajectories,
# run its clustering step, and pickle it under CONTENT_PATH (one file per method).
# NOTE(review): `uncertainty` is the last-checkpoint vector left over from the
# uncertainty loop, not the per-checkpoint `uncertainties` matrix — confirm which
# one Recommender expects. The meaning of 30 and period=15 is not visible here.
tm = Recommender(uncertainty, embeddings_2d, 30, period=15)
tm.clustered()
with open(os.path.join(CONTENT_PATH,  '{}_sample_recommender.pkl'.format(VIS_METHOD)), 'wb') as f:
    pickle.dump(tm, f, pickle.HIGHEST_PROTOCOL)

In [27]:
import pickle
# Switch the active method to tfDVI and reload its pickled recommender.
# This rebinds the globals VIS_METHOD and tm for every cell executed afterwards.
# pickle.load can execute code stored in the file: only load pickles that were
# produced by this project.
VIS_METHOD = "tfDVI"
with open(os.path.join(CONTENT_PATH,  '{}_sample_recommender.pkl'.format(VIS_METHOD)), 'rb') as f:
    tm = pickle.load(f)

In [28]:
# Settings of the sampling experiments below.
BUDGET = 10  # number of samples requested per sampling call
TOLERANCE = 0.1  # NOTE(review): not referenced in the visible cells
ROUND = 10  # feedback rounds per simulated run
INIT_ROUND = 10000  # repetitions used to estimate the initial-batch success rate

In [29]:
def add_noise(rate, acc_idxs, rej_idxs):
    """Simulate unreliable feedback by swapping a fraction of two index sets.

    From each set, ``int(len(set) * rate)`` distinct entries are drawn at random
    and moved to the other set.

    Args:
        rate: fraction of each set to swap, expected in [0, 1]. ``0`` returns
            both inputs untouched.
        acc_idxs: 1-D array of accepted indices.
        rej_idxs: 1-D array of rejected indices.

    Returns:
        ``(new_acc, new_rej)``: the kept entries of each set (sorted and
        de-duplicated by ``np.setdiff1d``) followed by the entries moved over
        from the other set.

    Raises:
        ValueError: from ``np.random.choice`` if ``rate`` > 1, because more
            distinct entries are requested than exist.

    Note:
        ``int()`` truncates, so a set with fewer than ``1 / rate`` entries
        contributes no swapped entries at all.
    """
    if rate == 0:
        return acc_idxs, rej_idxs
    acc_idxs = np.asarray(acc_idxs)
    rej_idxs = np.asarray(rej_idxs)

    def _pick(idxs):
        # Entries of `idxs` that will be moved to the other set.
        n_flip = int(len(idxs) * rate)
        if n_flip == 0:
            return idxs[:0]
        # replace=False: sampling with replacement can draw the same position
        # twice, which would swap fewer than n_flip distinct entries.
        return idxs[np.random.choice(len(idxs), size=n_flip, replace=False)]

    acc_noise = _pick(acc_idxs)
    new_acc = np.setdiff1d(acc_idxs, acc_noise)

    rej_noise = _pick(rej_idxs)
    new_rej = np.setdiff1d(rej_idxs, rej_noise)

    new_acc = np.concatenate((new_acc, rej_noise), axis=0)
    new_rej = np.concatenate((new_rej, acc_noise), axis=0)
    return new_acc, new_rej

def init_sampling(tm, method, round, budget):
    """Estimate how often the recommender's first batch hits a mislabelled sample.

    Calls ``tm.sample_batch_init`` ``round`` times with empty feedback, asking
    for ``budget`` samples each time, and records the fraction of each batch
    that is contained in the global ``noise_idxs``.

    Args:
        tm: object exposing ``sample_batch_init(correct, wrong, budget)`` whose
            first return value is the selected indices.
        method: label printed in the header line.
        round: number of repetitions.
        budget: batch size requested from ``tm``.

    Returns:
        The mean hit fraction over all repetitions (also printed).
    """
    print("Feedback sampling initialization ({}):".format(method))
    hit_fractions = []
    for _trial in range(round):
        # No feedback has been given yet, so both sets start out empty.
        no_accepted = np.empty(0, dtype=np.int32)
        no_rejected = np.empty(0, dtype=np.int32)
        picked, _unused = tm.sample_batch_init(no_accepted, no_rejected, budget)
        hits = np.intersect1d(picked, noise_idxs)
        hit_fractions.append(len(hits)/budget)
    mean_rate = sum(hit_fractions)/len(hit_fractions)
    print("Success Rate:\t{:.4f}".format(mean_rate))
    return mean_rate

def feedback_sampling(tm, method, round, budget, noise_rate=0.0):
    """Simulate ``round`` rounds of recommend-and-feedback and track the hit rate.

    Round 0 uses ``tm.sample_batch_init`` with empty feedback; every later round
    calls ``tm.sample_batch`` with all feedback gathered so far. A selected
    sample counts as a hit when it is contained in the global ``noise_idxs``.
    After each round's rate is recorded, the feedback of that round is passed
    through ``add_noise`` before it is accumulated.

    Args:
        tm: recommender exposing ``sample_batch_init`` and ``sample_batch``.
        method: unused by the computation; kept for call compatibility.
        round: number of rounds (round 0 included).
        budget: number of samples requested per round.
        noise_rate: rate handed to ``add_noise`` for corrupting the feedback.

    Returns:
        Array of length ``round`` whose k-th entry is the mean hit rate of
        rounds 0..k.
    """
    hit_rate = np.zeros(round)
    accepted = np.empty(0, dtype=np.int32)
    rejected = np.empty(0, dtype=np.int32)

    # Round 0: initial batch, no feedback available yet.
    picked, _ = tm.sample_batch_init(accepted, rejected, budget)
    hits = np.intersect1d(picked, noise_idxs)
    misses = np.setdiff1d(picked, hits)
    accepted = np.concatenate((accepted, hits), axis=0)
    rejected = np.concatenate((rejected, misses), axis=0)
    hit_rate[0] = len(accepted)/float(budget)

    # The rate is measured on the true hits; only the stored feedback is corrupted.
    accepted, rejected = add_noise(noise_rate, accepted, rejected)

    for step in range(1, round):
        picked, _, _ = tm.sample_batch(accepted, rejected, budget, True)
        hits = np.intersect1d(picked, noise_idxs)
        misses = np.setdiff1d(picked, hits)
        hit_rate[step] = len(hits)/budget

        # Corrupt this round's feedback before adding it to the history.
        hits, misses = add_noise(noise_rate, hits, misses)
        accepted = np.concatenate((accepted, hits), axis=0)
        rejected = np.concatenate((rejected, misses), axis=0)

    # Running mean of the per-round hit rates.
    return np.array([hit_rate[:k].mean() for k in range(1, len(hit_rate)+1)])

In [28]:
# Score the last `tm.period` steps of the trajectory stored at index 1.
# NOTE(review): assumes embeddings_2d is laid out (sample, checkpoint, 2), as
# produced by the transpose in the projection cell — confirm.
tm.score_new_sample(embeddings_2d[1][-tm.period:])

(0.9502, 0.9070666666666667, 0.9028166666666667)

In [47]:
import pickle
# Switch the active method to TimeVis and reload its pickled recommender.
# This rebinds the globals VIS_METHOD and tm for every cell executed afterwards.
# pickle.load can execute code stored in the file: only load pickles that were
# produced by this project.
VIS_METHOD = "TimeVis"
with open(os.path.join(CONTENT_PATH,  '{}_sample_recommender.pkl'.format(VIS_METHOD)), 'rb') as f:
    tm = pickle.load(f)

In [33]:
# random init
# Baseline for the initial batch: draw BUDGET indices uniformly without
# replacement, INIT_ROUND times, and report the mean fraction of draws that land
# on a mislabelled sample (an index in noise_idxs).
# No random seed is set in the visible cells, so the printed rate varies per run.
print("Random sampling init")
s_rate = list()
pool = np.arange(LEN)
for _ in range(INIT_ROUND):
    s_idxs = np.random.choice(pool,size=BUDGET,replace=False)
    s_rate.append(len(np.intersect1d(s_idxs, noise_idxs))/BUDGET)
print("Success Rate:\t{:.4f}".format(sum(s_rate)/len(s_rate)))


Random sampling init
Success Rate:	0.0492


In [32]:
# tDVI init: initial-batch success rate of the recommender currently bound to
# `tm`. Run this while `tm` / VIS_METHOD refer to the tDVI recommender.
init_sampling(tm, method=VIS_METHOD, round=INIT_ROUND, budget=BUDGET)

Feedback sampling initialization (tDVI):
Success Rate:	0.0511


0.05112000000000237

In [30]:
# DVI (tfDVI) init: initial-batch success rate of the recommender currently bound
# to `tm`. Run this while `tm` / VIS_METHOD refer to the tfDVI recommender.
init_sampling(tm, method=VIS_METHOD, round=INIT_ROUND, budget=BUDGET)

Feedback sampling initialization (tfDVI):
Success Rate:	0.0506


0.050630000000002374

In [48]:
# TimeVis init: initial-batch success rate of the recommender currently bound to
# `tm`. Run this while `tm` / VIS_METHOD refer to the TimeVis recommender.
init_sampling(tm, method=VIS_METHOD, round=INIT_ROUND, budget=BUDGET)

Feedback sampling initialization (TimeVis):
Success Rate:	0.0504


0.05036000000000239

In [39]:
# random Feedback
# Baseline for the multi-round experiment: each round draws BUDGET indices
# uniformly from the samples not drawn in earlier rounds and records the fraction
# that are mislabelled (contained in noise_idxs).
print("Random sampling feedback")
random_rate = np.zeros(ROUND)
pool = np.arange(LEN)
for r in range(ROUND):
    s_idxs = np.random.choice(pool,size=BUDGET,replace=False)
    random_rate[r] = len(np.intersect1d(s_idxs, noise_idxs))/BUDGET
    # Remove the drawn indices so later rounds never repeat a sample.
    pool = np.setdiff1d(pool, s_idxs)
print("Success Rate:\t{:.4f}".format(sum(random_rate)/len(random_rate)))
# Running mean of the per-round rates (entry k averages rounds 0..k).
ac_random_rate = np.array([random_rate[:i].mean() for i in range(1, len(random_rate)+1)])
print(ac_random_rate)

Random sampling feedback
Success Rate:	0.0600
[0.1        0.05       0.03333333 0.025      0.04       0.05
 0.04285714 0.05       0.05555556 0.06      ]


In [42]:
# tDVI feedback, no noise in the feedback: repeat the ROUND-round simulation
# 500 times and print mean / std of the final cumulative success rate.
# Uses whichever recommender is currently bound to `tm` / VIS_METHOD.
ac = np.zeros(500)
for i in range(500):
    ac_rate = feedback_sampling(tm=tm, method=VIS_METHOD, round=ROUND, budget=BUDGET)
    ac[i] = ac_rate[-1]
print(ac.mean(), ac.std())

0.47456000000000004 0.06528404399238759


In [45]:
# DVI (tfDVI) feedback, no noise in the feedback: repeat the ROUND-round
# simulation 500 times and print mean / std of the final cumulative success rate.
# Uses whichever recommender is currently bound to `tm` / VIS_METHOD.
ac = np.zeros(500)
for i in range(500):
    ac_rate = feedback_sampling(tm=tm, method=VIS_METHOD, round=ROUND, budget=BUDGET)
    ac[i] = ac_rate[-1]
print(ac.mean(), ac.std())

0.43413999999999997 0.12223526659683775


In [49]:
# TimeVis feedback, no noise in the feedback: repeat the ROUND-round simulation
# 500 times and print mean / std of the final cumulative success rate.
# Uses whichever recommender is currently bound to `tm` / VIS_METHOD.
ac = np.zeros(500)
for i in range(500):
    ac_rate = feedback_sampling(tm=tm, method=VIS_METHOD, round=ROUND, budget=BUDGET)
    ac[i] = ac_rate[-1]
print(ac.mean(), ac.std())

0.47746000000000005 0.07743480096184145


In [43]:
# tDVI feedback with noise (noise_rate=0.05): repeat the ROUND-round simulation
# 500 times and print mean / std of the final cumulative success rate.
# Uses whichever recommender is currently bound to `tm` / VIS_METHOD.
# NOTE(review): add_noise swaps int(len * rate) entries per call; each call here
# sees at most BUDGET=10 indices, and int(10 * 0.05) == 0, so no feedback may
# actually be corrupted in this run — verify.
ac = np.zeros(500)
for i in range(500):
    ac_rate = feedback_sampling(tm=tm, method=VIS_METHOD, round=ROUND, budget=BUDGET, noise_rate=0.05)
    ac[i] = ac_rate[-1]
print(ac.mean(), ac.std())

0.47046000000000004 0.0643676036527693


In [46]:
# DVI (tfDVI) feedback with noise (noise_rate=0.05): repeat the ROUND-round
# simulation 500 times and print mean / std of the final cumulative success rate.
# Uses whichever recommender is currently bound to `tm` / VIS_METHOD.
# NOTE(review): add_noise swaps int(len * rate) entries per call; each call here
# sees at most BUDGET=10 indices, and int(10 * 0.05) == 0, so no feedback may
# actually be corrupted in this run — verify.
ac = np.zeros(500)
for i in range(500):
    ac_rate = feedback_sampling(tm=tm, method=VIS_METHOD, round=ROUND, budget=BUDGET, noise_rate=0.05)
    ac[i] = ac_rate[-1]
print(ac.mean(), ac.std())

0.44256 0.10269297152191087


In [51]:
# TimeVis Feedback with noise (noise_rate=0.05): repeat the ROUND-round
# simulation 500 times and print mean / std of the final cumulative success rate.
# Uses whichever recommender is currently bound to `tm` / VIS_METHOD.
# NOTE(review): add_noise swaps int(len * rate) entries per call; each call here
# sees at most BUDGET=10 indices, and int(10 * 0.05) == 0, so no feedback may
# actually be corrupted in this run — verify.
ac = np.zeros(500)
for i in range(500):
    ac_rate = feedback_sampling(tm=tm, method=VIS_METHOD, round=ROUND, budget=BUDGET, noise_rate=0.05)
    ac[i] = ac_rate[-1]
print(ac.mean(), ac.std())

0.48102 0.059625159119284545
