In [None]:
import os, sys
import numpy as np
import torch
import json

sys.path.append("..")
from singleVis.SingleVisualizationModel import VisModel
from singleVis.data import NormalDataProvider
from singleVis.eval.evaluator import Evaluator
from singleVis.projector import Projector, tfDVIProjector, TimeVisProjector

In [None]:
# Experiment selector: these three values are substituted, in the order
# (noise_type, dataset, noise_rate), into the data paths built further down.
dataset = "cifar10"
noise_type = "symmetric"
noise_rate = "20"  # kept as a string; it is only used as a path component

In [None]:
# Read clean_label.json for the selected experiment and keep it as an array.
path = "/home/xianglin/projects/DVI_data/noisy/{}/{}/{}/clean_label.json".format(noise_type, dataset, noise_rate)
with open(path, "r") as f:
    clean_label = np.array(json.load(f))

# Read noisy_label.json from the same directory; entries that differ from
# clean_label are what the later cells treat as label noise.
path = "/home/xianglin/projects/DVI_data/noisy/{}/{}/{}/noisy_label.json".format(noise_type, dataset, noise_rate)
with open(path, "r") as f:
    noisy_label = np.array(json.load(f))

In [None]:
# Visualization method to evaluate. It selects the section of config.json that
# is used and the projector that is built; the projector cell handles
# "tfDVI", "TimeVis" and "DeepDebugger".
VIS_METHOD = "tfDVI"

In [None]:
# Root directory of the selected experiment. It is appended to sys.path so
# that modules stored next to the data can be imported by later cells.
CONTENT_PATH = "/home/xianglin/projects/DVI_data/noisy/{}/{}/{}/".format(noise_type, dataset, noise_rate)
sys.path.append(CONTENT_PATH)

# config.json holds one section per visualization method; keep only ours.
config_file = os.path.join(CONTENT_PATH, "config.json")
with open(config_file, "r") as f:
    config = json.load(f)[VIS_METHOD]

In [None]:
# ---- Settings read from the VIS_METHOD section of config.json ----
CLASSES = config["CLASSES"]
GPU_ID = config["GPU"]
EPOCH_START = config["EPOCH_START"]
EPOCH_END = config["EPOCH_END"]
EPOCH_PERIOD = config["EPOCH_PERIOD"]

# Training parameter (subject model)
TRAINING_PARAMETER = config["TRAINING"]
NET = TRAINING_PARAMETER["NET"]
LEN = TRAINING_PARAMETER["train_num"]

# Training parameter (visualization model)
VISUALIZATION_PARAMETER = config["VISUALIZATION"]
ENCODER_DIMS = VISUALIZATION_PARAMETER["ENCODER_DIMS"]
DECODER_DIMS = VISUALIZATION_PARAMETER["DECODER_DIMS"]
VIS_MODEL_NAME = VISUALIZATION_PARAMETER["VIS_MODEL_NAME"]

# Number of checkpoints in [EPOCH_START, EPOCH_END] (both ends inclusive) when
# stepping by EPOCH_PERIOD. The spelling is kept because other cells use it.
TOTOAL_EPOCH = (EPOCH_END-EPOCH_START)//EPOCH_PERIOD + 1

# Use the configured GPU when CUDA is available, otherwise fall back to CPU.
DEVICE = torch.device("cuda:{}".format(GPU_ID) if torch.cuda.is_available() else "cpu")

# This import has to stay below the cell that appends CONTENT_PATH to sys.path.
# NOTE(review): Model.model is presumably resolved through that entry - confirm.
import Model.model as subject_model

# Look the network constructor up by name instead of eval()-ing a string that
# comes from a config file; NET is expected to be a plain attribute name.
net = getattr(subject_model, NET)()

data_provider = NormalDataProvider(CONTENT_PATH, net, EPOCH_START, EPOCH_END, EPOCH_PERIOD, device=DEVICE, classes=CLASSES, epoch_name="Epoch", verbose=1)

# Build the projector that matches the chosen visualization method.
if VIS_METHOD == "tfDVI":
    flag = "_temporal_id_withoutB"
    projector = tfDVIProjector(CONTENT_PATH, flag=flag)
elif VIS_METHOD == "TimeVis":
    model = VisModel(ENCODER_DIMS, DECODER_DIMS)
    projector = TimeVisProjector(vis_model=model, content_path=CONTENT_PATH, vis_model_name=VIS_MODEL_NAME, device=DEVICE)
elif VIS_METHOD == "DeepDebugger":
    model = VisModel(ENCODER_DIMS, DECODER_DIMS)
    SEGMENTS = VISUALIZATION_PARAMETER["SEGMENTS"]
    projector = Projector(vis_model=model, content_path=CONTENT_PATH, segments=SEGMENTS, device=DEVICE)
else:
    # Without this branch an unknown method would leave `projector` undefined
    # (or silently reuse one from an earlier run of the cell).
    raise ValueError("Unsupported VIS_METHOD: {!r}".format(VIS_METHOD))

In [None]:
# Collect the training-set representations of every checkpoint, one slice per
# checkpoint. The last axis assumes 512-d representations - TODO confirm
# against the subject model.
samples = np.zeros((TOTOAL_EPOCH, LEN, 512))
# TOTOAL_EPOCH counts EPOCH_END as a checkpoint, so the range has to be
# inclusive; stopping at EPOCH_END would leave the last slice all zeros.
for i in range(EPOCH_START, EPOCH_END + 1, EPOCH_PERIOD):
    e = (i-EPOCH_START)//EPOCH_PERIOD
    samples[e] = data_provider.train_representation(i)


In [None]:
# Project every checkpoint's representations to 2-D, one slice per checkpoint.
embeddings_2d = np.zeros((TOTOAL_EPOCH, LEN, 2))
# The range is inclusive of EPOCH_END so that every one of the TOTOAL_EPOCH
# slices is filled; otherwise the final slice would stay at (0, 0).
for i in range(EPOCH_START, EPOCH_END + 1, EPOCH_PERIOD):
    e = (i-EPOCH_START)//EPOCH_PERIOD
    embeddings_2d[e] = projector.batch_project(i, samples[e])
# Reorder the axes from (checkpoint, sample, xy) to (sample, checkpoint, xy).
embeddings_2d = np.transpose(embeddings_2d, [1,0,2])


In [None]:
# Cache the trajectory embeddings under <CONTENT_PATH>/Model, one file per
# visualization method.
embedding_file = "{}_trajectory_embeddings.npy".format(VIS_METHOD)
path = os.path.join(CONTENT_PATH, "Model", embedding_file)
np.save(path, embeddings_2d)

In [None]:
# Reload the cached trajectory embeddings of the current visualization method.
embedding_file = "{}_trajectory_embeddings.npy".format(VIS_METHOD)
path = os.path.join(CONTENT_PATH, "Model", embedding_file)
embeddings_2d = np.load(path)

In [None]:
# Sanity check: display the shapes of the collected representations and of the
# 2-D trajectory embeddings.
samples.shape, embeddings_2d.shape

In [None]:
from scipy.special import softmax

# train_representation/get_pred take an epoch id, not a checkpoint count, so
# address the last checkpoint on the (EPOCH_START, EPOCH_PERIOD) grid. This is
# the same value as TOTOAL_EPOCH only when EPOCH_START == EPOCH_PERIOD == 1.
last_epoch = EPOCH_START + (TOTOAL_EPOCH - 1) * EPOCH_PERIOD

# A separate name is used so the per-checkpoint `samples` array built earlier
# is not overwritten.
final_samples = data_provider.train_representation(last_epoch)
pred = data_provider.get_pred(last_epoch, final_samples)

# Uncertainty = 1 - highest softmax probability of each training sample.
confidence = np.amax(softmax(pred, axis=1), axis=1)
uncertainty = 1-confidence
uncertainty.shape

In [None]:
# Indices of the samples whose noisy label differs from the clean label.
# A shape mismatch would make `!=` collapse to a single boolean, so fail early.
assert clean_label.shape == noisy_label.shape, "clean/noisy label arrays differ in shape"
# flatnonzero always returns a 1-D index array; argwhere(...).squeeze() would
# collapse to a 0-d array when exactly one label differs.
noise_idxs = np.flatnonzero(clean_label != noisy_label)
noise_idxs.shape

In [None]:
from singleVis.trajectory_manager import Recommender
import pickle

# Build the recommender from the per-sample uncertainty and the 2-D
# trajectories, then run its clustering step.
# NOTE(review): the meaning of 30, period=200 and metric="a" is defined by
# Recommender and is not visible from this notebook - confirm.
tm = Recommender(uncertainty, embeddings_2d, 30, period=200, metric="a")
tm.clustered()

# Store the clustered recommender next to the data, one file per method.
recommender_path = os.path.join(CONTENT_PATH,  '{}_sample_recommender.pkl'.format(VIS_METHOD))
with open(recommender_path, 'wb') as f:
    pickle.dump(tm, f, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
import pickle

# Reload the recommender written by the previous cell. The file name carries
# the VIS_METHOD prefix; the un-prefixed 'sample_recommender.pkl' is never
# produced by this notebook.
# NOTE: pickle.load executes code embedded in the file - only load files that
# this notebook (or another trusted source) created.
with open(os.path.join(CONTENT_PATH,  '{}_sample_recommender.pkl'.format(VIS_METHOD)), 'rb') as f:
    tm = pickle.load(f)

In [None]:
# Settings for the sampling experiments in the cells below.
BUDGET = 50  # number of samples drawn in every sampling round
TOLERANCE = 0.1  # noise rate handed to feedback_sampling in the noisy-feedback run
ROUND = 10  # number of rounds in the feedback experiments
INIT_ROUND = 10000  # repetitions used to average the initial-batch success rate

In [None]:
def add_noise(rate, acc_idxs, rej_idxs):
    """Simulate unreliable feedback by swapping part of two index sets.

    A fraction ``rate`` of ``acc_idxs`` is moved to the rejected set and the
    same fraction of ``rej_idxs`` is moved to the accepted set.

    Args:
        rate: Fraction of each set to flip. ``0`` disables the noise.
        acc_idxs: Array-like of accepted sample indices.
        rej_idxs: Array-like of rejected sample indices.

    Returns:
        ``(new_acc, new_rej)``. With ``rate == 0`` the inputs are returned
        unchanged; otherwise each output holds the kept entries of its own set
        (sorted and de-duplicated by ``np.setdiff1d``) followed by the entries
        flipped in from the other set.
    """
    if rate == 0:
        return acc_idxs, rej_idxs

    acc_idxs = np.asarray(acc_idxs)
    rej_idxs = np.asarray(rej_idxs)

    def _pick_flipped(idxs):
        # Number of entries to flip, never more than are available.
        n_flip = min(len(idxs), int(len(idxs) * rate))
        if n_flip == 0:
            # Nothing to draw (also covers an empty input set).
            return idxs[:0]
        # Draw without replacement: with the default replace=True the same
        # entry can be picked twice, which flips fewer entries than requested
        # and puts duplicate indices into the opposite set.
        chosen = np.random.choice(len(idxs), size=n_flip, replace=False)
        return idxs[chosen]

    acc_noise = _pick_flipped(acc_idxs)
    rej_noise = _pick_flipped(rej_idxs)

    new_acc = np.concatenate((np.setdiff1d(acc_idxs, acc_noise), rej_noise), axis=0)
    new_rej = np.concatenate((np.setdiff1d(rej_idxs, rej_noise), acc_noise), axis=0)
    return new_acc, new_rej

def init_sampling(tm, method, round, budget):
    """Average success rate of the recommender's initial batch.

    The initial batch is requested ``round`` times with empty feedback. A
    selected sample counts as a success when its index is in the module-level
    ``noise_idxs``.

    Args:
        tm: Object providing ``sample_batch_init(correct, wrong, budget)``,
            which returns a pair whose first element is the selected indices.
        method: Label used only in the printed header.
        round: Number of repetitions.
        budget: Number of samples requested per repetition.

    Returns:
        Mean of ``successes / budget`` over all repetitions.
    """
    print("Feedback sampling initialization ({}):".format(method))
    hit_rates = []
    for _ in range(round):
        # No feedback is available yet, so both feedback sets are empty.
        no_accepted = np.array([], dtype=np.int32)
        no_rejected = np.array([], dtype=np.int32)
        picked = tm.sample_batch_init(no_accepted, no_rejected, budget)[0]
        hits = np.intersect1d(picked, noise_idxs)
        hit_rates.append(len(hits)/budget)
    mean_rate = sum(hit_rates)/len(hit_rates)
    print("Success Rate:\t{:.4f}".format(mean_rate))
    return mean_rate

def feedback_sampling(tm, method, round, budget, noise_rate=0.0):
    """Run a multi-round feedback sampling experiment.

    The first batch comes from ``tm.sample_batch_init`` and every later batch
    from ``tm.sample_batch``, which receives all feedback gathered so far.
    Selected indices found in the module-level ``noise_idxs`` are fed back as
    accepted, the others as rejected. ``add_noise`` optionally corrupts the
    feedback before it is used.

    Args:
        tm: Recommender-like object with ``sample_batch_init`` and
            ``sample_batch``; both return a pair whose first element is the
            selected indices.
        method: Label used only in the printed header.
        round: Total number of rounds, including the initial one.
        budget: Number of samples requested per round.
        noise_rate: Fraction of the feedback flipped by ``add_noise``.

    Returns:
        Array in which entry ``i`` is the mean success rate over rounds
        ``0..i``.
    """
    print("Feedback sampling ({}) with noise {}:".format(method, noise_rate))

    def _split(batch):
        # Partition a selected batch into true noise hits and the remainder.
        hits = np.intersect1d(batch, noise_idxs)
        return hits, np.setdiff1d(batch, hits)

    per_round = np.zeros(round)
    accepted = np.array([], dtype=np.int32)
    rejected = np.array([], dtype=np.int32)

    # Initial round: nothing has been labelled yet.
    first_batch = tm.sample_batch_init(accepted, rejected, budget)[0]
    hits, misses = _split(first_batch)
    accepted = np.concatenate((accepted, hits), axis=0)
    rejected = np.concatenate((rejected, misses), axis=0)
    per_round[0] = len(accepted)/float(budget)

    # The success rate is recorded before the feedback is corrupted.
    accepted, rejected = add_noise(noise_rate, accepted, rejected)

    for step in range(1, round):
        batch = tm.sample_batch(accepted, rejected, budget)[0]
        hits, misses = _split(batch)
        per_round[step] = len(hits)/budget
        # Corrupt only this round's feedback, then merge it into the history.
        hits, misses = add_noise(noise_rate, hits, misses)
        accepted = np.concatenate((accepted, hits), axis=0)
        rejected = np.concatenate((rejected, misses), axis=0)

    print("Success Rate:\t{:.4f}".format(per_round.mean()))
    running_mean = [per_round[:upto].mean() for upto in range(1, len(per_round)+1)]
    return np.array(running_mean)

In [None]:
# Baseline for the initial batch: draw BUDGET samples uniformly at random,
# INIT_ROUND times, and average the fraction that falls into noise_idxs.
print("Random sampling init")
pool = np.arange(LEN)
s_rate = [
    len(np.intersect1d(np.random.choice(pool,size=BUDGET,replace=False), noise_idxs))/BUDGET
    for _ in range(INIT_ROUND)
]
print("Success Rate:\t{:.4f}".format(sum(s_rate)/len(s_rate)))


In [None]:
# Initial-batch success rate of the recommender `tm`, averaged over INIT_ROUND
# repetitions with BUDGET samples each; the mean rate is the cell's output.
init_sampling(tm, method=VIS_METHOD, round=INIT_ROUND, budget=BUDGET)

In [None]:
# Baseline for the feedback experiment: ROUND rounds of uniform random
# sampling without revisiting samples drawn in earlier rounds.
print("Random sampling feedback")
pool = np.arange(LEN)
random_rate = np.zeros(ROUND)
for r in range(ROUND):
    s_idxs = np.random.choice(pool,size=BUDGET,replace=False)
    found = np.intersect1d(s_idxs, noise_idxs)
    random_rate[r] = len(found)/BUDGET
    # Remove this round's draw so the next round samples from the remainder.
    pool = np.setdiff1d(pool, s_idxs)
print("Success Rate:\t{:.4f}".format(sum(random_rate)/len(random_rate)))

# Running mean of the per-round success rate (entry i covers rounds 0..i).
running = [random_rate[:upto].mean() for upto in range(1, len(random_rate)+1)]
ac_random_rate = np.array(running)
print(ac_random_rate)

In [None]:
# Feedback sampling with the recommender `tm` and noise-free feedback
# (noise_rate keeps its default of 0.0); the output is the running mean
# success rate per round.
feedback_sampling(tm=tm, method=VIS_METHOD, round=ROUND, budget=BUDGET)

In [None]:
# Same feedback experiment, but a fraction TOLERANCE of the accepted/rejected
# feedback is flipped by add_noise before it is handed back to the recommender.
feedback_sampling(tm=tm, method=VIS_METHOD, round=ROUND, budget=BUDGET, noise_rate=TOLERANCE)