In [1]:
import os, sys
import numpy as np
import torch
import json
from scipy.special import softmax

sys.path.append("..")
from singleVis.SingleVisualizationModel import SingleVisualizationModel
from singleVis.data import DenseActiveLearningDataProvider
from singleVis.projector import DenseALProjector

In [2]:
# Root directory of the subject-model run; config.py is imported from it and
# the "Model" sub-directory under it is used for saved artefacts.
# The location can be overridden with the DVI_CONTENT_PATH environment variable
# so the notebook is not tied to one machine; the default is the original path.
CONTENT_PATH = os.environ.get(
    "DVI_CONTENT_PATH",
    "/home/xianglin/projects/DVI_data/active_learning/random/resnet18/MNIST",
)
GPU_ID = "0"      # CUDA device index, formatted into the torch device string
epoch_num = 20    # last epoch index used when loading representations/predictions
iteration = 3     # active-learning iteration analysed by this notebook

In [3]:
# Make the run directory importable so its config.py can be loaded as a module.
# The append must happen before the import below.
sys.path.append(CONTENT_PATH)
from config import config

In [4]:
# Unpack the run configuration into module-level constants.
# Not all of these are referenced in the cells shown here; they are kept so
# that any later cell relying on them keeps working.
SETTING = config["SETTING"]
CLASSES = config["CLASSES"]
DATASET = config["DATASET"]
BASE_ITERATION = config["BASE_ITERATION"]

# Training parameter (subject model)
TRAINING_PARAMETER = config["TRAINING"]
NET = TRAINING_PARAMETER["NET"]
LEN = TRAINING_PARAMETER["train_num"]

# Training parameter (visualization model)
VISUALIZATION_PARAMETER = config["VISUALIZATION"]
LAMBDA = VISUALIZATION_PARAMETER["LAMBDA"]
S_LAMBDA = VISUALIZATION_PARAMETER["S_LAMBDA"]
B_N_EPOCHS = VISUALIZATION_PARAMETER["BOUNDARY"]["B_N_EPOCHS"]
L_BOUND = VISUALIZATION_PARAMETER["BOUNDARY"]["L_BOUND"]
INIT_NUM = VISUALIZATION_PARAMETER["INIT_NUM"]
ALPHA = VISUALIZATION_PARAMETER["ALPHA"]
BETA = VISUALIZATION_PARAMETER["BETA"]
MAX_HAUSDORFF = VISUALIZATION_PARAMETER["MAX_HAUSDORFF"]
HIDDEN_LAYER = VISUALIZATION_PARAMETER["HIDDEN_LAYER"]
S_N_EPOCHS = VISUALIZATION_PARAMETER["S_N_EPOCHS"]
T_N_EPOCHS = VISUALIZATION_PARAMETER["T_N_EPOCHS"]
N_NEIGHBORS = VISUALIZATION_PARAMETER["N_NEIGHBORS"]
PATIENT = VISUALIZATION_PARAMETER["PATIENT"]
MAX_EPOCH = VISUALIZATION_PARAMETER["MAX_EPOCH"]

# define hyperparameters
# Use the requested GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda:{}".format(GPU_ID) if torch.cuda.is_available() else "cpu")
# Visualization model mapping 512-d inputs to 2-d outputs.
model = SingleVisualizationModel(input_dims=512, output_dims=2, units=256, hidden_layer=HIDDEN_LAYER)

# Model/ lives under CONTENT_PATH, which an earlier cell added to sys.path.
import Model.model as subject_model

# Resolve the subject network constructor by attribute name rather than
# eval()-ing a string built from the config: same result for a plain
# constructor name, without executing arbitrary config text.
net = getattr(subject_model, NET)()
data_provider = DenseActiveLearningDataProvider(
    CONTENT_PATH,
    net,
    BASE_ITERATION,
    epoch_num,
    split=-1,
    device=DEVICE,
    classes=CLASSES,
    verbose=1,
)
projector = DenseALProjector(
    vis_model=model,
    content_path=CONTENT_PATH,
    vis_model_name="al_hybrid",
    device=DEVICE,
)


Finish initialization...


In [5]:
# Build the visualizer on top of the data provider and projector created above.
# NOTE(review): the meaning of the literal 300 is not visible here (presumably
# a plot resolution) — confirm against DenseALvisualizer's signature.
from singleVis.visualizer import DenseALvisualizer
vis = DenseALvisualizer(data_provider, projector, 300)

In [6]:
# Plot the labeled subset: representations, ground-truth labels and predicted
# classes (argmax over the prediction scores) for the chosen iteration/epoch.
# NOTE(review): the recorded output of this cell is "no train labels saved for
# Iteration 3" followed by an AttributeError on a None object. Confirm that
# labels for the labeled subset exist for this iteration before relying on it.
data = data_provider.train_representation_lb(iteration, epoch_num)
labels = data_provider.train_labels_lb(iteration)
pred = data_provider.get_pred(iteration, epoch_num, data).argmax(1)
vis.savefig_cus(iteration, epoch_num, data, pred, labels)

no train labels saved for Iteration 3


AttributeError: 'NoneType' object has no attribute 'cpu'

In [5]:
# Per-sample uncertainty at the final epoch: one minus the largest softmax
# probability of the subject model's prediction scores.
samples = data_provider.train_representation(iteration, epoch_num)
pred = data_provider.get_pred(iteration, epoch_num, samples)
confidence = softmax(pred, axis=1).max(axis=1)
uncertainty = 1 - confidence

100%|██████████| 300/300 [00:00<00:00, 5256.83it/s]


In [6]:
# Collect the training representations of every epoch into one array of shape
# (epoch_num, LEN, 512); slot k holds epoch k + 1.
# NOTE: this rebinds `samples`, which the uncertainty cell uses for the
# single-epoch representation.
samples = np.zeros((epoch_num, LEN, 512))
for slot, epoch in enumerate(range(1, epoch_num + 1)):
    samples[slot] = data_provider.train_representation(iteration, epoch)

In [7]:
# Project each epoch's representations to 2-D, then reorder the axes from
# (epoch, sample, 2) to (sample, epoch, 2) so each row is one sample's trajectory.
embeddings_2d = np.zeros((epoch_num, LEN, 2))
for slot in range(epoch_num):
    epoch = slot + 1
    embeddings_2d[slot] = projector.batch_project(iteration, epoch, samples[slot])
embeddings_2d = embeddings_2d.transpose(1, 0, 2)
labels = data_provider.train_labels(iteration)

Successfully load the visualization model in iteration 1 for range (1,17]...
Same range as current visualization model...
Same range as current visualization model...
Same range as current visualization model...
Same range as current visualization model...
Same range as current visualization model...
Same range as current visualization model...
Same range as current visualization model...
Same range as current visualization model...
Same range as current visualization model...
Same range as current visualization model...
Same range as current visualization model...
Same range as current visualization model...
Same range as current visualization model...
Same range as current visualization model...
Same range as current visualization model...
Successfully load the visualization model in iteration 1 for range (17,50]...
Same range as current visualization model...
Same range as current visualization model...
Same range as current visualization model...


In [8]:
# Persist the trajectory embeddings under the iteration's Model directory so
# they can be reloaded without re-running the projection.
path = os.path.join(CONTENT_PATH, "Model", "Iteration_{}".format(iteration),"trajectory_embeddings.npy")
np.save(path,embeddings_2d)

In [7]:
# Reload previously saved trajectory embeddings for this iteration.
# The file must already exist; it is written by the np.save cell.
path = os.path.join(CONTENT_PATH, "Model", "Iteration_{}".format(iteration),"trajectory_embeddings.npy")
embeddings_2d = np.load(path)

In [7]:
# Sanity check of array shapes.
# NOTE(review): the recorded output shows `samples` as 2-D, i.e. the
# single-epoch array; after the per-epoch stacking cell has run, `samples`
# is 3-D instead. The result depends on which cells ran in this kernel.
samples.shape,uncertainty.shape, embeddings_2d.shape

((60000, 512), (60000,), (60000, 20, 2))

In [8]:
# Drop the labeled samples: keep uncertainty scores and 2-D trajectories only
# for the indices reported as unlabeled at this iteration.
lb_idxs = data_provider.get_labeled_idx(iteration)
ulb_idxs = data_provider.get_unlabeled_idx(LEN, lb_idxs)

# Both arrays are indexed along their first axis by the unlabeled indices, so
# row k of each corresponds to sample ulb_idxs[k].
ulb_uncertainty = uncertainty[ulb_idxs]
ulb_trajectory = embeddings_2d[ulb_idxs]

In [11]:
# Build the trajectory-based recommender over the unlabeled pool, cluster it,
# and pickle it next to the other artefacts of this iteration.
from singleVis.trajectory_manager import TrajectoryManager, FeedbackTrajectoryManager, Recommender
import pickle
# NOTE(review): the meaning of 30, period=50 and metric="a" is not visible in
# this notebook (the trailing numbers look like other values that were tried);
# confirm against Recommender's signature.
tm = Recommender(ulb_uncertainty, ulb_trajectory, 30, period=50,metric="a")#20.50,80
tm.clustered()
with open(os.path.join(CONTENT_PATH, "Model","Iteration_{}".format(iteration), 'sample_recommender.pkl'), 'wb') as f:
    pickle.dump(tm, f, pickle.HIGHEST_PROTOCOL)

In [9]:
# Reload the pickled recommender for this iteration instead of rebuilding it.
# SECURITY: pickle.load executes code embedded in the file; only load
# recommender files produced by a trusted run of this notebook.
import pickle
with open(os.path.join(CONTENT_PATH, "Model","Iteration_{}".format(iteration), 'sample_recommender.pkl'), 'rb') as f:
    tm = pickle.load(f)

In [10]:
# Find the unlabeled samples the subject model misclassifies at the final
# epoch; these are the targets the sampling strategies below try to hit.
data = data_provider.train_representation(iteration, epoch_num)
labels = data_provider.train_labels(iteration)
pred = data_provider.get_pred(iteration, epoch_num, data).argmax(1)
# flatnonzero always yields a 1-D index array; argwhere(...).squeeze() would
# collapse to a 0-d array when exactly one sample is mispredicted.
wrong_pred_idx = np.flatnonzero(pred != labels)
ulb_wrong = np.intersect1d(wrong_pred_idx, ulb_idxs)
# Prints the count of mispredicted unlabeled samples and their share of the pool.
print(len(ulb_wrong), len(ulb_wrong)/len(ulb_idxs))

100%|██████████| 300/300 [00:00<00:00, 4109.00it/s]

617 0.012854166666666667





In [11]:
# Settings for the sampling experiments.
test_len = 50                    # number of samples requested per sampling round
ignore_rate = 0.1
remain_rate = 1 - ignore_rate    # complement of ignore_rate

In [25]:
# random
# Baseline: draw test_len unlabeled indices uniformly at random, always from
# the full pool, and measure the fraction that are mispredicted samples.
# NOTE(review): the recorded output for this cell is far above the
# misprediction share printed earlier; it looks like a stale run — re-run to
# confirm.
print("Random sampling init")
pool = np.array(ulb_idxs)
s_rate = []
for _ in range(10000):
    s_idxs = np.random.choice(pool, size=test_len, replace=False)
    s_rate.append(np.intersect1d(s_idxs, ulb_wrong).size / test_len)
print("Success Rate:\t{:.4f}".format(sum(s_rate) / len(s_rate)))


Random sampling init
Success Rate:	0.5006


In [12]:
# random
# Baseline with feedback rounds: ten random batches of test_len indices, where
# each drawn batch is removed from the pool before the next draw.
print("Random sampling feedback")
pool = np.array(ulb_idxs)
s_rate = []
for _ in range(10):
    s_idxs = np.random.choice(pool, size=test_len, replace=False)
    s_rate.append(np.intersect1d(s_idxs, ulb_wrong).size / test_len)
    # Already-sampled indices are not eligible again.
    pool = np.setdiff1d(pool, s_idxs)
print("Success Rate:\t{:.4f}".format(sum(s_rate) / len(s_rate)))
print(s_rate)

Random sampling feedback
Success Rate:	0.0060
[0.0, 0.02, 0.0, 0.0, 0.0, 0.0, 0.0, 0.02, 0.0, 0.02]


In [13]:
# Trajectory-based (TB) sampling, first round: no feedback exists yet, so the
# recommender is always queried with empty accepted/rejected index sets.
print("TB sampling initialization:")
# Loop-invariant: list form of the unlabeled indices. Hoisted out of the loop
# (it was rebuilt on every one of the 10000 trials) and kept under this name
# because the follow-up feedback cell may read it.
map_ulb = ulb_idxs.tolist()
init_rate = list()
for _ in range(10000):
    # Fresh empty int32 arrays per trial, matching what the original
    # index-mapping comprehensions produced for empty feedback.
    map_acc_idxs = np.array([]).astype(np.int32)
    map_rej_idxs = np.array([]).astype(np.int32)

    # The recommender returns positions into the unlabeled pool; map them back
    # to dataset indices.
    suggest_idxs, _ = tm.sample_batch_init(map_acc_idxs, map_rej_idxs, test_len)
    suggest_idxs = ulb_idxs[suggest_idxs]

    # "correct" = suggested samples that really are mispredicted; "wrong" = the
    # remaining suggestions. The values from the last trial stay bound and seed
    # the feedback rounds in the next cell.
    correct = np.intersect1d(suggest_idxs, ulb_wrong)
    wrong = np.setdiff1d(suggest_idxs, correct)
    init_rate.append(len(correct)/test_len)
print("Init success Rate:\t{:.4f}".format(sum(init_rate)/len(init_rate)))


TB sampling initialization:
Init success Rate:	0.0156


In [14]:
# kernel regression
# Trajectory-based sampling with feedback: each round passes the accumulated
# accepted (`correct`) and rejected (`wrong`) samples to the recommender and
# scores the new batch it suggests.
# NOTE: `correct` and `wrong` are extended in place of their previous values,
# so re-running this cell without re-running the initialisation cell keeps
# growing them.
print("TB sampling:")
# Position lookup: dataset index -> position in the unlabeled pool. A dict
# gives O(1) lookups instead of an O(N) list.index scan per element, and is
# built here so the cell does not depend on `map_ulb` from another cell.
# setdefault keeps the first occurrence, matching list.index.
ulb_position = {}
for pos, idx in enumerate(ulb_idxs.tolist()):
    ulb_position.setdefault(idx, pos)

s_rate = list()
for _ in range(9):
    map_acc_idxs = np.array([ulb_position[int(i)] for i in correct]).astype(np.int32)
    map_rej_idxs = np.array([ulb_position[int(i)] for i in wrong]).astype(np.int32)
    suggest_idxs, _ = tm.sample_batch(map_acc_idxs, map_rej_idxs, test_len)
    # Map pool positions back to dataset indices.
    suggest_idxs = ulb_idxs[suggest_idxs]
    # Every suggestion is drawn from ulb_idxs, so intersecting with ulb_idxs
    # first is redundant; intersect1d already de-duplicates and sorts.
    c = np.intersect1d(suggest_idxs, ulb_wrong)
    w = np.setdiff1d(suggest_idxs, c)
    s_rate.append(len(c) / test_len)
    correct = np.concatenate((correct, c), axis=0)
    wrong = np.concatenate((wrong, w), axis=0)
print("Success Rate:\t{:.4f}".format(sum(s_rate)/len(s_rate)))
print(s_rate)

TB sampling:
Success Rate:	0.4267
[0.02, 0.64, 0.66, 0.44, 0.42, 0.46, 0.46, 0.34, 0.4]
