In [1]:
import os, sys
import numpy as np
import torch
import json

sys.path.append("..")
from singleVis.SingleVisualizationModel import SingleVisualizationModel
from singleVis.data import NormalDataProvider
from singleVis.eval.evaluator import Evaluator
from singleVis.projector import Projector

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Dataset variant name; it is interpolated into the embedding and label file
# paths that the next cell loads.
dataset = "symmetric"

In [3]:
# Pre-computed embeddings for the selected dataset variant.
embeddings = np.load("/home/xianglin/data/noisy/{}/embedding.npy".format(dataset))

# Ground-truth labels, converted to an array so they can be compared
# element-wise with the noisy labels.
with open("/home/xianglin/data/noisy/{}/clean_label.json".format(dataset), "r") as clean_file:
    clean_label = np.array(json.load(clean_file))

# Labels after noise injection, same conversion as above.
with open("/home/xianglin/data/noisy/{}/noisy_label.json".format(dataset), "r") as noisy_file:
    noisy_label = np.array(json.load(noisy_file))

In [4]:
# Root directory of the experiment. It is derived from `dataset` so that the
# config/model loaded from here always belongs to the same dataset variant as
# the embeddings and labels loaded above (previously "symmetric" was
# hard-coded here, independent of `dataset`).
CONTENT_PATH = "/home/xianglin/data/noisy/{}".format(dataset)
# The experiment directory ships its own `config` module; make it importable.
sys.path.append(CONTENT_PATH)
from config import config

In [5]:
# Unpack the experiment configuration into module-level constants and build
# the data provider, visualization model and projector used by later cells.
GPU_ID = 0
SETTING = config["SETTING"]
CLASSES = config["CLASSES"]
DATASET = config["DATASET"]
PREPROCESS = config["VISUALIZATION"]["PREPROCESS"]
EPOCH_START = config["EPOCH_START"]
EPOCH_END = config["EPOCH_END"]
EPOCH_PERIOD = config["EPOCH_PERIOD"]

# Training parameter (subject model)
TRAINING_PARAMETER = config["TRAINING"]
NET = TRAINING_PARAMETER["NET"]
LEN = TRAINING_PARAMETER["train_num"]

# Training parameter (visualization model)
VISUALIZATION_PARAMETER = config["VISUALIZATION"]
LAMBDA = VISUALIZATION_PARAMETER["LAMBDA"]
S_LAMBDA = VISUALIZATION_PARAMETER["S_LAMBDA"]
B_N_EPOCHS = VISUALIZATION_PARAMETER["BOUNDARY"]["B_N_EPOCHS"]
L_BOUND = VISUALIZATION_PARAMETER["BOUNDARY"]["L_BOUND"]
INIT_NUM = VISUALIZATION_PARAMETER["INIT_NUM"]
ALPHA = VISUALIZATION_PARAMETER["ALPHA"]
BETA = VISUALIZATION_PARAMETER["BETA"]
MAX_HAUSDORFF = VISUALIZATION_PARAMETER["MAX_HAUSDORFF"]
HIDDEN_LAYER = VISUALIZATION_PARAMETER["HIDDEN_LAYER"]
S_N_EPOCHS = VISUALIZATION_PARAMETER["S_N_EPOCHS"]
T_N_EPOCHS = VISUALIZATION_PARAMETER["T_N_EPOCHS"]
N_NEIGHBORS = VISUALIZATION_PARAMETER["N_NEIGHBORS"]
PATIENT = VISUALIZATION_PARAMETER["PATIENT"]
MAX_EPOCH = VISUALIZATION_PARAMETER["MAX_EPOCH"]
SEGMENTS = VISUALIZATION_PARAMETER["SEGMENTS"]
RESUME_SEG = VISUALIZATION_PARAMETER["RESUME_SEG"]

# Use the selected GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda:{}".format(GPU_ID) if torch.cuda.is_available() else "cpu")

content_path = CONTENT_PATH
# The subject model package (`Model.model`) is imported from the experiment
# directory; only add it to sys.path if it is not already there.
if content_path not in sys.path:
    sys.path.append(content_path)

import Model.model as subject_model
# Instantiate the architecture named in the config (e.g. resnet18) by
# attribute lookup rather than eval(), so the config string is never executed
# as code. NET must be the name of a callable defined in Model.model.
net = getattr(subject_model, NET)()
# Human-readable class names; not referenced below (CLASSES from the config is
# what gets passed to the data provider).
classes = ("airplane", "car", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck")
data_provider = NormalDataProvider(CONTENT_PATH, net, EPOCH_START, EPOCH_END, EPOCH_PERIOD, split=-1, device=DEVICE, classes=CLASSES, verbose=1)
if PREPROCESS:
    # NOTE(review): LEN // 10 is presumably the number of samples used for
    # initialization -- confirm against NormalDataProvider.initialize.
    data_provider.initialize(LEN // 10, l_bound=L_BOUND)

# Visualization model mapping 512-d inputs to 2-d outputs, and the projector
# that wraps it.
model = SingleVisualizationModel(input_dims=512, output_dims=2, units=256, hidden_layer=HIDDEN_LAYER)
projector = Projector(vis_model=model, content_path=CONTENT_PATH, segments=SEGMENTS, device=DEVICE)

Finish initialization...


In [6]:
# Gather the training representations of every epoch (1..200) into a single
# array indexed as [epoch - 1, sample, feature].
samples = np.zeros((200, 50000, 512))
for slot, epoch in enumerate(range(1, 201)):
    samples[slot] = data_provider.train_representation(epoch)

In [None]:
# Project each epoch's representations to 2-D with that epoch's projector,
# filling the array as [epoch - 1, sample, xy].
embeddings_2d = np.zeros((200, 50000, 2))
for slot, epoch in enumerate(range(1, 201)):
    embeddings_2d[slot] = projector.batch_project(epoch, samples[slot])
# Swap the first two axes so the result is indexed as [sample, epoch - 1, xy].
embeddings_2d = embeddings_2d.transpose(1, 0, 2)


In [11]:
# Keep only the representations of epoch 200.
samples = data_provider.train_representation(200)
# Reload the 2-D embeddings cached under <CONTENT_PATH>/Model and swap their
# first two axes.
# NOTE(review): the resulting axis order depends on the order in which the
# array was saved -- confirm against the cell that writes embeddings.npy.
path = os.path.join(CONTENT_PATH, "Model", "embeddings.npy")
embeddings_2d = np.load(path).transpose(1, 0, 2)

In [7]:
# Indices of the samples whose noisy label differs from the clean label.
# flatnonzero always yields a 1-D index array, whereas argwhere(...).squeeze()
# collapses to a 0-d array when exactly one label differs.
# Assumes clean_label and noisy_label are 1-D arrays of equal length.
noise_idxs = np.flatnonzero(clean_label != noisy_label)

In [10]:
# Cache the current 2-D embeddings under <CONTENT_PATH>/Model so later runs
# can reload them instead of re-projecting every epoch.
path = os.path.join(CONTENT_PATH, "Model", "embeddings.npy")
np.save(file=path, arr=embeddings_2d)

In [10]:
# Sanity check: display the shapes of the representations and of the 2-D
# embeddings before handing them to the trajectory manager.
samples.shape, embeddings_2d.shape

((50000, 512), (200, 50000, 2))

In [12]:
from trajectory_manager import TrajectoryManager, FeedbackTrajectoryManager
# Build the feedback trajectory manager from the high-dimensional
# representations and their 2-D embeddings.
# NOTE(review): the meaning of the positional 30, period=100 and metric="v"
# is defined in trajectory_manager and not visible here -- confirm before
# tuning these values.
tm = FeedbackTrajectoryManager(samples, embeddings_2d,30, period=100,metric="v")

In [13]:
# Run the manager's clustering step.
# NOTE(review): presumably required before sample_batch can be called --
# confirm against trajectory_manager.
tm.clustered()

In [14]:
# Draw a first batch of 100 suggestions and keep the ones that really are
# mislabelled; `correct` seeds the feedback loop further down.
selected = tm.sample_batch(100)
correct = np.intersect1d(selected, noise_idxs)
# Report "<noisy samples found> <batch size>".
print(len(correct), len(selected))

23 100


In [18]:
# Baseline: sample ten batches without giving any feedback to the manager,
# printing "<noisy samples found> <batch size>" for each batch.
for _ in range(10):
    selected = tm.sample_batch(100)
    n_noisy_found = len(np.intersect1d(selected, noise_idxs))
    print(n_noisy_found, len(selected))

25 100
27 100
22 100
21 100
22 100
42 100
30 100
19 100
27 100
20 100


In [15]:
# Hybrid: before each new batch, feed the noisy samples found in the previous
# batch back to the manager, then sample again and report
# "<noisy samples found> <batch size>".
for _ in range(10):
    tm.update_belief(correct)
    selected = tm.sample_batch(100)
    # The hits of this round become the feedback for the next round.
    correct = np.intersect1d(selected, noise_idxs)
    print(len(correct), len(selected))

OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


28 100
34 100
36 100
33 100
32 100
30 100
27 100
33 100
44 100
38 100
