In [None]:
# Verify the k-Center-Greedy algorithm used by the coreset query strategy

In [None]:
# import modules
import sys
import numpy as np
import os
from sklearn.neighbors import NearestNeighbors
from scipy.spatial import distance_matrix

from deepvisualinsight.MMS import MMS
from deepvisualinsight import utils


In [None]:
# Root directory of the experiment. It is appended to sys.path so that the
# `Model` package imported below can be resolved from it.
# NOTE(review): hard-coded absolute Windows path — adjust to the local machine.
content_path = "E:\\DVI_exp_data\\active_learning\\LeastConfidence"
sys.path.append(content_path)

# NOTE(review): wildcard import; ResNet18 is presumably provided by it and is
# the only such name used in the visible cells.
from Model.model import *
net = ResNet18()
# Class names handed to MMS (presumably the ten CIFAR-10 classes — TODO confirm).
classes = ("airplane", "car", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck")

# Build the MMS object that the following cells query for `model_path`,
# `training_data`, `get_representation_data` and `batch_project`.
# NOTE(review): the positional arguments (1, 20, 1, 512, 10) are not named
# here; check the MMS constructor signature for their meaning.
mms = MMS(content_path, net, 1, 20, 1, 512, 10, classes, cmap="tab10", resolution=100, neurons=256,
          verbose=1, temporal=False, split=-1, advance_border_gen=True, attack_device="cuda:0")

In [None]:
# hyperparameters
# EPOCH=10

In [None]:
# high-epsilon
# For each epoch, record the largest distance from any training sample to its
# nearest sample in the labelled set stored for the following epoch. Distances
# are measured on the output of mms.get_representation_data (presumably the
# high-dimensional representation, hence "high" — TODO confirm).
epsilons = np.zeros(20)
for n_epoch in range(20):
    # Labelled indices saved under the next epoch's directory.
    new_index_file = os.path.join(mms.model_path, "Epoch_{:d}".format(n_epoch+1), "index.json")
    new_index = utils.load_labelled_data_index(new_index_file)

    training_data = mms.training_data
    train_data = mms.get_representation_data(n_epoch, training_data)

    # Representations of the labelled samples: these are the centres.
    curr = train_data[new_index]

    # The search structure must be fitted on the centre representations, not on
    # the list of integer indices that selects them.
    nbs = NearestNeighbors(n_neighbors=1, algorithm="ball_tree").fit(curr)
    dists, _ = nbs.kneighbors(train_data)
    dists = dists.squeeze()
    # Covering radius: the worst-case distance to the closest centre.
    epsilons[n_epoch] = dists.max()

In [None]:
# high-epsilon
# NOTE(review): the label above matches the previous cell's, but this cell
# measures distances on the output of mms.batch_project (named repr_2d,
# presumably a 2-D projection) — confirm whether the label should differ.
# For each epoch: largest distance from any projected training sample to the
# closest projected sample of the next epoch's labelled set.
epsilons = np.zeros(20)
for n_epoch in range(len(epsilons)):
    # Labelled indices saved under the next epoch's directory.
    new_index_file = os.path.join(
        mms.model_path, "Epoch_{:d}".format(n_epoch + 1), "index.json"
    )
    new_index = utils.load_labelled_data_index(new_index_file)

    training_data = mms.training_data
    train_data = mms.get_representation_data(n_epoch, training_data)

    # Labelled indices of the current epoch (loaded but not used further here).
    index_file = os.path.join(
        mms.model_path, "Epoch_{:d}".format(n_epoch), "index.json"
    )
    index = utils.load_labelled_data_index(index_file)

    repr_2d = mms.batch_project(train_data, n_epoch)
    centers_2d = repr_2d[new_index]

    # Nearest-centre lookup over the projected labelled samples.
    nn_search = NearestNeighbors(n_neighbors=1, algorithm="ball_tree")
    nn_search.fit(centers_2d)

    dists, _ = nn_search.kneighbors(repr_2d)
    dists = np.squeeze(dists)
    epsilons[n_epoch] = np.max(dists)

In [None]:
def greedy_k_center(labeled, unlabeled, amount, batch_size=1000):
    """Select rows of `unlabeled` with the greedy k-center heuristic.

    Starting from the points in `labeled` as existing centres, repeatedly pick
    the unlabeled point whose distance to its nearest centre is largest and
    add it to the centres.

    Parameters
    ----------
    labeled : array-like, shape (n_labeled, n_features)
        Points that already act as centres. May be empty, in which case the
        first pick is row 0 of `unlabeled`.
    unlabeled : array-like, shape (n_unlabeled, n_features)
        Candidate points.
    amount : int
        Number of points to select. Values above `n_unlabeled` are capped at
        `n_unlabeled`; values <= 0 select nothing.
    batch_size : int, optional
        Number of labeled rows processed per distance-matrix call, to bound
        the memory of the intermediate (batch_size, n_unlabeled) matrix.

    Returns
    -------
    numpy.ndarray of int
        Selected row positions, in selection order. They index into
        `unlabeled`, NOT into whatever larger array `unlabeled` was sliced
        from, and contain no duplicates.

    Raises
    ------
    ValueError
        If `batch_size` is smaller than 1.
    """
    labeled = np.asarray(labeled)
    unlabeled = np.asarray(unlabeled)
    n_unlabeled = unlabeled.shape[0]

    # Never ask for more points than exist, and select nothing for amount <= 0.
    amount = min(int(amount), n_unlabeled)
    if amount <= 0:
        return np.empty(0, dtype=np.intp)
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Distance from every unlabeled point to its nearest labeled point,
    # accumulated chunk by chunk to avoid one huge distance matrix.
    # With no labeled points every distance stays at +inf.
    min_dist = np.full(n_unlabeled, np.inf)
    for start in range(0, labeled.shape[0], batch_size):
        chunk = labeled[start:start + batch_size]
        np.minimum(min_dist, distance_matrix(chunk, unlabeled).min(axis=0), out=min_dist)

    greedy_indices = np.empty(amount, dtype=np.intp)
    for i in range(amount):
        if i > 0:
            # Account for the centre added in the previous round.
            last = greedy_indices[i - 1]
            to_last = distance_matrix(unlabeled[last].reshape(1, -1), unlabeled)[0]
            np.minimum(min_dist, to_last, out=min_dist)
            # Exclude the chosen point so that ties at distance 0 can never
            # select it a second time.
            min_dist[last] = -np.inf
        # Farthest remaining point; argmax keeps the first one on ties.
        greedy_indices[i] = np.argmax(min_dist)

    return greedy_indices

In [None]:
# Epsilon obtained when the next labelled batch is chosen by greedy_k_center
# on the projected data: for each epoch, the largest distance from any
# projected training sample to its nearest sample in (current labelled set +
# 1000 greedily selected samples).
epsilons = np.zeros(20)
for n_epoch in range(20):
    training_data = mms.training_data
    train_data = mms.get_representation_data(n_epoch, training_data)

    index_file = os.path.join(mms.model_path, "Epoch_{:d}".format(n_epoch), "index.json")
    index = utils.load_labelled_data_index(index_file)

    repr_2d = mms.batch_project(train_data, n_epoch)
    # Dataset indices that are not labelled yet. The pool size follows the
    # projected data instead of a fixed 50000, and the integer dtype keeps an
    # empty labelled list from turning the result into floats.
    unl_idx = np.setdiff1d(np.arange(len(repr_2d)), np.asarray(index, dtype=int))
    # greedy_k_center returns positions inside the array it was given
    # (repr_2d[unl_idx]); map them back to dataset indices through unl_idx.
    picked = greedy_k_center(repr_2d[index], repr_2d[unl_idx], 1000)
    new_index = unl_idx[picked]
    all_index = list(index) + new_index.tolist()

    nbs = NearestNeighbors(n_neighbors=1, algorithm="ball_tree").fit(repr_2d[all_index])

    dists, _ = nbs.kneighbors(repr_2d)
    dists = dists.squeeze()
    # Covering radius: the worst-case distance to the closest centre.
    epsilons[n_epoch] = dists.max()