In [None]:
import os
import numpy as np
import sklearn
import sklearn.metrics
import matplotlib.pyplot as plt
# Silence NumPy's floating-point warnings for division by zero and for
# invalid operations. np.seterr changes global NumPy error handling, so this
# affects every later cell.
np.seterr(divide='ignore')
np.seterr(invalid='ignore')

In [None]:

# Functions to evaluate models

def eval_model(model, Xs, ys, certainty_threshold):
    """Evaluate a thresholded streaming model on every sequence in (Xs, ys).

    For each sequence a fresh model is built from the first
    ``warm_start_cutoff`` samples, then the remaining samples are fed one at
    a time through ``predict_update``.

    Relies on the module-level names ``warm_start_cutoff``, ``k`` and
    ``window_size``.

    Args:
        model: Callable invoked as
            ``model(X_warm, y_warm, k, window_size, certainty_threshold=...)``.
            The returned object must expose ``predict_update(x, y)`` returning
            a ``(prediction, added)`` pair.
        Xs: Iterable of per-sequence feature arrays.
        ys: Iterable of per-sequence label arrays, paired with ``Xs``.
        certainty_threshold: Value forwarded unchanged to the model.

    Returns:
        Tuple ``(accuracies, num_updated)`` of NumPy arrays with one entry per
        sequence: the accuracy on the streamed part of the sequence and the
        number of steps for which ``added`` was truthy.
    """
    start = warm_start_cutoff
    per_seq_accuracy = []
    per_seq_updates = []

    for features, labels in zip(Xs, ys):
        learner = model(
            features[:start],
            labels[:start],
            k,
            window_size,
            certainty_threshold=certainty_threshold,
        )

        predictions = []
        update_count = 0
        for t in range(start, labels.shape[0]):
            prediction, was_added = learner.predict_update(features[t], labels[t])
            predictions.append(prediction)
            update_count += 1 if was_added else 0

        score = sklearn.metrics.accuracy_score(np.array(predictions), labels[start:])
        per_seq_accuracy.append(score)
        per_seq_updates.append(update_count)

    return np.array(per_seq_accuracy), np.array(per_seq_updates)


def eval_baseline(model, Xs, ys):
    """Evaluate a baseline streaming model on every sequence in (Xs, ys).

    Each sequence gets a fresh model built from its first
    ``warm_start_cutoff`` samples; the remaining samples are then passed one
    by one to ``predict_update``, whose return value is taken as the
    prediction.

    Relies on the module-level names ``warm_start_cutoff``, ``k`` and
    ``window_size``.

    Args:
        model: Callable invoked as ``model(X_warm, y_warm, k, window_size)``.
            The returned object must expose ``predict_update(x, y)``.
        Xs: Iterable of per-sequence feature arrays.
        ys: Iterable of per-sequence label arrays, paired with ``Xs``.

    Returns:
        NumPy array holding one accuracy value per sequence, computed on the
        streamed part of the sequence only.
    """
    scores = []
    for features, labels in zip(Xs, ys):
        learner = model(features[:warm_start_cutoff], labels[:warm_start_cutoff], k, window_size)

        predictions = [
            learner.predict_update(features[t], labels[t])
            for t in range(warm_start_cutoff, labels.shape[0])
        ]

        scores.append(
            sklearn.metrics.accuracy_score(np.array(predictions), labels[warm_start_cutoff:])
        )

    return np.array(scores)


In [None]:
from models.KnnC2 import KnnUnsupervisedC2
from models.KnnC1 import KnnUnsupervisedC1
from models.KnnC3 import KnnUnsupervisedC3

# functions for finding the optimal thresholds

def eval_thresholds(Xs, y, thresholds1, thresholds2, thresholds3):
    """Sweep the certainty threshold of the C1, C2 and C3 models.

    Every threshold is evaluated with ``eval_model`` and a
    ``threshold: mean +- std`` line is printed for it. Sweeps run in the
    order C1, C2, C3.

    Note the argument-to-model mapping, which is not in numeric order:
    ``thresholds1`` drives C2, ``thresholds2`` drives C1 and ``thresholds3``
    drives C3.

    Args:
        Xs: Per-sequence feature arrays, forwarded to ``eval_model``.
        y: Per-sequence label arrays, forwarded to ``eval_model``.
        thresholds1: Iterable of thresholds tried for ``KnnUnsupervisedC2``.
        thresholds2: Iterable of thresholds tried for ``KnnUnsupervisedC1``.
        thresholds3: Iterable of thresholds tried for ``KnnUnsupervisedC3``.

    Returns:
        Six lists in the order
        ``(accs_c2, accs_c1, accs_c3, updates_c2, updates_c1, updates_c3)``.
        Each list has one entry per threshold; every entry is the
        per-sequence array returned by ``eval_model``.
    """
    def _sweep(model, thresholds, gap):
        # Evaluate `model` at each threshold, collecting accuracies and update counts.
        accs, updates = [], []
        for thres in thresholds:
            acc, num_updates = eval_model(model, Xs, y, thres)
            print(f'{thres}:{gap}{np.mean(acc):.3f} +- {np.std(acc):.3f}')
            accs.append(acc)
            # The C1 sweep previously dropped this value (a bare expression
            # instead of an append), so its update counts came back empty.
            updates.append(num_updates)
        return accs, updates

    # `gap` reproduces the original column spacing of each printed line.
    accs_c1, num_updates_c1 = _sweep(KnnUnsupervisedC1, thresholds2, ' ' * 6)
    accs_c2, num_updates_c2 = _sweep(KnnUnsupervisedC2, thresholds1, ' ' * 5)
    accs_c3, num_updates_c3 = _sweep(KnnUnsupervisedC3, thresholds3, ' ' * 6)

    return accs_c2, accs_c1, accs_c3, num_updates_c2, num_updates_c1, num_updates_c3
    

In [None]:
from baseline_models.supervised import KnnSupervised
from baseline_models.unsupervised_upper_bound import KnnUnsupervisedUpperBound
from baseline_models.unsupervised_naive import KnnUnsupervisedNaive
from baseline_models.unsupervised_naive_nolearn import KnnUnsupervisedNaiveNoLearn
from models.KnnC1 import KnnUnsupervisedC1
from models.KnnC2 import KnnUnsupervisedC2
from models.KnnC3 import KnnUnsupervisedC3


# experimental function

def dataset_experiment(Xs, y, threshold_basis=None, threshold_neighbor1=None, threshold_neighbor2=None, f_out=None):
    """Run all baselines and the C1/C2/C3 models on one dataset.

    Prints the number of sequences, then one ``label mean +- std`` line per
    method, and optionally writes the unrounded means and standard
    deviations to a CSV file with the header ``method,mean,std``.

    Methods run in this order: Supervised, Upper Bound (written to the CSV as
    "Perfect Policy"), Naive, No Learn, c1, c2, c3.

    Args:
        Xs: Per-sequence feature arrays.
        y: Per-sequence label arrays.
        threshold_basis: Certainty threshold forwarded to ``KnnUnsupervisedC2``.
        threshold_neighbor1: Certainty threshold forwarded to ``KnnUnsupervisedC1``.
        threshold_neighbor2: Certainty threshold forwarded to ``KnnUnsupervisedC3``.
        f_out: Path of the CSV file to write; nothing is written when None.

    Returns:
        None.
    """
    print(len(Xs))

    # (console label, CSV label, evaluation thunk), listed in execution order.
    # Thunks keep evaluation lazy so each result is printed as soon as it is
    # available, exactly as in a hand-unrolled sequence of calls.
    experiments = [
        ('Supervised:  ', 'Supervised',
         lambda: eval_baseline(KnnSupervised, Xs, y)),
        ('Upper Bound: ', 'Perfect Policy',
         lambda: eval_baseline(KnnUnsupervisedUpperBound, Xs, y)),
        ('Naive:       ', 'Naive',
         lambda: eval_baseline(KnnUnsupervisedNaive, Xs, y)),
        ('No Learn:    ', 'No Learn',
         lambda: eval_baseline(KnnUnsupervisedNaiveNoLearn, Xs, y)),
        ('c1:      ', 'c1',
         lambda: eval_model(KnnUnsupervisedC1, Xs, y, threshold_neighbor1)[0]),
        ('c2:      ', 'c2',
         lambda: eval_model(KnnUnsupervisedC2, Xs, y, threshold_basis)[0]),
        ('c3:      ', 'c3',
         lambda: eval_model(KnnUnsupervisedC3, Xs, y, threshold_neighbor2)[0]),
    ]

    out = 'method,mean,std\n'
    for console_label, csv_label, run in experiments:
        acc = run()
        mean, std = np.mean(acc), np.std(acc)
        print(f'{console_label}{mean:.3f} +- {std:.3f}')
        out += csv_label + ',' + str(mean) + ',' + str(std) + '\n'

    if f_out is not None:
        # Context manager guarantees the handle is closed even if write() raises.
        with open(f_out, "w") as file:
            file.write(out)

    # NOTE(review): this function creates no figure itself, so this call only
    # has an effect if figures are already pending — kept for compatibility.
    plt.show()



# Leaky 

In [None]:
# Set experimental parameters
# These are read as globals by eval_model and eval_baseline.
# The first `warm_start_cutoff` samples of each sequence initialise the model;
# the remaining samples are streamed through predict_update one at a time.
warm_start_cutoff = 50
# Passed to every model constructor as its fourth positional argument.
window_size = 50
# Passed to every model constructor as its third positional argument
# (presumably the number of neighbours — confirm in the model classes).
k = 5

In [None]:
# Load dataset
# np.load returns an NpzFile that keeps the archive open; the context manager
# closes it as soon as both arrays have been read into memory.
with np.load('Leaky.npz') as dataset:
    Xs = dataset['Xs']
    ys = dataset['ys']
print(Xs.shape)
print(ys.shape)

In [None]:
# Run all baselines and the c1/c2/c3 models on Leaky and write the summary to
# results_leaky.csv. No thresholds are passed, so every model receives
# certainty_threshold=None (presumably the models then determine their own
# threshold — confirm in the model classes).
dataset_experiment(Xs, ys, f_out='results_leaky.csv')

In [None]:
# finding optimal threshold
# Sweep grids: c2 over np.linspace(0,1,41); c1 and c3 over np.linspace(-1,1,81).
# eval_thresholds returns (c2, c1, c3) accuracies followed by (c2, c1, c3)
# update counts, so the names below map as:
#   leaky_basis3 / leaky_updates_thres3 -> c2
#   leaky_basis4 / leaky_updates_thres4 -> c1
#   leaky_basis5 / leaky_updates_thres5 -> c3
leaky_basis3, leaky_basis4, leaky_basis5, leaky_updates_thres3, leaky_updates_thres4, leaky_updates_thres5 = eval_thresholds(Xs, ys, np.linspace(0,1,41), np.linspace(-1,1,81), np.linspace(-1,1,81))

In [None]:
# Stack each sweep into a (threshold, sequence) matrix, take the best accuracy
# over thresholds for every sequence, and print its mean and std over sequences.
leaky3 = np.vstack(leaky_basis3)
leaky3_best = leaky3.max(axis=0)
print(leaky3_best.mean(), leaky3_best.std())

leaky4 = np.vstack(leaky_basis4)
leaky4_best = leaky4.max(axis=0)
print(leaky4_best.mean(), leaky4_best.std())

leaky5 = np.vstack(leaky_basis5)
leaky5_best = leaky5.max(axis=0)
print(leaky5_best.mean(), leaky5_best.std())

In [None]:
# Accuracy of every sequence as a function of the threshold: one faint line
# per sequence, one colour per method.
theta_c2 = np.linspace(0, 1, 41)
theta_c1_c3 = np.linspace(-1, 1, 81)

plt.figure()
# Rows of leaky3/4/5 are thresholds and columns are sequences, so the loop
# must run over columns (shape[1]). Using shape[0] iterates over the number
# of thresholds, which either raises IndexError or skips sequences.
for i in range(leaky3.shape[1]):
    # Label only the first line of each method so the legend shows one entry each.
    is_first = i == 0
    plt.plot(theta_c1_c3, leaky4[:, i], c='tab:blue', alpha=0.3, label='c1' if is_first else None)
    plt.plot(theta_c2, leaky3[:, i], c='tab:orange', alpha=0.3, label='c2' if is_first else None)
    plt.plot(theta_c1_c3, leaky5[:, i], c='tab:green', alpha=0.3, label='c3' if is_first else None)

# Axis labels and legend are figure-level, so set them once after the loop.
plt.ylabel('Accuracy')
plt.xlabel('Theta')
plt.legend()

plt.show()

# HSI

In [None]:
# Experimental parameters for HSI; these rebind the globals set for Leaky
# earlier in the notebook, which eval_model and eval_baseline read.
warm_start_cutoff = 100
window_size = 100
k = 5

# Unfortunately, the HSI dataset cannot be made publicly available; the experimental code is the same as for Leaky.