In [1]:
import numpy as np
from dataLoader import importData


data_set = 5
percentage_anomalies = [0.1, 1, 10, 20, 30]

x_train, y_train, x_test, y_test, dataset_name = importData(data_set)
#x_train, x_test = pca_transform(x_train, x_test)

x_train = np.concatenate([x_train, x_test], axis=0)
y_train = np.concatenate([y_train, y_test])
num_class = np.max(y_train) + 1
print('Number of classes:', num_class)

Data-set: mnist
Number of classes: 10


In [2]:
store_folder = '../stored_results/'

In [3]:
import random
from shell_anon import NormalizedClusterLearner
from normalization import InstanceNormalization, ErgoNormalization, NaiveNormalization, NoNormalization, PreTrainedNormalization
from sklearn.metrics import roc_auc_score, average_precision_score, pairwise_distances


def build_eval_set(x_train, y_train, ind, p_anon):
    x_in = x_train[y_train==ind]
    x_out = x_train[y_train!=ind]
    random.shuffle(x_out)

    num_out = int(p_anon/100 * x_in.shape[0])
    data = np.concatenate([x_in, x_out[:num_out]], axis=0)
    gt = np.zeros(data.shape[0], dtype=int)
    gt[:data.shape[0] -num_out] = 1
    
    return data, gt



class AnonEvaluationStatistics():
    def __init__(self, 
                 percentiles =[0.1, 1, 10, 20, 30], 
                 name = 'unnamed'):
        self.percentiles = percentiles
        self.name = name
        self.auroc = None
        self.auprc = None
        self.mean_auroc = None
        self.mean_auprc = None
    
    
    def eval(self, x_train, y_train, clf, print_summary=True):
        num_class = np.max(y_train) + 1
        
        auroc_scores = np.zeros([num_class, len(self.percentiles)])
        auprc_scores = np.zeros([num_class, len(self.percentiles)])

        for class_num in range(num_class):
            for anon_ind, p_anon in enumerate(self.percentiles):
                data, gt = build_eval_set(x_train, y_train, class_num, p_anon)
                
                clf.fit(data)
                score = clf.score_samples(data)
                auroc = roc_auc_score(gt, score)
                auprc = average_precision_score(gt, score)

                
                auroc_scores[class_num, anon_ind] = auroc
                auprc_scores[class_num, anon_ind] = auprc
                
                if print_summary:
                    print('class:', class_num + 1, '/', num_class,
                          ', anon percentage:', p_anon, 
                          ', auroc:', auroc)
        
        self.auroc = auroc_scores
        self.auprc = auprc_scores
                
        self.mean_auroc = np.mean(auroc_scores, axis=0)
        self.mean_auprc = np.mean(auprc_scores, axis=0)
            


                
    
# num_clus = 5
# clf_ergo = NormalizedClusterLearner(num_clus = num_clus, norm = ErgoNormalization())
# eval_name = dataset_name + '_naive_ergo_normalization_' + str(num_clus)
# anon_eval = AnonEvaluationStatistics(name = eval_name)
# anon_eval.eval(x_train, y_train, clf_ergo)
    

In [4]:
from shell_anon import NormalizedClusterLearner, NormalizedBayesClusterLearner

num_clus = 300
clf_ergo = NormalizedBayesClusterLearner(num_clus = num_clus, norm = InstanceNormalization())
eval_name = dataset_name + '_bayes_instance_normalization_' + str(num_clus)
anon_eval = AnonEvaluationStatistics(name = eval_name)
anon_eval.eval(x_train, y_train, clf_ergo)
    

class: 1 / 10 , anon percentage: 0.1 , auroc: 0.9948814525085712
class: 1 / 10 , anon percentage: 1 , auroc: 0.9932123609352791
class: 1 / 10 , anon percentage: 10 , auroc: 0.9868223645673904
class: 1 / 10 , anon percentage: 20 , auroc: 0.976311286628162
class: 1 / 10 , anon percentage: 30 , auroc: 0.9613044388038248
class: 2 / 10 , anon percentage: 0.1 , auroc: 0.9977511380329712
class: 2 / 10 , anon percentage: 1 , auroc: 0.9932536466115239
class: 2 / 10 , anon percentage: 10 , auroc: 0.9910396488320508
class: 2 / 10 , anon percentage: 20 , auroc: 0.9832441244450894
class: 2 / 10 , anon percentage: 30 , auroc: 0.9683251554220409
class: 3 / 10 , anon percentage: 0.1 , auroc: 0.9635431568907965
class: 3 / 10 , anon percentage: 1 , auroc: 0.93129729841803
class: 3 / 10 , anon percentage: 10 , auroc: 0.9151794613600872
class: 3 / 10 , anon percentage: 20 , auroc: 0.8905753365220292
class: 3 / 10 , anon percentage: 30 , auroc: 0.8552117167177309
class: 4 / 10 , anon percentage: 0.1 , auro

  p_noty_div_p_y = mask.size / all_in_clus -1


class: 6 / 10 , anon percentage: 30 , auroc: 0.7986796210939635
class: 7 / 10 , anon percentage: 0.1 , auroc: 0.9881471785922048
class: 7 / 10 , anon percentage: 1 , auroc: 0.9790982274236046
class: 7 / 10 , anon percentage: 10 , auroc: 0.9719686134841945


  p_noty_div_p_y = mask.size / all_in_clus -1


class: 7 / 10 , anon percentage: 20 , auroc: 0.9529530911206303


  p_noty_div_p_y = mask.size / all_in_clus -1


class: 7 / 10 , anon percentage: 30 , auroc: 0.9459286831888027
class: 8 / 10 , anon percentage: 0.1 , auroc: 0.9775714481596833
class: 8 / 10 , anon percentage: 1 , auroc: 0.9457261148437618


  p_noty_div_p_y = mask.size / all_in_clus -1


class: 8 / 10 , anon percentage: 10 , auroc: 0.9444977680271799


  p_noty_div_p_y = mask.size / all_in_clus -1


class: 8 / 10 , anon percentage: 20 , auroc: 0.9243832097862599
class: 8 / 10 , anon percentage: 30 , auroc: 0.9079402357059099
class: 9 / 10 , anon percentage: 0.1 , auroc: 0.9461782661782661
class: 9 / 10 , anon percentage: 1 , auroc: 0.946431803490627
class: 9 / 10 , anon percentage: 10 , auroc: 0.9096103896103895
class: 9 / 10 , anon percentage: 20 , auroc: 0.9052102671443331


  p_noty_div_p_y = mask.size / all_in_clus -1


class: 9 / 10 , anon percentage: 30 , auroc: 0.9025399807813095
class: 10 / 10 , anon percentage: 0.1 , auroc: 0.9933889048577178
class: 10 / 10 , anon percentage: 1 , auroc: 0.9697522609778755
class: 10 / 10 , anon percentage: 10 , auroc: 0.9554482082629383
class: 10 / 10 , anon percentage: 20 , auroc: 0.9317253009687994


  p_noty_div_p_y = mask.size / all_in_clus -1


class: 10 / 10 , anon percentage: 30 , auroc: 0.9151941562441939


In [7]:
print(anon_eval.mean_auroc)

[0.97438051 0.94816691 0.93816212 0.92293444 0.90446961]


In [5]:
import pickle

store_path = store_folder + '/' + eval_name + '.pickle'
with open(store_path, 'wb') as file:
    pickle.dump(anon_eval, file) 


In [6]:
with open(store_path, 'rb') as file:
    b = pickle.load(file)