In [1]:
import numpy as np
from dataLoader import importData


data_set = 1
percentage_anomalies = [0.1, 1, 10, 20, 30]

x_train, y_train, x_test, y_test, dataset_name = importData(data_set)
#x_train, x_test = pca_transform(x_train, x_test)

x_train = np.concatenate([x_train, x_test], axis=0)
y_train = np.concatenate([y_train, y_test])
num_class = np.max(y_train) + 1
print('Number of classes:', num_class)

Data-set: MIT-Places-Small
Number of classes: 5


In [2]:
store_folder = '../stored_results/'

In [3]:
import random
from shell_anon import NormalizedClusterLearner
from normalization import InstanceNormalization, ErgoNormalization, NaiveNormalization, NoNormalization, PreTrainedNormalization
from sklearn.metrics import roc_auc_score, average_precision_score, pairwise_distances


def build_eval_set(x_train, y_train, ind, p_anon):
    x_in = x_train[y_train==ind]
    x_out = x_train[y_train!=ind]
    random.shuffle(x_out)

    num_out = int(p_anon/100 * x_in.shape[0])
    data = np.concatenate([x_in, x_out[:num_out]], axis=0)
    gt = np.zeros(data.shape[0], dtype=int)
    gt[:data.shape[0] -num_out] = 1
    
    return data, gt



class AnonEvaluationStatistics():
    def __init__(self, 
                 percentiles =[0.1, 1, 10, 20, 30], 
                 name = 'unnamed'):
        self.percentiles = percentiles
        self.name = name
        self.auroc = None
        self.auprc = None
        self.mean_auroc = None
        self.mean_auprc = None
    
    
    def eval(self, x_train, y_train, clf, print_summary=True):
        num_class = np.max(y_train) + 1
        
        auroc_scores = np.zeros([num_class, len(self.percentiles)])
        auprc_scores = np.zeros([num_class, len(self.percentiles)])

        for class_num in range(num_class):
            for anon_ind, p_anon in enumerate(self.percentiles):
                data, gt = build_eval_set(x_train, y_train, class_num, p_anon)
                
                clf.fit(data)
                score = clf.score_samples(data)
                auroc = roc_auc_score(gt, score)
                auprc = average_precision_score(gt, score)

                
                auroc_scores[class_num, anon_ind] = auroc
                auprc_scores[class_num, anon_ind] = auprc
                
                if print_summary:
                    print('class:', class_num + 1, '/', num_class,
                          ', anon percentage:', p_anon, 
                          ', auroc:', auroc)
        
        self.auroc = auroc_scores
        self.auprc = auprc_scores
                
        self.mean_auroc = np.mean(auroc_scores, axis=0)
        self.mean_auprc = np.mean(auprc_scores, axis=0)
            


                
    
# num_clus = 5
# clf_ergo = NormalizedClusterLearner(num_clus = num_clus, norm = ErgoNormalization())
# eval_name = dataset_name + '_naive_ergo_normalization_' + str(num_clus)
# anon_eval = AnonEvaluationStatistics(name = eval_name)
# anon_eval.eval(x_train, y_train, clf_ergo)
    

In [4]:
from shell_anon import NormalizedClusterLearner, NormalizedBayesClusterLearner

num_clus = 1
clf_ergo = NormalizedClusterLearner(num_clus = num_clus, norm = InstanceNormalization())
eval_name = dataset_name + '_shell_instance_normalization_' + str(num_clus)
anon_eval = AnonEvaluationStatistics(name = eval_name)
anon_eval.eval(x_train, y_train, clf_ergo)
    

class: 1 / 5 , anon percentage: 0.1 , auroc: 0.9725262931959648
class: 1 / 5 , anon percentage: 1 , auroc: 0.9803502066759446
class: 1 / 5 , anon percentage: 10 , auroc: 0.9744926572918181
class: 1 / 5 , anon percentage: 20 , auroc: 0.9612360057361317
class: 1 / 5 , anon percentage: 30 , auroc: 0.9341643594774813
class: 2 / 5 , anon percentage: 0.1 , auroc: 0.9829362524146813
class: 2 / 5 , anon percentage: 1 , auroc: 0.9839228963712273
class: 2 / 5 , anon percentage: 10 , auroc: 0.9512629042643792
class: 2 / 5 , anon percentage: 20 , auroc: 0.9258445292628781
class: 2 / 5 , anon percentage: 30 , auroc: 0.8878578102878389
class: 3 / 5 , anon percentage: 0.1 , auroc: 0.894934535308006
class: 3 / 5 , anon percentage: 1 , auroc: 0.8046444966038676
class: 3 / 5 , anon percentage: 10 , auroc: 0.864304260224747
class: 3 / 5 , anon percentage: 20 , auroc: 0.8470634468842705
class: 3 / 5 , anon percentage: 30 , auroc: 0.8132687988944859
class: 4 / 5 , anon percentage: 0.1 , auroc: 0.9097445803

In [5]:
print(anon_eval.mean_auroc)

[0.9507405  0.91311302 0.89216439 0.83610549 0.7695025 ]


In [6]:
import pickle

store_path = store_folder + '/' + eval_name + '.pickle'
with open(store_path, 'wb') as file:
    pickle.dump(anon_eval, file) 


In [7]:
with open(store_path, 'rb') as file:
    b = pickle.load(file)