In [1]:
import numpy as np
from dataLoader import importData


# --- Configuration ---------------------------------------------------------
# Index handed to importData to choose the data set (the recorded output of
# this cell shows index 3 loading "mnist").
data_set = 3
# Anomaly percentages of interest; same values as the default percentiles of
# AnonEvaluationStatistics defined further down.
percentage_anomalies = [0.1, 1, 10, 20, 30]

# --- Load ------------------------------------------------------------------
x_train, y_train, x_test, y_test, dataset_name = importData(data_set)
# Optional PCA preprocessing, currently disabled (pca_transform is not imported
# in the cells shown here).
#x_train, x_test = pca_transform(x_train, x_test)

# --- Merge the splits ------------------------------------------------------
# The evaluation below is unsupervised, so train and test samples are pooled
# into a single set; from here on x_train / y_train hold *all* samples.
x_train = np.concatenate((x_train, x_test), axis=0)
y_train = np.concatenate((y_train, y_test), axis=0)

# Labels are taken to be 0..K-1, hence K = max label + 1.
num_class = y_train.max() + 1
print('Number of classes:', num_class)

Data-set: mnist
Number of classes: 10


In [2]:
# Folder (relative to the notebook's working directory) that the pickled
# evaluation results are written to in a later cell.
store_folder = '../stored_results/'

In [3]:
import random
from shell_anon import NormalizedClusterLearner, NormalizedOcsvmLearner
from normalization import InstanceNormalization, ErgoNormalization, NaiveNormalization, NoNormalization, PreTrainedNormalization
from sklearn.metrics import roc_auc_score, average_precision_score, pairwise_distances


def build_eval_set(x_train, y_train, ind, p_anon):
    """Build a one-class evaluation set: one inlier class plus sampled anomalies.

    All samples labelled ``ind`` are kept as inliers; a random subset of the
    remaining samples is appended after them as anomalies.

    Args:
        x_train: Array of samples, indexed along axis 0.
        y_train: Array of labels aligned with ``x_train``.
        ind: Label of the class treated as inliers.
        p_anon: Number of anomalies to add, expressed as a percentage of the
            number of inliers. If fewer non-``ind`` samples exist than
            requested, all of them are used.

    Returns:
        A ``(data, gt)`` tuple. ``data`` contains the inliers followed by the
        sampled anomalies; ``gt`` is an int array of the same length holding
        1 for inliers and 0 for anomalies.
    """
    x_in = x_train[y_train == ind]
    x_out = x_train[y_train != ind]

    num_in = x_in.shape[0]
    # Never request more anomalies than are available; otherwise the label
    # vector below would no longer line up with the rows of ``data``.
    num_out = min(int(p_anon / 100 * num_in), x_out.shape[0])

    # Sample distinct row indices instead of calling random.shuffle(x_out):
    # random.shuffle swaps elements via tuple assignment, and for a
    # multi-dimensional NumPy array the "elements" are row views, so rows get
    # overwritten and duplicated rather than permuted.
    out_idx = np.asarray(random.sample(range(x_out.shape[0]), num_out), dtype=int)

    data = np.concatenate([x_in, x_out[out_idx]], axis=0)
    gt = np.zeros(data.shape[0], dtype=int)
    gt[:num_in] = 1

    return data, gt



class AnonEvaluationStatistics():
    """Runs a one-vs-rest anomaly-detection benchmark and stores its scores.

    For every class and every anomaly percentage an evaluation set is built
    with ``build_eval_set``, the detector is fitted on it, and AUROC / AUPRC
    are computed from the detector's scores.
    """

    def __init__(self,
                 percentiles=(0.1, 1, 10, 20, 30),
                 name='unnamed'):
        """Create an empty statistics container.

        Args:
            percentiles: Anomaly percentages (relative to the inlier count)
                to evaluate. Any iterable of numbers is accepted.
            name: Free-form identifier for this evaluation run.
        """
        # Copy into a fresh list so instances never share (and accidentally
        # mutate) a common default object or the caller's own list.
        self.percentiles = list(percentiles)
        self.name = name
        # Score matrices of shape (num_class, len(percentiles)); filled by eval().
        self.auroc = None
        self.auprc = None
        # Per-percentage means over all classes; filled by eval().
        self.mean_auroc = None
        self.mean_auprc = None

    def eval(self, x_train, y_train, clf, print_summary=True):
        """Evaluate ``clf`` on every (class, anomaly percentage) combination.

        Args:
            x_train: Array of samples, indexed along axis 0.
            y_train: Array of labels aligned with ``x_train``; the number of
                classes is taken to be ``max(y_train) + 1``.
            clf: Detector providing ``fit(data)`` and ``score_samples(data)``.
                Scores are compared against a ground truth in which inliers
                are 1, so higher scores should indicate inliers.
            print_summary: If true, print the AUROC of every combination as
                it is computed.

        Returns:
            None. Results are stored in ``auroc``, ``auprc``, ``mean_auroc``
            and ``mean_auprc``.
        """
        # int() keeps range() working when the labels are stored as floats.
        num_class = int(np.max(y_train)) + 1
        num_levels = len(self.percentiles)

        auroc_scores = np.zeros([num_class, num_levels])
        auprc_scores = np.zeros([num_class, num_levels])

        for class_num in range(num_class):
            for anon_ind, p_anon in enumerate(self.percentiles):
                data, gt = build_eval_set(x_train, y_train, class_num, p_anon)

                # Unsupervised protocol: the detector is fitted and scored on
                # the same contaminated set; labels are used only for metrics.
                clf.fit(data)
                score = clf.score_samples(data)
                auroc = roc_auc_score(gt, score)
                auprc = average_precision_score(gt, score)

                auroc_scores[class_num, anon_ind] = auroc
                auprc_scores[class_num, anon_ind] = auprc

                if print_summary:
                    print('class:', class_num + 1, '/', num_class,
                          ', anon percentage:', p_anon,
                          ', auroc:', auroc)

        self.auroc = auroc_scores
        self.auprc = auprc_scores

        # Average over classes (axis 0) -> one value per anomaly percentage.
        self.mean_auroc = np.mean(auroc_scores, axis=0)
        self.mean_auprc = np.mean(auprc_scores, axis=0)
            




# Benchmark the OCSVM-based learner on every class / anomaly-percentage pair.
clf_ocvsm = NormalizedOcsvmLearner()
eval_name = dataset_name + '_ocsvm'
# Pass the notebook-level percentage_anomalies explicitly so that editing the
# configuration in the first cell actually changes the evaluation, instead of
# silently falling back to the evaluator's own default list.
anon_eval = AnonEvaluationStatistics(percentiles=percentage_anomalies,
                                     name=eval_name)
anon_eval.eval(x_train, y_train, clf_ocvsm)
    

class: 1 / 10 , anon percentage: 0.1 , auroc: 0.966681153121831
class: 1 / 10 , anon percentage: 1 , auroc: 0.973622054683219
class: 1 / 10 , anon percentage: 10 , auroc: 0.9648980594448537
class: 1 / 10 , anon percentage: 20 , auroc: 0.9361176720056602
class: 1 / 10 , anon percentage: 30 , auroc: 0.8896596102933612
class: 2 / 10 , anon percentage: 0.1 , auroc: 0.9941602132791673
class: 2 / 10 , anon percentage: 1 , auroc: 0.9776678613164584
class: 2 / 10 , anon percentage: 10 , auroc: 0.9703835931061416
class: 2 / 10 , anon percentage: 20 , auroc: 0.9459530761650858
class: 2 / 10 , anon percentage: 30 , auroc: 0.9183188991600707
class: 3 / 10 , anon percentage: 0.1 , auroc: 0.9504291845493562
class: 3 / 10 , anon percentage: 1 , auroc: 0.8918434202069208
class: 3 / 10 , anon percentage: 10 , auroc: 0.840350920280556
class: 3 / 10 , anon percentage: 20 , auroc: 0.8047205183779812
class: 3 / 10 , anon percentage: 30 , auroc: 0.7630792814586953
class: 4 / 10 , anon percentage: 0.1 , auro

In [8]:
# Mean AUROC over all classes: one value per evaluated anomaly percentage.
print(anon_eval.mean_auroc)

[0.93742798 0.90063751 0.8851432  0.85582556 0.82388717]


In [6]:
import os
import pickle

# Make sure the target folder exists so the write below cannot fail on a
# fresh checkout.
os.makedirs(store_folder, exist_ok=True)
# os.path.join avoids the doubled separator that plain concatenation with '/'
# produced (store_folder already ends with '/').
store_path = os.path.join(store_folder, eval_name + '.pickle')
with open(store_path, 'wb') as file:
    pickle.dump(anon_eval, file)


In [7]:
# Reload the object that was just pickled to store_path (presumably a
# round-trip sanity check).
# NOTE: pickle.load can execute arbitrary code; only load files that were
# produced by this notebook or another trusted source.
with open(store_path, 'rb') as file:
    b = pickle.load(file)