In [1]:
import numpy as np
from dataLoader import importData


# Index of the data set passed to importData.
# NOTE(review): the recorded cell output reports "STL-10" for this index — confirm the mapping in dataLoader.
data_set = 5
# Anomaly percentages of interest.
# NOTE(review): not referenced again in the visible cells; AnonEvaluationStatistics
# carries its own default with the same values.
percentage_anomalies = [0.1, 1, 10, 20, 30]

x_train, y_train, x_test, y_test, dataset_name = importData(data_set)
#x_train, x_test = pca_transform(x_train, x_test)

# Merge the train and test splits into a single pool; from here on x_train / y_train
# hold all samples and labels.
x_train = np.concatenate([x_train, x_test], axis=0)
y_train = np.concatenate([y_train, y_test])
# Class count derived as max label + 1 (assumes labels are 0-based consecutive integers — TODO confirm).
num_class = np.max(y_train) + 1
print('Number of classes:', num_class)

Data-set: STL-10
Number of classes: 10


In [2]:
# Folder for persisted evaluation results; only the commented-out pickle cells at the
# end of the notebook reference it.
store_folder = '../stored_results/'

In [3]:
import random
from shell_anon import NormalizedClusterLearner
from normalization import InstanceNormalization, ErgoNormalization, NaiveNormalization, NoNormalization, PreTrainedNormalization
from sklearn.metrics import roc_auc_score, average_precision_score, pairwise_distances


def build_eval_set(x_train, y_train, ind, p_anon):
    """Build a one-class evaluation set: every sample of class `ind` plus random outliers.

    Args:
        x_train: Array of samples, indexed along axis 0.
        y_train: Array of labels aligned with `x_train`.
        ind: Label of the class treated as "normal" (inliers).
        p_anon: Number of outliers to add, as a percentage of the inlier count.

    Returns:
        (data, gt): `data` stacks the inliers followed by the sampled outliers;
        `gt` is an int array with 1 for inlier rows and 0 for outlier rows.
    """
    x_in = x_train[y_train == ind]
    x_out = x_train[y_train != ind]

    # Never ask for more outliers than exist, so the labels below always match the rows.
    num_out = min(int(p_anon / 100 * x_in.shape[0]), x_out.shape[0])

    # Draw distinct row indices instead of calling random.shuffle on the array:
    # shuffle swaps rows through views, which on a multi-dimensional ndarray
    # overwrites rows with duplicates rather than permuting them.
    picked = random.sample(range(x_out.shape[0]), num_out)
    x_anom = x_out[np.asarray(picked, dtype=int)]

    data = np.concatenate([x_in, x_anom], axis=0)
    gt = np.zeros(data.shape[0], dtype=int)
    gt[:x_in.shape[0]] = 1

    return data, gt



class AnonEvaluationStatistics():
    """Collects AUROC / AUPRC scores of an anomaly detector per class and anomaly percentage.

    Attributes are filled by `eval`; until it has run to completion they stay None.
    """

    def __init__(self,
                 percentiles=(0.1, 1, 10, 20, 30),
                 name='unnamed'):
        """
        Args:
            percentiles: Anomaly percentages to evaluate (each is forwarded to
                build_eval_set as `p_anon`).
            name: Free-form label for this evaluation run.
        """
        # Copy into a fresh list: a list default would be shared by every instance,
        # and a caller's list would otherwise be aliased.
        self.percentiles = list(percentiles)
        self.name = name
        # Score matrices of shape (num_class, len(percentiles)), set by eval().
        self.auroc = None
        self.auprc = None
        # Per-percentage means over classes, set by eval().
        self.mean_auroc = None
        self.mean_auprc = None

    def eval(self, x_train, y_train, clf, print_summary=True):
        """Fit and score `clf` once for every (class, anomaly percentage) combination.

        For each class an evaluation set is built with build_eval_set, `clf` is
        fitted on it and then scored on the same data via `score_samples`.
        The scores are compared against the labels from build_eval_set
        (1 for the target class, 0 for the injected samples).

        Args:
            x_train: Array of samples.
            y_train: Array of labels; the class count is taken as max label + 1.
            clf: Object providing `fit(data)` and `score_samples(data)`.
            print_summary: If True, print the AUROC of every combination.
        """
        # int() so range() and the array shapes work even if labels are stored as floats.
        num_class = int(np.max(y_train)) + 1

        auroc_scores = np.zeros([num_class, len(self.percentiles)])
        auprc_scores = np.zeros([num_class, len(self.percentiles)])

        for class_num in range(num_class):
            for anon_ind, p_anon in enumerate(self.percentiles):
                data, gt = build_eval_set(x_train, y_train, class_num, p_anon)

                clf.fit(data)
                score = clf.score_samples(data)
                auroc = roc_auc_score(gt, score)
                auprc = average_precision_score(gt, score)

                auroc_scores[class_num, anon_ind] = auroc
                auprc_scores[class_num, anon_ind] = auprc

                if print_summary:
                    print('class:', class_num + 1, '/', num_class,
                          ', anon percentage:', p_anon,
                          ', auroc:', auroc)

        # Results are published only after every combination has finished.
        self.auroc = auroc_scores
        self.auprc = auprc_scores

        # Average over classes: one value per anomaly percentage.
        self.mean_auroc = np.mean(auroc_scores, axis=0)
        self.mean_auprc = np.mean(auprc_scores, axis=0)
            


                
    
# num_clus = 5
# clf_ergo = NormalizedClusterLearner(num_clus = num_clus, norm = ErgoNormalization())
# eval_name = dataset_name + '_naive_ergo_normalization_' + str(num_clus)
# anon_eval = AnonEvaluationStatistics(name = eval_name)
# anon_eval.eval(x_train, y_train, clf_ergo)
    

In [4]:
# Fit the normalization on the pooled samples and apply it to the same data.
# The result is kept under a separate name (x_train_) so the un-normalized
# x_train stays available and this cell can be re-run safely.
norm = InstanceNormalization()
norm.fit(x_train)
x_train_ = norm.transform(x_train)


In [5]:
from shell_anon import NormalizedClusterLearner, NormalizedBayesClusterLearner
from sklearn.svm import OneClassSVM

# NOTE(review): num_clus is not consumed by the OneClassSVM run below; it looks like a
# leftover from the cluster-learner experiments — confirm before removing.
num_clus = 300
# One-class SVM baseline. In scikit-learn, nu is an upper bound on the fraction of
# training errors and a lower bound on the fraction of support vectors.
# NOTE(review): 0.99 is an unusually high value — confirm it is intentional.
clf = OneClassSVM(nu=0.99)
# NOTE(review): empty run name; the commented-out pickle cell would derive the file name from it.
eval_name = ''
anon_eval = AnonEvaluationStatistics(name = eval_name)
# Evaluates on the normalized samples; prints one line per (class, anomaly percentage).
anon_eval.eval(x_train_, y_train, clf)
    

class: 1 / 10 , anon percentage: 0.1 , auroc: 0.9992307692307693
class: 1 / 10 , anon percentage: 1 , auroc: 0.9992307692307693
class: 1 / 10 , anon percentage: 10 , auroc: 0.9969644970414202
class: 1 / 10 , anon percentage: 20 , auroc: 0.9948343195266274
class: 1 / 10 , anon percentage: 30 , auroc: 0.9902741617357003
class: 2 / 10 , anon percentage: 0.1 , auroc: 1.0
class: 2 / 10 , anon percentage: 1 , auroc: 0.9874556213017751
class: 2 / 10 , anon percentage: 10 , auroc: 0.9656745562130177
class: 2 / 10 , anon percentage: 20 , auroc: 0.8903284023668641
class: 2 / 10 , anon percentage: 30 , auroc: 0.8022347140039449
class: 3 / 10 , anon percentage: 0.1 , auroc: 0.9976923076923077


KeyboardInterrupt: 

In [None]:
# Mean AUROC per anomaly percentage, averaged over classes.
# It is only set at the end of eval(), so it is still None if the evaluation was interrupted.
print(anon_eval.mean_auroc)

In [None]:
# import pickle

# store_path = store_folder + '/' + eval_name + '.pickle'
# with open(store_path, 'wb') as file:
#     pickle.dump(anon_eval, file) 


In [None]:
# with open(store_path, 'rb') as file:
#     b = pickle.load(file)