In [1]:
import os
import sys
if '/home/zechengh/Mastik/ad/detector/' not in sys.path:
    sys.path.append('/home/zechengh/Mastik/ad/detector/')
from collections import OrderedDict
    
import numpy as np
import torch
import matplotlib.pyplot as plt
%matplotlib inline

import utils
import ADbenchmark
import LSTMAD

import json
import collections

import torch

from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Seed NumPy's legacy global RNG and PyTorch's RNG. Later cells draw their
# sub-samples with np.random.randint, so on a fresh top-to-bottom run the
# sampled rows are repeatable.
np.random.seed(0)
torch.manual_seed(0)

# Do not write .pyc files for modules imported from this point on.
# NOTE(review): utils, ADbenchmark and LSTMAD are imported above, before this
# flag is set, so their first import is not affected by it -- confirm intended.
sys.dont_write_bytecode = True

# Reload code when code is changed
%load_ext autoreload
%autoreload 2

# Show the feature-id -> feature-name table exposed by the project's `utils`
# module, one "id name" pair per line.
id_to_feature = utils.id_to_feature
for feature_id, feature_name in id_to_feature.items():
    print(feature_id, feature_name)



0 Ins
1 L1D read access (# load)
2 L1D read miss
3 L1D write access (# store)
4 L1D write miss
5 L1D prefetch miss
6 L1I read miss
7 LLC read access
8 LLC read miss
9 LLC write access
10 LLC write miss
11 LLC prefetch access
12 LLC prefetch miss
13 DTLB read access
14 DTLB read miss
15 DTLB write access
16 DTLB write miss
17 ITLB read access
18 ITLB read miss
19 BPU read access
20 BPU read miss
21 Cache node read access
22 Cache node read miss
23 Cache node write access
24 Cache node write miss
25 Cache node prefetch access
26 Cache node prefetch miss
27 cycles
28 branch instructions
29 branch prediction miss
30 page faults
31 context switch
32 stall_during_issue
33 stall_during_retirement
34 Time stamp


In [2]:
# Load every cached prediction-error array into
# pred_errors[bg_program][trace_name], where trace_name is the .npy file name
# without its extension.
# (The inner collections.defaultdict has no default_factory, so a missing
# trace name still raises KeyError, exactly like a plain dict.)
pred_errors = collections.defaultdict(collections.defaultdict)
model_name = 'merged'

for bg_program in ['none', 'mysql', 'webserver', 'streamserver', 'mltrain', 'mapreduce']:
    # The f-string is already fully interpolated; a trailing .format() call
    # would be redundant and would choke on any literal brace in the path.
    data_dir = f'detector/preprocessed/pred_errors/{model_name}/{bg_program}/'
    # sorted(): os.listdir order is arbitrary; sorting makes the dict's
    # insertion order (and every later iteration over it) deterministic.
    for entry in sorted(os.listdir(data_dir)):
        # splitext strips only the final extension, so a trace name that
        # itself contains a dot is not truncated at its first dot.
        stem, ext = os.path.splitext(entry)
        if ext == '.npy':
            pred_errors[bg_program][stem] = np.load(os.path.join(data_dir, entry))

In [3]:
# Assemble the KDE training matrix from the attack traces recorded with no
# background workload ('none'). Only the first 5000 rows of each trace are
# eligible; when `sampling` is on, 1000 row indices are drawn from them with
# np.random.randint (i.e. with replacement).
sampling = True
training_data = []

attack_train_traces = (
    "train_abnormal_l1pp",
    "train_abnormal_l3pp",
    "train_abnormal_fr",
    "train_abnormal_ff",
    "train_abnormal_spectrev1",
    "train_abnormal_spectrev2",
    "train_abnormal_spectrev3",
    "train_abnormal_spectrev4",
    "train_abnormal_bufferoverflow",
)

for file_name in attack_train_traces:
    head_rows = pred_errors['none'][file_name][:5000]
    if sampling:
        picked_rows = np.random.randint(low=0, high=len(head_rows), size=1000)
        head_rows = head_rows[picked_rows, :]
    training_data.append(head_rows)

# Stack the per-trace blocks row-wise and keep a copy on disk as train.npy.
training_data = np.concatenate(training_data, axis=0)
np.save('train', training_data)

In [4]:
from sklearn.neighbors import KernelDensity
from pathlib import Path
# `import concurrent` by itself does not load the `futures` submodule; import
# it explicitly so `concurrent.futures` resolves even when nothing else in the
# process has imported it yet.
import concurrent.futures

# Fit a Gaussian KDE on the sampled attack prediction errors in training_data.
kde = KernelDensity(kernel='gaussian', bandwidth=0.1).fit(training_data)

kde_results = collections.defaultdict(collections.defaultdict)


def known_attack_detection(kde, data, bg_program, file_name):
    """Score one trace under the fitted KDE and save the scores to disk.

    Args:
        kde: fitted estimator exposing ``score_samples``.
        data: samples to score; passed unchanged to ``kde.score_samples``.
        bg_program: background-workload name, used as the output sub-directory.
        file_name: trace name, used as the output file name (np.save appends
            ``.npy``).

    Returns:
        1 once the scores have been written.

    Side effects:
        Creates detector/preprocessed/attack_kde/{model_name}/{bg_program}/
        if needed, writes {file_name}.npy there and prints the scores.
        Reads the notebook-level ``model_name``.
    """
    kde_result = kde.score_samples(data)

    out_dir = Path(f'detector/preprocessed/attack_kde/{model_name}/{bg_program}')
    out_dir.mkdir(parents=True, exist_ok=True)
    np.save(out_dir / file_name, kde_result)
    print(bg_program, file_name, kde_result)
    return 1


# The context manager shuts the worker pool down when the block exits, so the
# 20 worker processes are not left running after the cell finishes.
with concurrent.futures.ProcessPoolExecutor(20) as executor:
    for bg_program in ['none']:
        futures = [
            executor.submit(known_attack_detection, kde, data, bg_program, file_name)
            for file_name, data in pred_errors[bg_program].items()
        ]
        # concurrent.futures.wait() never re-raises worker exceptions;
        # result() does, so a failed trace is reported instead of being
        # silently missing from the output directory.
        for future in futures:
            future.result()

none ref_and_val_normal_with_gpg [-59.50370713 -47.86043175 -42.42232325 ... -49.10461337 -49.10448715
 -49.10458094]
none test_abnormal_fr_with_spec [-93.06931294 -70.19797474 -22.94290074 ... -57.77727685 -77.25244595
 -60.50284838]
none test_normal_with_astar [-457.85404948 -109.00499091  -86.93527187 ...  -19.58339432  -58.23893306
  -20.09387584]
none test_normal_with_povray [-53.45950653 -56.59073818 -22.72822447 ... -19.93324536 -20.08916718
 -21.19456285]
none test_abnormal_l3pp_with_gobmk [-21.5403941  -67.70309455 -18.85205911 ... -31.54842894 -31.66532449
 -19.49730834]
none test_abnormal_spectrev1_with_gcc [-125.52435123  -54.89344274  -13.29826961 ...   14.23784529   14.21880168
   14.23988193]
none train_normal_with_milc [  0.65443742   5.82169447   5.52495873 ... -57.7928581  -57.79502513
 -21.99288379]
none train_abnormal_spectrev3 [-210.61964792  -29.75137891    2.52989056 ...   12.38750628   12.36236202
   12.32705847]
none test_normal_with_perlbench [-28.54718357 -39

 -18.91214465]
none test_abnormal_spectrev4_with_mcf [-126.8974827   -36.91934095   -6.53721296 ...   15.54210261   15.54511495
   15.54795731]
none train_normal_with_soplex [-74.17096845 -19.59261153 -68.55653049 ... -63.10067384 -63.08562171
 -63.08801586]
none test_abnormal_ff_with_milc [-7.54859108  3.05140669  2.10855649 ...  8.64518018  8.63494797
  8.63603761]
none train_abnormal_fr [ 8.88242542 -3.06293597  9.58961356 ... 15.56466978 15.563848
 15.56311335]
none test_abnormal_l1pp_with_gcc [ 2.85246501 -9.31577307  6.77576377 ... 14.10827418 14.10470022
 14.09752702]
none test_abnormal_l3pp_with_gcc [-87.34037747 -15.24751417   5.35489295 ...  15.29160815  15.29181867
  15.29189297]
none test_abnormal_ff_with_bzip2 [-18.24486006 -19.67859891 -18.89261074 ...  15.34155946  15.34136496
  15.34188992]
none test_abnormal_l1pp [ 6.1044631  -7.86784343  8.38609714 ... 14.60027577 14.60172913
 14.60050789]
none test_normal_with_namd [-16.49138517 -15.3051839  -15.39150779 ... -22.4425

   15.0180898 ]
 test_abnormal_spectrev4_with_namd [-2.72513568  7.26519508  0.2766563  ... 15.60270262 15.60264106
 15.60250825]
none test_abnormal_bufferoverflow_with_milc [-375.88519225 -215.22477549 -166.73489595 ...   13.95589081   13.95867599
   13.96071503]
none test_abnormal_bufferoverflow_with_soplex [-475.75101851 -269.90154067 -229.37936557 ...   13.84131997   13.84360363
   13.84635119]
none test_abnormal_spectrev2_with_spec [-204.53998857 -178.92515875 -184.07760876 ... -232.63044327 -366.54707065
 -233.72337915]
none test_abnormal_spectrev3_with_milc [-130.74811975  -30.91586502  -10.23670257 ...   11.7605598    11.63828858
   11.30671175]
none test_abnormal_bufferoverflow_with_libquantum [-36.49002192 -22.0457774  -20.38975023 ...  13.7899586   13.7893129
  13.78861604]
none train_abnormal_spectrev1 [-52.10562664  -9.46918368   4.96180637 ...  13.60224514  13.6207963
  13.628028  ]
none train_abnormal_bufferoverflow [-24.97293773 -22.73912721  -5.05011474 ...  13.9283082

#### Determine threshold

In [33]:
from sklearn.metrics import roc_curve, auc
import pandas as pd
pd.set_option('display.max_rows', None)

model_name = 'merged'

kde_results = collections.defaultdict(collections.defaultdict)

predicts = []


def _load_tail_scores(bg_programs, file_names, skip=5000):
    """Load saved KDE scores and return them as one flat 1-D array.

    For every (bg_program, file_name) pair, in that nesting order, reads
    detector/preprocessed/attack_kde/{model_name}/{bg_program}/{file_name}.npy
    and keeps the entries from index ``skip`` onward.

    The pieces are joined with np.concatenate rather than
    np.array(list_of_arrays): the latter only flattens correctly when every
    trace has the same length, and on ragged input it produces an object
    array or raises, depending on the NumPy version.
    """
    chunks = [
        np.load(f'detector/preprocessed/attack_kde/{model_name}/{bg_program}/{file_name}.npy')[skip:]
        for bg_program in bg_programs
        for file_name in file_names
    ]
    return np.concatenate(chunks).reshape(-1)


# Scores of benign traces (no background workload), skipping the first 5000
# entries of each file.
benign_ked_result_all = _load_tail_scores(['none'], [
    'train_normal',
    'train_normal_with_gpg',
    'train_normal_with_bzip2',
    'train_normal_with_gcc',
    'train_normal_with_mcf',
    'train_normal_with_milc',
    'train_normal_with_namd',
    'train_normal_with_gobmk',
    'train_normal_with_soplex',
    'train_normal_with_hmmer',
    'train_normal_with_libquantum',
    'train_normal_with_h264ref',
])

# Scores of attack traces. The KDE training cell samples from the first 5000
# rows of these traces, so only the entries after them are used here.
# NOTE(review): train_abnormal_bufferoverflow is part of the KDE training list
# but is not included here -- confirm that is intentional.
attack_ked_result_all = _load_tail_scores(['none'], [
    "train_abnormal_l1pp",
    "train_abnormal_l3pp",
    "train_abnormal_fr",
    "train_abnormal_ff",
    "train_abnormal_spectrev1",
    "train_abnormal_spectrev2",
    "train_abnormal_spectrev3",
    "train_abnormal_spectrev4",
])

def get_eer(y, y_pred, tol=1e-4):
    """Pick a decision threshold at (approximately) the equal error rate.

    Args:
        y: ground-truth labels; 1 marks the positive (attack) class.
        y_pred: scores, higher meaning more likely positive.
        tol: maximum |FPR - FNR| for a ROC point to count as "at the EER".

    Returns:
        The smallest threshold among the ROC points whose |FPR - FNR| is
        below ``tol``. If no point is that close, the threshold of the point
        with the smallest |FPR - FNR| is returned instead (previously the
        function returned ``inf`` in that case, which classifies nothing as
        an attack).

    Raises:
        ValueError: from np.nanargmin if every |FPR - FNR| value is NaN.
    """
    fpr, tpr, threshold = roc_curve(y, y_pred, pos_label=1, drop_intermediate=False)
    fnr = 1 - tpr
    gap = np.absolute(fnr - fpr)

    # For the attack detector a wide range of thresholds has approximately
    # equal error rates. Choose the smallest one to be strict (highest FPR),
    # because a larger window size can reduce the FPR later.
    near_eer = gap < tol
    if np.any(near_eer):
        return threshold[near_eer].min()

    # Nothing within tolerance: fall back to the closest ROC point.
    return threshold[np.nanargmin(gap)]

# Label every attack score 1 and every benign score 0, attack scores first,
# then pick the threshold from the resulting ROC curve.
n_attack = len(attack_ked_result_all)
n_benign = len(benign_ked_result_all)
y = [1] * n_attack + [0] * n_benign
y_pred = np.array([*attack_ked_result_all, *benign_ked_result_all])
eer_threshold = get_eer(y, y_pred)
print(eer_threshold)

-13.985548920440392


In [19]:
# Print the flattened benign scores, then the flattened attack scores.
for score_array in (benign_ked_result_all, attack_ked_result_all):
    print(score_array)

[-19.88203273 -57.50016014 -19.54671826 ... -65.28087039 -65.49306254
 -65.70615178]
[14.9582821  14.95606868 14.95340668 ... 14.48879371 14.50057449
 14.51543732]


In [44]:
import pandas as pd
pd.set_option('display.max_rows', None)

model_name = 'merged'
th = -18

kde_results = collections.defaultdict(collections.defaultdict)

predicts = []
attack_list = ['l1pp', 'l3pp', 'fr', 'ff', 'spectrev1', 'spectrev2', 'spectrev3', 'spectrev4', 'bufferoverflow']
benign_list = ['gpg', 'bzip2', 'gcc', 'mcf', 'milc', 'namd', 'gobmk', 'soplex', 'hmmer', 'libquantum', 'h264ref']

# Test cases in report order:
#   1. the plain benign trace, then benign traces with each co-runner,
#   2. each attack on its own,
#   3. each attack next to gpg, then gcc, then libquantum.
test_cases = (
    ['test_normal']
    + [f'test_normal_with_{benign}' for benign in benign_list]
    + [f'test_abnormal_{attack}' for attack in attack_list]
    + [
        f'test_abnormal_{attack}_with_{benign}'
        for benign in ('gpg', 'gcc', 'libquantum')
        for attack in attack_list
    ]
)

for bg_program in ['none']:
    for file_name in test_cases:
        kde_result = np.load(f'detector/preprocessed/attack_kde/{model_name}/{bg_program}/{file_name}.npy')
        total = float(len(kde_result))

        # The KDE models the attack distribution, so a score above `th`
        # (high probability under the attack model) is flagged as abnormal.
        flagged = kde_result > th
        pred_normal = np.sum(kde_result <= th) / total
        pred_abnormal = np.sum(flagged) / total

        score_mean = np.mean(kde_result)
        score_min = np.min(kde_result)
        score_max = np.max(kde_result)
        score_p10 = np.percentile(kde_result, 10)
        score_p90 = np.percentile(kde_result, 90)

        predicts.append([
            bg_program, file_name, pred_normal, pred_abnormal,
            score_mean, score_min, score_max, score_p10, score_p90])

columns = ['Workload', 'Test Case', 'Pred normal', 'Pred abnormal', 'Mean', 'Min', 'Max', '10%', '90%']
print(pd.DataFrame(predicts, columns=columns))

   Workload                                     Test Case  Pred normal  \
0      none                                   test_normal     1.000000   
1      none                          test_normal_with_gpg     1.000000   
2      none                        test_normal_with_bzip2     0.978089   
3      none                          test_normal_with_gcc     1.000000   
4      none                          test_normal_with_mcf     0.981089   
5      none                         test_normal_with_milc     0.937540   
6      none                         test_normal_with_namd     0.999454   
7      none                        test_normal_with_gobmk     0.973088   
8      none                       test_normal_with_soplex     0.999182   
9      none                        test_normal_with_hmmer     1.000000   
10     none                   test_normal_with_libquantum     1.000000   
11     none                      test_normal_with_h264ref     0.999909   
12     none                           

#### Attack detection with Cloud workload

In [49]:
from sklearn.neighbors import KernelDensity
from pathlib import Path
import concurrent

kde_results = collections.defaultdict(collections.defaultdict)

# Traces used to fit the cloud-workload KDE.
# NOTE(review): 'train_normal' is included next to the attack traces here,
# whereas the training list for the 'none' workload holds attack traces only
# -- confirm that is intended.
file_names = ['train_normal'] + [
    f'train_abnormal_{attack}'
    for attack in (
        'l1pp', 'l3pp', 'fr', 'ff',
        'spectrev1', 'spectrev2', 'spectrev3', 'spectrev4',
        'bufferoverflow',
    )
]

sampling = True
training_data = []

# For every cloud workload and trace, take either the whole trace or, when
# `sampling` is on, 1000 rows drawn with np.random.randint (with replacement).
for bg_program in ['mltrain', 'mysql', 'webserver', 'streamserver', 'mapreduce']:
    for file_name in file_names:
        trace = pred_errors[bg_program][file_name]
        if sampling:
            picked_rows = np.random.randint(low=0, high=len(trace), size=1000)
            trace = trace[picked_rows, :]
        training_data.append(trace)

# Stack all blocks row-wise into the final training matrix.
training_data = np.concatenate(training_data, axis=0)


In [50]:
from sklearn.neighbors import KernelDensity
from pathlib import Path
# `import concurrent` by itself does not load the `futures` submodule; import
# it explicitly so `concurrent.futures` resolves even when nothing else in the
# process has imported it yet.
import concurrent.futures

# Fit a Gaussian KDE on the cloud-workload training matrix in training_data.
kde = KernelDensity(kernel='gaussian', bandwidth=0.1).fit(training_data)

kde_results = collections.defaultdict(collections.defaultdict)


def known_attack_detection(kde, data, bg_program, file_name):
    """Score one trace under the fitted KDE and save the scores to disk.

    Args:
        kde: fitted estimator exposing ``score_samples``.
        data: samples to score; passed unchanged to ``kde.score_samples``.
        bg_program: background-workload name, used as the output sub-directory.
        file_name: trace name, used as the output file name (np.save appends
            ``.npy``).

    Returns:
        1 once the scores have been written.

    Side effects:
        Creates
        detector/preprocessed/test_step1_attack_kde/{model_name}/{bg_program}/
        if needed, writes {file_name}.npy there and prints the scores.
        Reads the notebook-level ``model_name``.
    """
    kde_result = kde.score_samples(data)

    out_dir = Path(f'detector/preprocessed/test_step1_attack_kde/{model_name}/{bg_program}')
    out_dir.mkdir(parents=True, exist_ok=True)
    np.save(out_dir / file_name, kde_result)
    print(bg_program, file_name, kde_result)
    return 1


# One pool is shared by all workloads and shut down by the context manager;
# creating a fresh 20-process pool per workload without ever shutting it down
# would leave every pool's workers running.
with concurrent.futures.ProcessPoolExecutor(20) as executor:
    for bg_program in ['mltrain', 'mysql', 'webserver', 'streamserver', 'mapreduce']:
        futures = [
            executor.submit(known_attack_detection, kde, data, bg_program, file_name)
            for file_name, data in pred_errors[bg_program].items()
        ]
        # Finish one workload before submitting the next. result() (unlike
        # concurrent.futures.wait) re-raises worker exceptions, so a failed
        # trace is reported instead of silently missing on disk.
        for future in futures:
            future.result()

mltrain test_abnormal_spectrev3_with_gpg [-175.57274032  -79.34052649 -100.68368948 ... -144.76276126 -142.43239932
 -142.91996529]
mltrain test_normal_with_astar [-356.21863786 -254.68867168 -223.43818103 ...    8.23292759    8.23791012
    8.2388951 ]
mltrain test_abnormal_spectrev1_with_gcc [-61.54817578 -17.20611078  -1.83564803 ...  13.94667489  13.98993315
  13.9905278 ]
mltrain test_abnormal_spectrev1_with_gpg [-82.31975805 -23.63783672  -1.54686784 ...  14.06454383  14.0609075
  14.0125878 ]
mltrain train_abnormal_spectrev3 [-211.18831848  -34.85417395    1.19135895 ...   13.69240206   13.77437149
   13.72250428]
mltrain test_abnormal_l3pp_with_gobmk [-16.49452943 -17.88214917 -32.33940955 ... -31.40616875 -19.04434962
 -18.2248118 ]
mltrain test_abnormal_fr_with_gobmk [-31.40739834 -26.85257996 -17.67048486 ... -17.9635182  -41.30456375
 -41.09008383]
mltrain test_abnormal_spectrev4_with_gcc [-30.13873489   1.0371961    6.70789863 ...  13.34349655  13.34757518
  13.34380668]
m

mltrain test_abnormal_spectrev1_with_h264ref [-574.97452107 -408.76947104 -132.79878094 ...   13.6200795    13.68374852
   13.72361422]
mltrain test_abnormal_spectrev4_with_namd [-1.09645688  2.67744826 -5.08158763 ... 11.13875244 11.13868704
 11.13867427]
mltrain test_abnormal_spectrev3_with_milc [-174.45558352 -181.74027305 -133.5226218  ... -140.78953053 -141.29789093
 -141.27825   ]
mltrain test_abnormal_l3pp [-75.02210667 -14.29892416  -0.14942244 ...  -7.21407883  -7.18719498
  -7.16713747]
mltrain test_abnormal_l1pp_with_hmmer [ -49.61417707  -43.7985401  -110.9863398  ...   13.68616988   13.68501238
   13.68591447]
mltrain test_abnormal_bufferoverflow_with_milc [-81.4862564  -99.20157809 -22.37189154 ...  12.50399502  12.50687105
  12.51001997]
mltrain test_abnormal_bufferoverflow_with_soplex [-389.7262946  -296.59500695 -249.48276505 ...   13.80963262   13.80969766
   13.80865476]
mltrain test_normal_with_hmmer [-1242.7833789   -990.04476257  -905.02161527 ...   -18.593971
   

  -5.78569518]
mysql test_abnormal_ff_with_spec [-16.67553661  -9.20835462  -5.12600103 ...   1.97390494   1.96054734
   1.95753904]
mysql test_abnormal_l3pp_with_gpg [-13.05993782  -7.55999913  -5.96822394 ...  -7.70406228  -7.72749561
  -7.68826636]
mysql train_normal_with_gcc [-22.70369109  -6.16157968  -0.44035709 ...  15.12160949  15.1143054
  15.11982681]
mysql test_normal_with_libquantum [-48.37177387 -17.4865429  -11.97149793 ...  15.35307281  15.3549263
  15.35666676]
mysql test_abnormal_l3pp_with_hmmer [-22.26282863  -8.54440695  -5.81986426 ...  -2.30872304  -2.254813
  -2.1836133 ]
mysql test_abnormal_fr_with_gcc [-17.39309843 -13.82075117  -4.58345307 ...   4.92635266   4.97151728
   4.99383736]
mysql test_abnormal_l1pp_with_gobmk 
[-17.33287012  -8.99217976  -2.90580884 ...  -1.36588787  -1.37337132
  -1.38527502]mysql test_abnormal_bufferoverflow_with_h264ref [-17.84397674 -19.80488799 -19.83513138 ... -22.20849523 -56.67809754
 -56.70887701]
mysql test_abnormal_spectrev

webserver test_abnormal_spectrev3 [-91.58823206  -4.84768821   0.44209489 ...  11.16072984  11.20250025
  11.25611809]
webserver test_normal_with_libquantum [-16.23657433  -9.39787607  -4.91720367 ...   7.85639356   7.79196607
   7.58213656]
webserver test_abnormal_bufferoverflow_with_gpg [-19.27110083 -27.66850547  -5.30967642 ...  12.24379986  12.19650391
  12.18086532]
webserver test_abnormal_l1pp_with_gpg [-20.68832955 -17.92847469  -4.65972541 ...  13.34258782  13.33823097
  13.33182579]
webserver test_abnormal_spectrev1 webserver[-17.3431146  -21.56361267  -7.76136459 ...  11.6546093   11.67531174
  11.67084212]
 test_abnormal_ff_with_gpg [ -6.99762237 -14.37499968   1.9283469  ...  12.84008205  12.87501708
  12.87787517]
webserver test_abnormal_spectrev2_with_gcc [-31.07949428 -19.61901389  -7.96355267 ...  13.53221612  13.53703975
  13.53232304]
webserver train_abnormal_l1pp [-50.04393492 -18.9576188   -4.11427969 ...  13.68880327  13.6892545
  13.67974927]
webserver test_abnor

  12.92793797]
streamserver test_abnormal_spectrev1_with_gcc [-18.2803238   -8.19198987  -3.76422821 ...  10.78459578  10.76891198
  10.77328753]streamserver
 test_abnormal_spectrev1_with_gpg [-31.34918817 -12.54391848  -7.56688674 ...  11.97683199  11.95038364
  11.94468988]
streamserver test_abnormal_l3pp_with_gpg [-49.48464752 -32.22351105 -16.24062659 ...   9.05141058   9.12102914
   8.98165297]
streamserver train_normal_with_h264ref [-169.23034601 -156.75495512 -133.86471366 ... -140.28374207 -140.19133031
  -24.81869592]
streamserver test_abnormal_spectrev3 [-72.15267866 -32.87233953 -31.07026004 ... -22.57128903 -21.30647028
 -44.38874931]
streamserver test_normal_with_libquantum [-224.83390603  -28.5548072   -25.37048401 ...   15.10781976   15.10739705
   15.11265316]
streamserver train_normal_with_h264 [-169.23034601 -156.75495512 -133.86471366 ... -140.28374207 -140.19133031
  -24.81869592]
streamserver test_abnormal_ff_with_spec [-4.45870751 -5.89122044 -4.63971538 ...  6.73

   12.77318455]
mapreduce test_abnormal_spectrev4_with_gpg [-150.22168017  -16.71382919    5.43746873 ...   13.53144258   13.53150008
   13.53202716]
mapreduce test_abnormal_spectrev4_with_libquantum [-13.35863208   3.52981658   3.54417222 ...  10.94537987  10.94983011
  10.95390282]
mapreduce test_abnormal_fr_with_gcc [-25.0218815    0.05976554   9.27684374 ...  14.02142088  14.02152248
  14.02223174]
mapreduce train_abnormal_ff [-104.84612954  -22.41172097    1.22388296 ...   14.55559269   14.55540603
   14.55559599]
mapreduce train_normal_with_gcc [ 3.75353394 11.48845154 12.03460347 ... 15.1760168  15.17428909
 15.17425013]
mapreduce test_abnormal_spectrev4_with_gcc [-134.13835964  -15.74438398    5.36772964 ...   13.31944799   13.32500539
   13.33144707]
mapreduce test_abnormal_bufferoverflow [  6.32387593 -25.45913538 -10.91047568 ...  13.80063667  13.80178833
  13.80429096]
mapreduce test_abnormal_spectrev4 [-157.93524377  -23.11230859    3.73120915 ...   14.57195975   14.572910

In [56]:
import pandas as pd

from sklearn.neighbors import KernelDensity
from pathlib import Path
import concurrent

pd.set_option('display.max_rows', None)

model_name = 'merged'
th = -14

kde_results = collections.defaultdict(collections.defaultdict)

predicts = []
attack_list = ['l1pp', 'l3pp', 'fr', 'ff', 'spectrev1', 'spectrev2', 'spectrev3', 'spectrev4', 'bufferoverflow']

# Test cases in report order: the plain benign trace, benign traces with four
# co-runners, then each attack on its own.
test_cases = (
    ['test_normal']
    + [f'test_normal_with_{benign}' for benign in ('gpg', 'gcc', 'mcf', 'libquantum')]
    + [f'test_abnormal_{attack}' for attack in attack_list]
)

# NOTE(review): scores are read from attack_kde/, while the cloud-workload
# scoring cell above writes to test_step1_attack_kde/ -- confirm which
# directory this report is meant to evaluate.
for bg_program in ['mltrain', 'mysql', 'webserver', 'streamserver', 'mapreduce']:
    for file_name in test_cases:
        kde_result = np.load(f'detector/preprocessed/attack_kde/{model_name}/{bg_program}/{file_name}.npy')
        total = float(len(kde_result))

        # The KDE models the attack distribution, so a score above `th`
        # (high probability under the attack model) is flagged as abnormal.
        flagged = kde_result > th
        pred_normal = np.sum(kde_result <= th) / total
        pred_abnormal = np.sum(flagged) / total

        score_mean = np.mean(kde_result)
        score_min = np.min(kde_result)
        score_max = np.max(kde_result)
        score_p10 = np.percentile(kde_result, 10)
        score_p90 = np.percentile(kde_result, 90)

        predicts.append([
            bg_program, file_name, pred_normal, pred_abnormal,
            score_mean, score_min, score_max, score_p10, score_p90])

columns = ['Workload', 'Test Case', 'Pred normal', 'Pred abnormal', 'Mean', 'Min', 'Max', '10%', '90%']
# Build the report once, then both print it and export it to Excel.
report = pd.DataFrame(predicts, columns=columns)
print(report)
report.to_excel(f'test_step1_attack_detection.xlsx')

        Workload                     Test Case  Pred normal  Pred abnormal  \
0        mltrain                   test_normal     1.000000       0.000000   
1        mltrain          test_normal_with_gpg     1.000000       0.000000   
2        mltrain          test_normal_with_gcc     1.000000       0.000000   
3        mltrain          test_normal_with_mcf     0.999364       0.000636   
4        mltrain   test_normal_with_libquantum     1.000000       0.000000   
5        mltrain            test_abnormal_l1pp     0.000000       1.000000   
6        mltrain            test_abnormal_l3pp     0.000000       1.000000   
7        mltrain              test_abnormal_fr     0.000000       1.000000   
8        mltrain              test_abnormal_ff     0.000000       1.000000   
9        mltrain       test_abnormal_spectrev1     0.000000       1.000000   
10       mltrain       test_abnormal_spectrev2     0.000091       0.999909   
11       mltrain       test_abnormal_spectrev3     0.000000     

In [None]:
# Rebuild the 'none'-workload KDE training matrix from a subset of the attack
# traces. Only the first 5000 rows of each trace are eligible; when `sampling`
# is on, 1000 row indices are drawn with np.random.randint (with replacement).
sampling = True
training_data = []

attack_train_traces = (
    "train_abnormal_l1pp",
    "train_abnormal_l3pp",
    #"train_abnormal_fr",
    #"train_abnormal_ff",
    "train_abnormal_spectrev1",
    "train_abnormal_spectrev2",
    #"train_abnormal_spectrev3",
    #"train_abnormal_spectrev4",
    "train_abnormal_bufferoverflow",
)

for file_name in attack_train_traces:
    head_rows = pred_errors['none'][file_name][:5000]
    if sampling:
        picked_rows = np.random.randint(low=0, high=len(head_rows), size=1000)
        head_rows = head_rows[picked_rows, :]
    training_data.append(head_rows)

# Stack the per-trace blocks row-wise into the final training matrix.
training_data = np.concatenate(training_data, axis=0)


# `import concurrent` by itself does not load the `futures` submodule; import
# it explicitly so `concurrent.futures` resolves in this cell.
import concurrent.futures

# Fit a Gaussian KDE on the reduced attack training matrix in training_data.
kde = KernelDensity(kernel='gaussian', bandwidth=0.1).fit(training_data)

kde_results = collections.defaultdict(collections.defaultdict)


def known_attack_detection(kde, data, bg_program, file_name):
    """Score one trace under the fitted KDE and save the scores to disk.

    Args:
        kde: fitted estimator exposing ``score_samples``.
        data: samples to score; passed unchanged to ``kde.score_samples``.
        bg_program: background-workload name, used as the output sub-directory.
        file_name: trace name, used as the output file name (np.save appends
            ``.npy``).

    Returns:
        1 once the scores have been written.

    Side effects:
        Creates detector/preprocessed/attack_kde/{model_name}/{bg_program}/
        if needed, writes {file_name}.npy there and prints the scores.
        Reads the notebook-level ``model_name``.
    """
    kde_result = kde.score_samples(data)

    out_dir = Path(f'detector/preprocessed/attack_kde/{model_name}/{bg_program}')
    out_dir.mkdir(parents=True, exist_ok=True)
    np.save(out_dir / file_name, kde_result)
    print(bg_program, file_name, kde_result)
    return 1


# NOTE(review): this writes to the same attack_kde/{model_name}/none/ files as
# the earlier full-attack-set scoring cell, replacing those scores -- confirm
# that overwriting them is intended.
# The context manager shuts the worker pool down when the block exits, so the
# 20 worker processes are not left running after the cell finishes.
with concurrent.futures.ProcessPoolExecutor(20) as executor:
    for bg_program in ['none']:
        futures = [
            executor.submit(known_attack_detection, kde, data, bg_program, file_name)
            for file_name, data in pred_errors[bg_program].items()
        ]
        # concurrent.futures.wait() never re-raises worker exceptions;
        # result() does, so a failed trace is reported instead of being
        # silently missing from the output directory.
        for future in futures:
            future.result()