In [29]:
from ivbase.utils.datasets.datacache import DataCache
from gnnpooling.utils.metrics import roc_auc_score, f1_score, accuracy
import os
import tarfile
import pickle
import pandas as pd
import re
import numpy as np
import json, glob
import joblib
from collections import defaultdict

def compute_roc(dt):
    """Return the micro-averaged ROC AUC for one saved test result.

    Args:
        dt: Mapping-like object indexed with "test" and "pred"; both entries
            are indexed as 2-D arrays (rows, columns).
            # NOTE(review): presumably "test" holds ground-truth labels and
            # "pred" the model outputs -- confirm against the code that saves them.

    Returns:
        Whatever `roc_auc_score` returns for the retained rows.
    """
    targets = dt["test"]
    # Keep only the rows whose "test" values sum to a positive number
    # along the last axis.
    keep = targets.sum(axis=-1) > 0
    predictions = dt["pred"]
    # Predictions are passed first, targets second, as in the original call.
    # NOTE(review): confirm this matches gnnpooling's argument order.
    return roc_auc_score(predictions[keep, :], targets[keep, :], average="micro")

def compute_f1(dt):
    """Return the micro-averaged F1 score for one saved test result.

    Args:
        dt: Mapping-like object indexed with "test" and "pred"; both entries
            are indexed as 2-D arrays (rows, columns).
            # NOTE(review): presumably "test" holds ground-truth labels and
            # "pred" the model outputs -- confirm against the code that saves them.

    Returns:
        Whatever `f1_score` returns for the retained rows.
    """
    targets = dt["test"]
    # Keep only the rows whose "test" values sum to a positive number
    # along the last axis.
    keep = targets.sum(axis=-1) > 0
    predictions = dt["pred"]
    # Predictions are passed first, targets second, as in the original call.
    # NOTE(review): confirm this matches gnnpooling's argument order.
    return f1_score(predictions[keep, :], targets[keep, :], average="micro")


def compute_acc(dt):
    """Return the accuracy for one saved test result.

    Unlike `compute_roc` and `compute_f1`, no rows are filtered out here:
    the full "pred" and "test" entries are handed to `accuracy`.

    Args:
        dt: Mapping-like object indexed with "test" and "pred".

    Returns:
        Whatever `accuracy` returns for the two entries.
    """
    targets, predictions = dt["test"], dt["pred"]
    # Predictions are passed first, targets second, as in the original call.
    return accuracy(predictions, targets)

### Retrieve paths to AWS outputs and store locally

In [107]:
# "fragments" experiments: one S3 folder per pooling variant, cached under
# the local "fragments" root (the recorded output shows files landing in
# fragments/experiments-output/...).
# NOTE(review): get_dir presumably returns the local directory path -- confirm.
results_cache1 = DataCache(cache_root='fragments')
results_diff1, results_topk1, results_lap1 = [
    results_cache1.get_dir(f"s3://experiments-output/iclr_fragments_{pooling}/")
    for pooling in ("diff", "topk", "lap")
]

# "alerts" experiments: same three variants, cached under the "alerts" root.
# These URIs carry no trailing slash, unlike the fragments ones above.
results_cache2 = DataCache(cache_root='alerts')
results_diff2, results_topk2, results_lap2 = [
    results_cache2.get_dir(f"s3://experiments-output/iclr_alerts_{pooling}")
    for pooling in ("diff", "topk", "lap")
]

download: s3://experiments-output/iclr_fragments_lap/00f24d3c4d6f2d9ccf57110b63c95a55809ee4bf.zip to fragments/experiments-output/iclr_fragments_lap/00f24d3c4d6f2d9ccf57110b63c95a55809ee4bf.zip
download: s3://experiments-output/iclr_fragments_lap/104958149e6800171b5780163597607388a0b297.zip to fragments/experiments-output/iclr_fragments_lap/104958149e6800171b5780163597607388a0b297.zip
download: s3://experiments-output/iclr_fragments_lap/11405ddc4810ddd789cbe2eebf1b3c8f4d2d8586.zip to fragments/experiments-output/iclr_fragments_lap/11405ddc4810ddd789cbe2eebf1b3c8f4d2d8586.zip
download: s3://experiments-output/iclr_fragments_lap/104c1f00dd3775552798915265eeeda26076d402.zip to fragments/experiments-output/iclr_fragments_lap/104c1f00dd3775552798915265eeeda26076d402.zip
download: s3://experiments-output/iclr_fragments_lap/11b757c1f83175a2e7f580166d570e6dd160d2e8.zip to fragments/experiments-output/iclr_fragments_lap/11b757c1f83175a2e7f580166d570e6dd160d2e8.zip
download: s3://experiments-out

download: s3://experiments-output/iclr_fragments_lap/f92465adddcebd3f9958c0bc41d3bbed2c3d194d.zip to fragments/experiments-output/iclr_fragments_lap/f92465adddcebd3f9958c0bc41d3bbed2c3d194d.zip
download: s3://experiments-output/iclr_fragments_lap/e054955d8ece796dc745807481cd131e79586728.zip to fragments/experiments-output/iclr_fragments_lap/e054955d8ece796dc745807481cd131e79586728.zip
download: s3://experiments-output/iclr_fragments_lap/fcd114290d7f64283da51d4f34560de44901404c.zip to fragments/experiments-output/iclr_fragments_lap/fcd114290d7f64283da51d4f34560de44901404c.zip
download: s3://experiments-output/iclr_fragments_lap/fdc1589c4ad908da9ca2069861435ebde7865510.zip to fragments/experiments-output/iclr_fragments_lap/fdc1589c4ad908da9ca2069861435ebde7865510.zip


### Extract files from unprocessed directories

In [108]:
%%bash
# Unpack every downloaded result archive into a sibling folder named after
# the archive (path minus the ".zip" suffix). Archives whose folder already
# exists are skipped, so re-running the cell is cheap.
#
# Both cache roots are covered: the "alerts" analysis cell reads
# alerts/experiments-output/*/*/config.json, which only exists once those
# archives have been extracted as well.
#
# The loop iterates over the glob directly (instead of parsing `ls` output)
# and quotes every expansion so paths are never word-split.
for f in fragments/experiments-output/*/*.zip alerts/experiments-output/*/*.zip
do
    # An unmatched glob stays as the literal pattern; skip it rather than
    # handing a non-existent path to unzip.
    [ -e "$f" ] || continue
    folder="${f%.zip}"
    if [ ! -d "$folder" ]
    then
        # Echo the archive name only when extraction succeeded.
        unzip -d "$folder" "$f" && echo "$f"
    fi
done

Archive:  fragments/experiments-output/iclr_fragments_lap/00f24d3c4d6f2d9ccf57110b63c95a55809ee4bf.zip
   creating: fragments/experiments-output/iclr_fragments_lap/00f24d3c4d6f2d9ccf57110b63c95a55809ee4bf/laplacian_10/
  inflating: fragments/experiments-output/iclr_fragments_lap/00f24d3c4d6f2d9ccf57110b63c95a55809ee4bf/stderr.txt  
  inflating: fragments/experiments-output/iclr_fragments_lap/00f24d3c4d6f2d9ccf57110b63c95a55809ee4bf/config.json  
  inflating: fragments/experiments-output/iclr_fragments_lap/00f24d3c4d6f2d9ccf57110b63c95a55809ee4bf/stdout.txt  
   creating: fragments/experiments-output/iclr_fragments_lap/00f24d3c4d6f2d9ccf57110b63c95a55809ee4bf/laplacian_10/0/
   creating: fragments/experiments-output/iclr_fragments_lap/00f24d3c4d6f2d9ccf57110b63c95a55809ee4bf/laplacian_10/1/
   creating: fragments/experiments-output/iclr_fragments_lap/00f24d3c4d6f2d9ccf57110b63c95a55809ee4bf/laplacian_10/2/
   creating: fragments/experiments-output/iclr_fragments_lap/00f24d3c4d6f2d9ccf57

In [60]:
# alerts
# Collect, for every architecture and every fold k in {0, 1, 2}, the best
# ROC AUC / F1 / accuracy found across all extracted experiment folders.
dataset = "alerts"
roc_alerts = defaultdict(dict)
f1_alerts = defaultdict(dict)
acc_alerts = defaultdict(dict)

for conf in glob.glob(f"{dataset}/experiments-output/*/*/config.json"):
    # Close the config file deterministically instead of leaking one handle
    # per experiment folder.
    with open(conf) as conf_file:
        config = json.load(conf_file)
    name = config["arch"]
    # Result folders sit next to config.json and start with the arch name.
    metrics = glob.glob(os.path.join(os.path.dirname(conf), f"{name}*"))
    for mfile in metrics:
        for k in range(3):
            # First file matching "<k>/test*" inside the result folder.
            m = glob.glob(os.path.join(mfile, f"{k}/test*"))[0]
            m = np.load(m)
            # Keep the maximum seen so far; -1 is the "nothing yet" sentinel.
            roc_alerts[name][k] = max(compute_roc(m), roc_alerts[name].get(k, -1))
            f1_alerts[name][k] = max(compute_f1(m), f1_alerts[name].get(k, -1))
            acc_alerts[name][k] = max(compute_acc(m), acc_alerts[name].get(k, -1))

# gnns
# Results read from the local "gnn" folder and stored under the key 'gnn'.
name = 'gnn'
for k in range(3):
    m = np.load(glob.glob(f"gnn/*/{k}/test*.npz")[0])
    roc_alerts[name][k] = max(compute_roc(m), roc_alerts[name].get(k, -1))
    f1_alerts[name][k] = max(compute_f1(m), f1_alerts[name].get(k, -1))
    acc_alerts[name][k] = max(compute_acc(m), acc_alerts[name].get(k, -1))


In [109]:
# fragments
# Collect, for every architecture and every fold k in {0, 1, 2}, the best
# ROC AUC / F1 / accuracy found across all extracted experiment folders.
dataset = "fragments"
roc = defaultdict(dict)
f1 = defaultdict(dict)
acc = defaultdict(dict)

for conf in glob.glob(f"{dataset}/experiments-output/*/*/config.json"):
    # Close the config file deterministically instead of leaking one handle
    # per experiment folder.
    with open(conf) as conf_file:
        config = json.load(conf_file)
    name = config["arch"]
    # Result folders sit next to config.json and start with the arch name.
    metrics = glob.glob(os.path.join(os.path.dirname(conf), f"{name}*"))
    for mfile in metrics:
        for k in range(3):
            # First file matching "<k>/test*" inside the result folder.
            m = glob.glob(os.path.join(mfile, f"{k}/test*"))[0]
            m = np.load(m)
            # Keep the maximum seen so far; -1 is the "nothing yet" sentinel.
            roc[name][k] = max(compute_roc(m), roc[name].get(k, -1))
            f1[name][k] = max(compute_f1(m), f1[name].get(k, -1))
            acc[name][k] = max(compute_acc(m), acc[name].get(k, -1))

# Results read from the local "gnn_fragments" folder, stored under 'gnn'.
name = 'gnn'
for k in range(3):
    m = np.load(glob.glob(f"gnn_fragments/*/{k}/test*.npz")[0])
    roc[name][k] = max(compute_roc(m), roc[name].get(k, -1))
    f1[name][k] = max(compute_f1(m), f1[name].get(k, -1))
    acc[name][k] = max(compute_acc(m), acc[name].get(k, -1))


### Format results into DataFrame

In [112]:
# Render floats with three decimals in every DataFrame shown from here on.
pd.set_option('display.float_format', '{:.3f}'.format)

# Per-architecture summary statistics of the fragments F1 scores, in percent
# (one column per architecture, one input row per fold).
f1_percent = pd.DataFrame.from_dict(f1) * 100
f1_percent.describe()

Unnamed: 0,diff,topk,laplacian,gnn
count,3.0,3.0,3.0,3.0
mean,97.961,95.469,98.98,99.436
std,0.384,1.414,0.506,0.545
min,97.625,94.328,98.47,98.807
25%,97.752,94.678,98.729,99.263
50%,97.879,95.028,98.988,99.719
75%,98.129,96.04,99.235,99.75
max,98.379,97.052,99.481,99.781


In [102]:
# Standard deviation (in percent) of the 'diff' F1 scores on the alerts data.
# NOTE(review): only folds 1 and 0 are used; fold 2 is left out -- confirm
# that this is intentional.
100 * np.std([f1_alerts['diff'][fold] for fold in (1, 0)])

9.915993687923514

In [103]:
# Show the raw alerts F1 results: architecture name -> {fold k: best F1 kept
# by the aggregation cell}.
f1_alerts

defaultdict(dict,
            {'diff': {0: 0.387218045112782,
              1: 0.5855379188712523,
              2: 0.03822124219037119},
             'laplacian': {0: 0.7522697795071337,
              1: 0.7367205542725173,
              2: 0.8687664041994752},
             'topk': {0: 0.2115835286240375,
              1: 0.3460639127045986,
              2: 0.4056291390728477},
             'gnn': {0: 0.28030690537084396,
              1: 0.3548664944013781,
              2: 0.14738292011019283}})

In [None]:
def print_res(l):
    """Print the mean, standard deviation and maximum of `l` as percentages.

    Each value is multiplied by 100 first; the three statistics are then
    printed on separate lines, each with two decimals.

    Args:
        l: Iterable of numbers.

    Returns:
        None. The results are written to standard output only.
    """
    percentages = [value * 100 for value in l]
    # Same order as before: mean, then std, then max.
    for summarize in (np.mean, np.std, np.max):
        print("{:.2f}".format(summarize(percentages)))
# NOTE(review): `gnns` is not defined in any cell visible in this notebook,
# and this cell has no execution count -- confirm where the value is meant
# to come from before relying on a Restart & Run All.
print_res(gnns)