In [1]:
import numpy as np
import pandas as pd
from glob import glob
from sklearn.metrics import f1_score

In [30]:
strategies = [
    "gcn",
    "igcn",
    "gcngru",
    "igcngru",
    "gcn_features",
    "igcn_features",
    "gcngru_features",
    "igcngru_features",
    "idarkvec"
]

k_n = 'k3'

In [31]:
df = pd.read_csv("stacking_predictions/out/k3/test/idarkvec_20211224_fold02.csv")

In [32]:
df.columns

Index(['mirai', 'unk_bruteforcer', 'unk_spammer', 'shadowserver', 'driftnet',
       'internetcensus', 'censys', 'rapid7', 'onyphe', 'netsystems', 'shodan',
       'unk_exploiter', 'securitytrails', 'intrinsec', 'unknown', 'y_true'],
      dtype='object')

In [33]:
def f1(df: pd.DataFrame):

    probs_cols = ['mirai',
                  'unk_bruteforcer',
                  'unk_spammer',
                  'shadowserver',
                  'driftnet',
                  'internetcensus',
                  'censys',
                  'rapid7',
                  'onyphe',
                  'netsystems',
                  'shodan',
                  'unk_exploiter',
                  'securitytrails',
                  'intrinsec',
                  'unknown']


    label_to_idx = { l:idx for idx, l in enumerate(probs_cols)  }

    # Getting only probabilities.
    probs = df.drop(columns=["y_true"])[probs_cols].copy(deep=True)
    # Getting labels.
    labels = df.y_true.copy(deep=True)
    # Building a dictionay of labels (strings) to int (class number).
    # Converting labels (string) to int (class number).
    y = [ label_to_idx[l] for l in labels ]
    # Taking predctions out of probabilities.
    preds = probs.values.argmax(axis=1)
    # Computing metrics.
    macro = f1_score(y, preds, labels=np.arange(len(probs_cols)), average="macro", zero_division=0)
    # Macro by class.
    macro_by_class = f1_score(y, preds, labels=np.arange(len(probs_cols)), average=None, zero_division=0)
    return macro, macro_by_class

In [34]:
days = sorted([ f.split('/')[-1].split('_')[-2] for f in glob(f"stacking_predictions/out/{k_n}/test/idarkvec*_fold00.csv") ])
days

['20211221',
 '20211222',
 '20211223',
 '20211224',
 '20211225',
 '20211226',
 '20211227',
 '20211228',
 '20211229',
 '20211230',
 '20211231']

In [35]:
macros = {}
for strat in strategies:
    print(strat)
    macros[strat] = {}
    for day in days:
        macros[strat][day] = []
        for fold in np.arange(10):
            file_path = f"stacking_predictions/out/{k_n}/test/{strat}_{day}_fold0{fold}.csv"
            df = pd.read_csv(file_path)
            macros[strat][day].append(df)
        

gcn
igcn
gcngru
igcngru
gcn_features
igcn_features
gcngru_features
igcngru_features
idarkvec


In [36]:
scores = {}
for strat in strategies:
    print(strat)
    macro_list, class_list = [], []
    for day in days:
        df = pd.concat(macros[strat][day])
        m_mean, m_class = f1(df)
        macro_list.append(m_mean)
        class_list.append(m_class)
    
    
    scores[strat] = np.mean(class_list, axis=0).tolist()
    scores[strat].append(np.mean(macro_list))


gcn
igcn
gcngru
igcngru
gcn_features
igcn_features
gcngru_features
igcngru_features
idarkvec


In [37]:
cols = ['mirai',
        'unk_bruteforcer',
        'unk_spammer',
        'shadowserver',
        'driftnet',
        'internetcensus',
        'censys',
        'rapid7',
        'onyphe',
        'netsystems',
        'shodan',
        'unk_exploiter',
        'securitytrails',
        'intrinsec',
        'unknown',
        'avg']

In [38]:
pd.DataFrame(scores)#, columns=cols).T

Unnamed: 0,gcn,igcn,gcngru,igcngru,gcn_features,igcn_features,gcngru_features,igcngru_features,idarkvec
0,0.63395,0.745382,0.67319,0.713147,0.977589,0.977315,0.927159,0.977965,0.98023
1,0.117333,0.515602,0.517194,0.509065,0.607907,0.592284,0.608406,0.623048,0.608631
2,0.152167,0.241809,0.318808,0.217333,0.462663,0.44739,0.419331,0.45853,0.424358
3,0.132049,0.486079,0.632747,0.475482,0.89652,0.889494,0.94817,0.954509,0.958348
4,0.781323,0.707078,0.868711,0.895378,0.88049,0.848538,0.97212,0.970686,0.98109
5,0.261423,0.271465,0.658732,0.405444,0.607075,0.558844,0.898164,0.918863,0.992542
6,0.634582,0.603178,0.638091,0.6752,0.888578,0.885952,0.918572,0.921728,0.948215
7,0.331698,0.270203,0.327487,0.296157,0.303439,0.300555,0.249113,0.351992,0.363502
8,0.03304,0.025642,0.052699,0.024573,0.667953,0.669021,0.66657,0.659605,0.705536
9,0.00404,0.032737,0.221818,0.025411,0.869897,0.873334,0.725779,0.862668,0.427465
