In [76]:
import numpy as np
import pandas as pd
from glob import glob
from sklearn.metrics import classification_report

In [91]:
# Result set under evaluation: the name of the out_loo/ sub-directory whose
# train/ CSVs are globbed and read by the cells below.
k_n = "k7"

In [92]:
# Strategy names, used as the file-name prefix of each per-day result CSV.
strategies = [
    # Plain strategy names.
    "gcn", "igcn", "gcngru", "igcngru",
    # The same four names with the "_features" suffix.
    "gcn_features", "igcn_features", "gcngru_features", "igcngru_features",
    # Remaining strategy.
    "idarkvec",
]

In [93]:
def f1(df: pd.DataFrame, probs_cols=None):
    """Build a per-class classification report from arg-max predictions.

    Each row of ``df`` carries one probability per class plus the true label
    in ``y_true``. The predicted class of a row is the name of the probability
    column holding the row's largest value. Despite the function's name, the
    whole report (precision, recall, F1 and support) is returned, not only F1.

    Args:
        df: Frame with a ``y_true`` column and one column for every entry of
            ``probs_cols``. Any other column is ignored.
        probs_cols: Names of the probability columns that compete for the
            prediction, in tie-breaking order. Defaults to the fifteen class
            columns used in this notebook (including ``unknown``).

    Returns:
        The dict produced by ``classification_report(..., output_dict=True)``,
        limited to the classes that actually occur in ``y_true``. Metrics that
        would divide by zero are reported as 0.0.

    Raises:
        KeyError: If ``y_true`` or one of ``probs_cols`` is not a column of
            ``df``.
    """
    if probs_cols is None:
        probs_cols = ['mirai',
                      'unk_bruteforcer',
                      'unk_spammer',
                      'shadowserver',
                      'driftnet',
                      'internetcensus',
                      'censys',
                      'rapid7',
                      'onyphe',
                      'netsystems',
                      'shodan',
                      'unk_exploiter',
                      'securitytrails',
                      'intrinsec',
                      'unknown']

    # True labels. Nothing below mutates the frame, so no defensive copy is needed.
    labels = df["y_true"]
    # Selecting the probability columns explicitly already leaves out y_true
    # (and every other non-probability column), so no separate drop is needed.
    probs = df[list(probs_cols)]
    # Take predictions out of the probabilities: the column with the row maximum.
    # On ties idxmax keeps the first such column, so the order of probs_cols matters.
    preds = probs.idxmax(axis=1)
    # Per-class metrics, only for classes present in the ground truth.
    return classification_report(labels, preds, labels=np.unique(labels), output_dict=True, zero_division=0.0)

In [94]:
# One result file, loaded to inspect its layout and to try f1() on it.
# NOTE(review): this path hard-codes "k3" although k_n is set to 'k7' above;
# confirm that inspecting the k3 run here is intentional.
sample_csv = "out_loo/k3/train/gcn_20211221_loo.csv"
df = pd.read_csv(sample_csv)
df.head()

Unnamed: 0,src_ip,mirai,unk_bruteforcer,unk_spammer,shadowserver,driftnet,internetcensus,censys,rapid7,onyphe,netsystems,shodan,unk_exploiter,securitytrails,intrinsec,unknown,y_true
0,45.145.66.212,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,unknown
1,167.94.138.16,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.666667,censys
2,193.201.9.74,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.666667,unknown
3,31.168.67.5,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.333333,mirai
4,46.101.221.223,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.666667,unknown


In [95]:
# Classification report for the single sample file loaded above.
report = f1(df=df)

In [96]:
# Spot-check the metrics of one class.
report["censys"]

{'precision': 0.5480769230769231,
 'recall': 0.3737704918032787,
 'f1-score': 0.4444444444444444,
 'support': 305.0}

In [97]:
# Every entry in the train/ directory of the selected result set.
train_pattern = f"out_loo/{k_n}/train/*"
files = glob(train_pattern)

In [98]:
# The day is the second-to-last "_"-separated token of each path
# (e.g. "20211221" in "..._20211221_loo.csv"); keep each day once, in sorted order.
days = sorted({path.split('_')[-2] for path in files})
days

['20211221',
 '20211222',
 '20211223',
 '20211224',
 '20211225',
 '20211226',
 '20211227',
 '20211228',
 '20211229',
 '20211230',
 '20211231']

In [99]:
# scores[strategy][day] -> classification report (dict) for that day's CSV.
# NOTE(review): the loop rebinds the notebook-level `df` that an earlier cell
# loaded from the sample file, so re-running `report = f1(df)` afterwards uses
# the last frame read here.
scores = {}
for strat in strategies:
    per_day = scores[strat] = {}
    for day in days:
        file_path = f"out_loo/{k_n}/train/{strat}_{day}_loo.csv"
        dataframe = pd.read_csv(file_path)
        # Score only the rows whose true class is known.
        is_known = dataframe["y_true"] != "unknown"
        df = dataframe.loc[is_known].copy()
        per_day[day] = f1(df)

In [100]:
# Classes whose F1 scores are aggregated below: the probability columns used
# by f1(), without 'unknown'.
labels = [
    "mirai", "unk_bruteforcer", "unk_spammer", "shadowserver",
    "driftnet", "internetcensus", "censys", "rapid7",
    "onyphe", "netsystems", "shodan", "unk_exploiter",
    "securitytrails", "intrinsec",
]

In [101]:
# f1_points[strategy][label] -> list of that label's F1 score, one per day.
f1_points = {}
for strat in strategies:
    f1_points[strat] = {}
    for label in labels:
        # A day whose report has no entry for this label contributes nothing.
        f1_points[strat][label] = [
            day_report[label]["f1-score"]
            for day_report in scores[strat].values()
            if label in day_report
        ]

In [102]:
# means[strategy][label] -> mean of the label's per-day F1 scores.
means = {}
for strat in f1_points:
    means[strat] = {}
    for label in labels:
        points = f1_points[strat][label]
        # A label that never appeared in any day's report has no points.
        # np.mean([]) would still give nan but also emits a RuntimeWarning,
        # so the empty case is handled explicitly with the same result.
        means[strat][label] = np.mean(points) if len(points) > 0 else np.nan

In [103]:
# Mean F1 as a table: one row per label, one column per strategy.
pd.DataFrame(data=means)

Unnamed: 0,gcn,igcn,gcngru,igcngru,gcn_features,igcn_features,gcngru_features,igcngru_features,idarkvec
mirai,0.629611,0.774843,0.692567,0.73592,0.97841,0.97843,0.862161,0.979522,0.981704
unk_bruteforcer,0.083696,0.535798,0.536806,0.542273,0.594541,0.590996,0.601885,0.632,0.601197
unk_spammer,0.12619,0.198517,0.302721,0.187049,0.433384,0.423821,0.38099,0.441411,0.410073
shadowserver,0.128438,0.506951,0.653184,0.491316,0.898268,0.891561,0.940643,0.947954,0.961232
driftnet,0.794005,0.723707,0.864973,0.893524,0.889335,0.862444,0.967207,0.966852,0.968282
internetcensus,0.262459,0.231789,0.652343,0.424341,0.593495,0.532736,0.892595,0.915311,0.991897
censys,0.644984,0.609401,0.66149,0.693191,0.886571,0.885565,0.913626,0.91809,0.945643
rapid7,0.89453,0.748915,0.89812,0.845447,0.849735,0.836464,0.680759,0.965519,1.0
onyphe,0.04545,0.025856,0.058535,0.029809,0.933935,0.935195,0.938202,0.933947,0.970721
netsystems,0.004444,0.05149,0.161312,0.032609,0.952522,0.932236,0.790379,0.931272,0.437522
