# Computação da F1 com Wilcoxon-test

In [None]:
import pickle
import numpy as np
import pandas as pd
import scipy.stats as stats
from glob import glob
from sklearn.metrics import f1_score

In [None]:
def load_pickle(pickle_path: str):
    """Return the object deserialized from the pickle file at *pickle_path*.

    NOTE: unpickling can execute arbitrary code, so this should only be
    pointed at trusted files.
    """
    with open(pickle_path, mode='rb') as stream:
        payload = pickle.load(stream)
    return payload

In [None]:
# Strategy identifiers: four base model names, each without and then with the
# "_features" suffix, followed by "idarkvec".
strategies = [
    f"{model}{suffix}"
    for suffix in ("", "_features")
    for model in ("gcn", "igcn", "gcngru", "igcngru")
] + ["idarkvec"]

# Configuration tag interpolated into the prediction and report paths.
k_n = "k3"

In [None]:
# Load a single prediction file (strategy idarkvec, day 20211224, fold 02)
# as a sample; note the "k3" directory is hard-coded here rather than using k_n.
df = pd.read_csv("stacking_predictions/out/k3/test/idarkvec_20211224_fold02.csv")

In [None]:
# Display the column names of the sample prediction file.
df.columns

In [None]:
def f1(df: pd.DataFrame):
    """Compute the macro and per-class F1 of one prediction frame.

    ``df`` must contain one score column per class name listed below and a
    ``y_true`` column holding the true class name of each row. The predicted
    class of a row is the class whose column has the highest value.

    Returns a ``(macro, macro_by_class)`` tuple: the macro-averaged F1 and
    the array of per-class F1 values, ordered like the class list. Classes
    for which F1 is undefined score 0 (``zero_division=0``).
    """
    class_names = [
        'mirai', 'unk_bruteforcer', 'unk_spammer', 'shadowserver', 'driftnet',
        'internetcensus', 'censys', 'rapid7', 'onyphe', 'netsystems',
        'shodan', 'unk_exploiter', 'securitytrails', 'intrinsec', 'unknown',
    ]
    class_ids = np.arange(len(class_names))
    # Class name -> class index, following the order of the list above.
    name_to_id = {name: pos for pos, name in enumerate(class_names)}

    # Score matrix restricted to (and ordered by) the known classes.
    scores = df.drop(columns=["y_true"])[class_names]
    # True class names translated to class indices.
    y = [name_to_id[name] for name in df.y_true]
    # Predicted class index = position of the largest score in each row.
    preds = np.argmax(scores.values, axis=1)

    # Both calls score against the full class list, not only the classes
    # that happen to appear in this frame.
    shared = dict(labels=class_ids, zero_division=0)
    macro = f1_score(y, preds, average="macro", **shared)
    macro_by_class = f1_score(y, preds, average=None, **shared)
    return macro, macro_by_class

In [None]:
# Sorted day stamps: the second-to-last "_"-separated token of the file name
# of every idarkvec fold-00 prediction file found for this configuration.
days = sorted(
    path.split('/')[-1].split('_')[-2]
    for path in glob(f"stacking_predictions/out/{k_n}/test/idarkvec*_fold00.csv")
)
days

In [None]:
# Load the pickled report for the selected configuration. Later cells index
# it as stacking[day][fold] and read the 'y', 'preds' and 'classes' entries.
pickle_path = f"reports/{k_n}.pkl"
stacking = load_pickle(pickle_path)

In [None]:
# Peek at the entries stored for one day (20211221) and its first fold.
stacking['20211221'][0].keys()

### Computing Wilcoxon-Test by Day.

In [None]:
# Class names; the position of a name in this list is its class index.
cols = [
    'mirai', 'unk_bruteforcer', 'unk_spammer', 'shadowserver', 'driftnet',
    'internetcensus', 'censys', 'rapid7', 'onyphe', 'netsystems',
    'shodan', 'unk_exploiter', 'securitytrails', 'intrinsec', 'unknown',
]

# Class name -> class index lookup.
label_to_idx = dict(zip(cols, range(len(cols))))

In [117]:
# Class whose per-class F1 is compared in the tests below.
target_class = "shadowserver"
# Strategies compared against the stacking results.
target_strategies = ["idarkvec", "igcngru_features"]

In [118]:
# Per-class F1 arrays of each strategy, indexed as macros[strat][day][fold].
macros = {}
# Per-day test outcome as a "statistic;p-value" string, indexed as
# paired_test_days[strat][day].
paired_test_days = {}
# For each strategy.
for strat in target_strategies:
    print('*' * 5, strat, '*' * 5)
    macros[strat] = {}
    paired_test_days[strat] = {}
    # For each day.
    for day in days:
        macros[strat][day] = {}
        # Target-class F1 per fold: one list for the strategy, one for stacking.
        strat_points, stacking_points = [], []
        # For each fold.
        for fold in np.arange(10):
            file_path = f"stacking_predictions/out/{k_n}/test/{strat}_{day}_fold0{fold}.csv"
            df = pd.read_csv(file_path)
            _, macro_by_class = f1(df)
            macros[strat][day][fold] = macro_by_class
            strat_points.append(macro_by_class[label_to_idx[target_class]])
            # Per-class F1 of the stacking entry for the same day and fold.
            # NOTE(review): presumably 'y' / 'preds' hold true / predicted
            # class indices in the order of `cols` - confirm against the report.
            stacking_by_class = f1_score(stacking[day][fold]['y'],
                                         stacking[day][fold]["preds"],
                                         average=None,
                                         labels=np.arange(len(cols)),
                                         zero_division=0)
            stacking_points.append(stacking_by_class[label_to_idx[target_class]])

        # One-sided paired test over the folds: are the stacking scores
        # greater than the strategy scores?
        try:
            res = stats.wilcoxon(stacking_points, strat_points, alternative='greater')
        except ValueError as err:
            # wilcoxon rejects some inputs with ValueError (e.g. when every
            # paired difference is zero). Keep the -1;-1 placeholder for that
            # day, but report it instead of hiding it; any other exception is
            # a real bug and is allowed to propagate.
            print(f"Wilcoxon test not computed for {strat} on {day}: {err}")
            res = [-1, -1]
        paired_test_days[strat][day] = f"{res[0]};{res[1]}"
        
        

        

***** idarkvec *****




***** igcngru_features *****




In [119]:
# Print each strategy name followed by its per-day "statistic;p-value"
# strings, in the order of `days`.
for strat in target_strategies:
    print(strat)
    day_results = paired_test_days[strat]
    for day in days:
        print(day_results[day])

idarkvec
17.5;0.26354462843276905
10.0;0.032799846073535935
21.0;0.013428347753762199
28.0;0.00877616296920843
34.0;0.012176559331126819
36.0;0.00575691403842271
45.0;0.003736770770438521
33.0;0.017845950058402206
28.0;0.008980238763039383
45.0;0.0037896409716948643
45.0;0.0037896409716948643
igcngru_features
55.0;0.0009765625
55.0;0.0009765625
55.0;0.0009765625
54.0;0.001953125
51.0;0.0068359375
28.0;0.008877961307018026
31.0;0.03350388737713736
15.0;0.172059248067656
34.5;0.07720326555382324
19.5;0.1741008397065631
24.0;0.045484473987678874


# Wilcoxon test over all points (every day and fold) at once.

In [120]:
# Pool the target-class scores of every (day, fold) pair and run a single
# one-sided paired test per strategy (stacking greater than strategy).
for strat in target_strategies:
    class_pos = label_to_idx[target_class]
    day_fold_pairs = [(day, fold) for day in days for fold in np.arange(10)]
    # Stacking scores are read from the report's 'classes' entry.
    stacking_ps = [stacking[d][k]['classes'][class_pos] for d, k in day_fold_pairs]
    # Strategy scores come from the per-class F1 arrays stored in `macros`.
    strats_ps = [macros[strat][d][k][class_pos] for d, k in day_fold_pairs]
    res = stats.wilcoxon(stacking_ps, strats_ps, alternative="greater")
    print(f"{target_class.upper()}\t{strat.upper()}\t{res[0]}\t{res[1]}")

SHADOWSERVER	IDARKVEC	4701.0	3.505575950575009e-14
SHADOWSERVER	IGCNGRU_FEATURES	4841.0	7.098715339266108e-14


In [121]:
# Two small paired samples used to check the direction of the one-sided test.
a = [4,2,1,4,2,3,5,1,2,1]
b = [3,3,4,5,5,1,8,5,4,4]

In [122]:
# One-sided test on the differences a - b: the alternative is that they tend
# to be positive (first sample greater than the second).
stats.wilcoxon(a, b, alternative='greater')

WilcoxonResult(statistic=6.5, pvalue=0.990234375)

In [123]:
# Same test with the arguments swapped, i.e. on the differences b - a.
stats.wilcoxon(b, a, alternative='greater')

WilcoxonResult(statistic=48.5, pvalue=0.0185546875)