# Computação da F1 com Wilcoxon-test

In [1]:
import pickle
import numpy as np
import pandas as pd
import scipy.stats as stats
from glob import glob
from sklearn.metrics import f1_score

In [2]:
def load_pickle(pickle_path: str):

    with open(pickle_path, 'rb') as fd:
        return pickle.load(fd)

In [3]:
strategies = [
    "igcngru_features",
    "idarkvec"
]

k_n = 'k3'

In [6]:
df = pd.read_csv("../data/2022/input/stacking_predictions/out/k3/test/idarkvec_20221021_fold02.csv")

In [7]:
df.columns

Index(['Unnamed: 0', 'src_ip', 'censys', 'driftnet', 'internetcensus',
       'intrinsec', 'ipip', 'mirai', 'onyphe', 'rapid7', 'securitytrails',
       'shadowserver', 'shodan', 'u_mich', 'unk_bruteforcer', 'unk_exploiter',
       'unk_spammer', 'unknown', 'y_true'],
      dtype='object')

In [8]:
def f1(df: pd.DataFrame):
    probs_cols = [
        "censys",
        "driftnet",
        "internetcensus",
        "intrinsec",
        "ipip",
        "mirai",
        "onyphe",
        "rapid7",
        "securitytrails",
        "shadowserver",
        "shodan",
        "u_mich",
        "unk_bruteforcer",
        "unk_exploiter",
        "unk_spammer",
        "unknown",
    ]

    label_to_idx = {l: idx for idx, l in enumerate(probs_cols)}

    # Getting only probabilities.
    probs = df.drop(columns=["y_true"])[probs_cols].copy(deep=True)
    # Getting labels.
    labels = df.y_true.copy(deep=True)
    # Building a dictionay of labels (strings) to int (class number).
    # Converting labels (string) to int (class number).
    y = [label_to_idx[l] for l in labels]
    # Taking predctions out of probabilities.
    preds = probs.values.argmax(axis=1)
    # Computing metrics.
    macro = f1_score(
        y, preds, labels=np.arange(len(probs_cols)), average="macro", zero_division=0
    )
    # Macro by class.
    macro_by_class = f1_score(
        y, preds, labels=np.arange(len(probs_cols)), average=None, zero_division=0
    )
    return macro, macro_by_class

In [9]:
days = sorted([ f.split('/')[-1].split('_')[-2] for f in glob(f"../data/2022/input/stacking_predictions/out/{k_n}/test/idarkvec*_fold00.csv") ])
days

['20221021',
 '20221022',
 '20221023',
 '20221024',
 '20221025',
 '20221026',
 '20221027',
 '20221028',
 '20221029',
 '20221030',
 '20221031']

In [10]:
pickle_path = f"../data/2022/output/reports/stacking-v-0.4/idarkvec-igcngru_features/k3.pkl"
stacking = load_pickle(pickle_path)

In [11]:
stacking['20221021'][0].keys()

dict_keys(['y', 'preds'])

### Computing Wilcoxon-Test by Day.

In [12]:
cols = [
    "censys",
    "driftnet",
    "internetcensus",
    "intrinsec",
    "ipip",
    "mirai",
    "onyphe",
    "rapid7",
    "securitytrails",
    "shadowserver",
    "shodan",
    "u_mich",
    "unk_bruteforcer",
    "unk_exploiter",
    "unk_spammer",
    "unknown",
]

label_to_idx = {col: idx for idx, col in enumerate(cols)}

In [13]:
target_class = "shadowserver"
target_strategies = ["idarkvec", "igcngru_features"]

In [15]:
macros = {}
paired_test_days = {}
# For each strategy.
for strat in target_strategies:
    print('*' * 5, strat, '*' * 5)
    macros[strat] = {}
    paired_test_days[strat] = {}
    # For each day.
    for day in days:
        macros[strat][day] = {}
        strat_points, stacking_points = [], []
        # For each fold.
        for fold in np.arange(10):
            file_path = f"../data/2022/input/stacking_predictions/out/{k_n}/test/{strat}_{day}_fold0{fold}.csv"
            df = pd.read_csv(file_path)
            _, macro_by_class = f1(df)
            macros[strat][day][fold] = macro_by_class
            strat_points.append(macro_by_class[label_to_idx[target_class]])
            stacking_points.append(f1_score(stacking[day][fold]['y'],
                                    stacking[day][fold]["preds"],
                                    average=None,
                                    labels=np.arange(len(cols)),
                                    zero_division=0)[label_to_idx[target_class]])
            
        try:
            res = stats.wilcoxon(stacking_points, strat_points, alternative='greater')
        except:
            res = [-1, -1]
        paired_test_days[strat][day] = f"{res[0]};{res[1]}"
        
        

        

***** idarkvec *****
***** igcngru_features *****


In [16]:
for strat in target_strategies:
    print(strat)
    for day in days:
        print(paired_test_days[strat][day])

idarkvec
0.0;1.0
0.0;1.0
0.0;1.0
0.0;1.0
0.0;1.0
0.0;1.0
0.0;1.0
0.0;1.0
0.0;1.0
0.0;1.0
0.0;1.0
igcngru_features
0.0;1.0
0.0;1.0
0.0;1.0
0.0;1.0
0.0;1.0
0.0;1.0
0.0;1.0
0.0;1.0
0.0;1.0
0.0;1.0
0.0;1.0


# Wilcoxon-Test over the whole points at once.

In [120]:
for strat in target_strategies:
    stacking_ps = []
    strats_ps = []
    for day in days:
        for fold in np.arange(10):
            stacking_ps.append(stacking[day][fold]['classes'][label_to_idx[target_class]])
            strats_ps.append(macros[strat][day][fold][label_to_idx[target_class]])
    res = stats.wilcoxon(stacking_ps, strats_ps, alternative="greater")
    print(f"{target_class.upper()}\t{strat.upper()}\t{res[0]}\t{res[1]}")

SHADOWSERVER	IDARKVEC	4701.0	3.505575950575009e-14
SHADOWSERVER	IGCNGRU_FEATURES	4841.0	7.098715339266108e-14


In [121]:
a = [4,2,1,4,2,3,5,1,2,1]
b = [3,3,4,5,5,1,8,5,4,4]

In [122]:
stats.wilcoxon(a, b, alternative='greater')

WilcoxonResult(statistic=6.5, pvalue=0.990234375)

In [123]:
stats.wilcoxon(b, a, alternative='greater')

WilcoxonResult(statistic=48.5, pvalue=0.0185546875)