In [1]:
import pickle
import numpy as np
import pandas as pd
import scipy.stats as stats
import scikit_posthocs as sp
from sklearn.metrics import classification_report

In [6]:
# Identifier used in the report and prediction file paths below.
k_n = "k3"

# Feature sets combined by the stacking model; joined (sorted) to locate its report.
stacking_strategies = [
    "gcn_features",
    "igcn_features",
    "gcngru_features",
    "igcngru_features",
    "idarkvec",
]

# Baseline strategies compared against stacking.
strategies = ["idarkvec"]


In [7]:
# Directory suffix: the stacking strategy names in sorted order, dash-separated.
strats_posfix = "-".join(sorted(stacking_strategies))

In [9]:
# Load the pickled stacking report for this strategy combination and k_n.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
report_path = f"reports/stacking-nodes-v-0.0/{strats_posfix}/{k_n}.pkl"
with open(report_path, mode="rb") as fd:
    stacking_reporte = pickle.load(fd)

In [10]:
# Top-level keys of the stacking report, in ascending order.
days = sorted(stacking_reporte.keys())

In [11]:
def f1(df: pd.DataFrame):
    """Build a classification report from a frame of per-class scores.

    The predicted class of each row is the name of the score column holding
    the row's maximum value; it is compared against the ``y_true`` column.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain a ``y_true`` column and one column per class listed in
        ``class_columns`` below.

    Returns
    -------
    dict
        Output of ``classification_report(..., output_dict=True)``, restricted
        to the labels that occur in ``y_true``; undefined metrics are
        reported as 0.0.
    """
    class_columns = [
        'mirai', 'unk_bruteforcer', 'unk_spammer', 'shadowserver', 'driftnet',
        'internetcensus', 'censys', 'rapid7', 'onyphe', 'netsystems', 'shodan',
        'unk_exploiter', 'securitytrails', 'intrinsec', 'unknown',
    ]

    # Work on copies so the caller's frame is never touched.
    class_scores = df[class_columns].copy(deep=True)
    y_true = df.y_true.copy(deep=True)

    # Predicted label = column with the highest score in each row.
    y_pred = class_scores.idxmax(axis=1)

    return classification_report(
        y_true,
        y_pred,
        labels=np.unique(y_true),
        output_dict=True,
        zero_division=0.0,
    )

In [213]:
# One classification report per (day, fold) prediction file, grouped by strategy.
strat_scores = {}
for strat in strategies:
    fold_reports = strat_scores[strat] = []
    for day in days:
        for fold in range(10):
            csv_path = f"stacking_predictions/out/{k_n}/test/{strat}_{day}_fold0{fold}.csv"
            fold_reports.append(f1(pd.read_csv(csv_path)))
            

In [214]:
# One classification report per (day, fold) entry of the stacking report.
stacking_scores = []
for day in days:
    day_report = stacking_reporte[day]
    for fold in np.arange(10):
        labels, preds = day_report[fold]['y'], day_report[fold]['preds']
        stacking_scores.append(
            classification_report(
                labels,
                preds,
                labels=np.unique(labels),
                output_dict=True,
                zero_division=0.0,
            )
        )

In [215]:
# Inspect the keys of the first stacking report (class names plus the
# 'accuracy', 'macro avg' and 'weighted avg' summary entries, per the output below).
stacking_scores[0].keys()

dict_keys(['censys', 'driftnet', 'internetcensus', 'intrinsec', 'mirai', 'netsystems', 'onyphe', 'securitytrails', 'shadowserver', 'shodan', 'unk_bruteforcer', 'unk_exploiter', 'unk_spammer', 'accuracy', 'macro avg', 'weighted avg'])

# Kruskal-Wallis test & PostHoc Test

In [216]:
def kruskal_posthoc(strat_points, alpha, target, strategies):
    """Run a Kruskal-Wallis test and, when significant, Dunn's post-hoc test.

    Parameters
    ----------
    strat_points : sequence of sequences of float
        One sample of scores per model. The samples must be ordered like
        ``strategies``, followed by the "stacking" sample as the last entry.
    alpha : float
        Significance level used for both the Kruskal-Wallis p-value and the
        pairwise post-hoc p-values.
    target : str
        Name of the label being compared; only used (upper-cased) in the
        printed messages.
    strategies : list of str
        Names of the baseline strategies. The list is not modified.

    Returns
    -------
    tuple
        ``(posthoc, verdict)`` where ``verdict`` is the upper-cased name of
        the winning model or ``"TIE"``. ``posthoc`` is the Dunn p-value table
        when the Kruskal-Wallis test is significant, ``None`` when it is not,
        and the caught exception when ``stats.kruskal`` raised.
    """
    strat_labels = [*strategies, "stacking"]

    try:
        _, p_value = stats.kruskal(*strat_points)
    except Exception as err:
        # Best effort: report the failure and treat the comparison as a tie.
        print(f"{target.upper()} - {str(err)}")
        return err, "TIE"

    # `not (p < alpha)` also routes a NaN p-value to the "no difference" branch.
    if not p_value < alpha:
        print(f"{target.upper()}.\tThe Kruskal-Wallis test is not statistically significant, suggesting no significant differences among the models.")
        return None, "TIE"

    # Perform post-hoc tests to identify which models differ from each other.
    posthoc = sp.posthoc_dunn(strat_points)

    # Candidate winner: the model with the highest mean score (first one on
    # ties). Starting from -inf lets a model win even if every mean is <= 0.
    best_idx = None
    best_model_mean_score = float("-inf")
    for i, scores in enumerate(strat_points):
        mean_score = sum(scores) / len(scores)
        if mean_score > best_model_mean_score:
            best_idx = i
            best_model_mean_score = mean_score

    # No comparable mean (e.g. all NaN): nothing can be declared best.
    if best_idx is None:
        return posthoc, "TIE"

    # The candidate wins only if it differs significantly from every other
    # model, i.e. the single p-value >= alpha in its row is the comparison
    # with itself.
    if np.sum(posthoc.values[best_idx] >= alpha) == 1:
        best_model = strat_labels[best_idx].upper()
        print(f"{target.upper()}.\tThe Kruskal-Wallis test is statistically significant, indicating differences among the models. The best-performing model is: {best_model}")
        return posthoc, best_model
    return posthoc, "TIE"


In [217]:
# Significance level passed to kruskal_posthoc.
alpha = 0.05

In [218]:

# Report entries compared between models: per-class entries plus the macro average.
cols = [
    'mirai', 'unk_bruteforcer', 'unk_spammer', 'shadowserver', 'driftnet',
    'internetcensus', 'censys', 'rapid7', 'onyphe', 'netsystems', 'shodan',
    'unk_exploiter', 'securitytrails', 'intrinsec',
    'macro avg',
]

In [219]:
# For every report entry, test the baseline strategies against stacking.
posthocs, best_model = [], []
for label in cols:
    # One F1 sample per baseline; reports that lack this label are skipped.
    strat_points = []
    for strat in strategies:
        strat_points.append(
            [point[label]["f1-score"] for point in strat_scores[strat] if label in point]
        )
    # The stacking sample goes last, matching the label order kruskal_posthoc builds.
    strat_points.append(
        [point[label]["f1-score"] for point in stacking_scores if label in point]
    )
    posthoc_result, winner = kruskal_posthoc(strat_points, alpha, label, strategies)
    posthocs.append(posthoc_result)
    best_model.append(winner)

MIRAI.	The Kruskal-Wallis test is statistically significant, indicating differences among the models. The best-performing model is: STACKING
UNK_BRUTEFORCER.	The Kruskal-Wallis test is statistically significant, indicating differences among the models. The best-performing model is: STACKING
UNK_SPAMMER.	The Kruskal-Wallis test is statistically significant, indicating differences among the models. The best-performing model is: STACKING
SHADOWSERVER.	The Kruskal-Wallis test is statistically significant, indicating differences among the models. The best-performing model is: STACKING
DRIFTNET.	The Kruskal-Wallis test is statistically significant, indicating differences among the models. The best-performing model is: STACKING
INTERNETCENSUS.	The Kruskal-Wallis test is statistically significant, indicating differences among the models. The best-performing model is: STACKING
CENSYS.	The Kruskal-Wallis test is statistically significant, indicating differences among the models. The best-perform

In [220]:
# Baseline reports plus a shallow copy of the stacking reports under "stacking".
full_scores = {**strat_scores, "stacking": list(stacking_scores)}

In [221]:
# Mean F1 per model and report entry, rounded to 4 decimals (same order as `cols`).
strat_means = {
    strat: [
        np.around(
            np.mean([point[label]["f1-score"] for point in reports if label in point]),
            decimals=4,
        )
        for label in cols
    ]
    for strat, reports in full_scores.items()
}



In [222]:
# One column per model, one row per entry of `cols`.
f1_table = pd.DataFrame(data=strat_means)

In [223]:
# Put the report-entry names in the first column. DataFrame.insert raises
# ValueError when the column already exists, so overwrite it in place on a
# re-run to keep this cell idempotent.
if "Labels" in f1_table.columns:
    f1_table["Labels"] = cols
else:
    f1_table.insert(loc=0, column="Labels", value=cols)
# Verdict per entry as returned by kruskal_posthoc (winning model name or "TIE").
f1_table["BestModel"] = best_model
f1_table

Unnamed: 0,Labels,idarkvec,stacking,BestModel
0,mirai,0.9802,0.9934,STACKING
1,unk_bruteforcer,0.6059,0.7598,STACKING
2,unk_spammer,0.4201,0.7236,STACKING
3,shadowserver,0.9575,0.9987,STACKING
4,driftnet,0.9813,0.9987,STACKING
5,internetcensus,0.9925,0.9987,STACKING
6,censys,0.9474,0.9887,STACKING
7,rapid7,0.9996,0.9991,TIE
8,onyphe,0.9696,0.9961,STACKING
9,netsystems,0.4558,0.9784,STACKING


In [224]:
# Show the verdict for every row of the table.
f1_table.BestModel

0     STACKING
1     STACKING
2     STACKING
3     STACKING
4     STACKING
5     STACKING
6     STACKING
7          TIE
8     STACKING
9     STACKING
10         TIE
11         TIE
12         TIE
13    STACKING
14    STACKING
Name: BestModel, dtype: object

In [225]:
# Scratch example: the integers 1..21 (the stop value is exclusive).
values = np.arange(start=1, stop=22)
values

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21])

In [226]:
# First group: the first 10 elements of `values`.
g1 = values[:10]
g1

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [227]:
# Second group: everything after the first 10 elements of `values`.
g2 = values[10:]
g2

array([11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21])

In [228]:
# Unweighted average of the two group means; compare with np.mean(values).
(np.mean(g1) + np.mean(g2))/2

10.75

In [229]:
# Mean over all elements of `values`, for comparison with the mean of group means.
np.mean(values)

11.0