In [1]:
import pandas as pd
from glob import glob
from sklearn.metrics import classification_report
import numpy as np


# Load DataFrames: one CSV per matching file, indexed by src_ip.
# Sorting makes the processing order reproducible (glob order is unspecified).
files = sorted(glob('out_loo/k3/train/igcngru_features_202112*'))
if not files:
    # Fail early with a clear message instead of the opaque
    # "No objects to concatenate" that pd.concat would raise further down.
    raise FileNotFoundError(
        "No files match 'out_loo/k3/train/igcngru_features_202112*'"
    )
dfs = [pd.read_csv(x, index_col=['src_ip']) for x in files]

reps = []
# For each DataFrame (one daily report per file)
for df in dfs:
    # Drop Unknown samples
    known = df[df.y_true != 'unknown']
    y_true = known['y_true']
    # Get the prediction as the label with the maximum prediction probability.
    # y_true and y_pred come from the same filtered rows, so they stay aligned
    # row by row even if a src_ip appears more than once (merging back on
    # src_ip would build a cross product for duplicated keys).
    # NOTE(review): assumes every column other than y_true holds a per-class
    # score whose column name is the class label -- confirm against the CSVs.
    y_pred = known.drop(columns=['y_true']).idxmax(axis=1)
    # Get the classification report, restricted to the labels that actually
    # occur in this file's ground truth
    rep = classification_report(
        y_true,
        y_pred,
        labels=np.unique(y_true),
        output_dict=True,
        zero_division=0.0
    )
    # Extract only F1-Score (rows: class labels and the averages)
    rep = pd.DataFrame(rep).T[['f1-score']]
    # Append the daily report
    reps.append(rep)

# Concatenate the daily reports side by side and average per row.
# mean() skips NaN, so a label missing from some daily report is averaged
# only over the reports in which it appears.
avg = (
    pd.concat(reps, axis=1)
    .mean(axis=1)
    .round(2)
    # The result is a Series, so rows must be dropped via `index=`
    # (`columns=` is silently ignored on a Series). errors='ignore' because
    # 'micro avg' is not always present in the report.
    .drop(index=['micro avg', 'weighted avg'], errors='ignore')
)

# Correct index: fixed presentation order; rows not listed here are discarded
# and listed labels with no score show up as NaN.
avg = avg.reindex([
    'mirai', 'unk_bruteforcer', 'unk_spammer', 'shadowserver', 'driftnet', 'internetcensus',
    'censys', 'rapid7', 'onyphe', 'netsystems', 'shodan', 'unk_exploiter', 'securitytrails',
    'intrinsec', 'macro avg'
])

avg

mirai              0.98
unk_bruteforcer    0.63
unk_spammer        0.47
shadowserver       0.96
driftnet           0.97
internetcensus     0.92
censys             0.92
rapid7             0.97
onyphe             0.91
netsystems         0.95
shodan             0.79
unk_exploiter      0.10
securitytrails     0.98
intrinsec          0.69
macro avg          0.80
dtype: float64