In [1]:
%matplotlib widget
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd

from toolkit import *
from pathlib import Path
from dataclasses import dataclass, field

from itertools import combinations
from collections import defaultdict, namedtuple
from dataclasses import dataclass, field
from typing import *

# --- Experiment configuration -------------------------------------------------
n_conns = 800          # number of container instances to create
duration = 180         # seconds to sleep before the validation (to occupy more hosts)
region = 'us-central1'
service_url = ''       # Put your service URL here

# Show INFO-level records from gcp_logger (presumably provided by the toolkit star-import).
gcp_logger.setLevel(logging.INFO)

In [2]:
# validation measurements

if duration:
    conns = await connect_ts_sockets(service_url, n_conns)
    await disconnect_ts_sockets(conns)
    time.sleep(duration)

# AMD CPUs does not appear to have the rdseed covert channel, so we grab Intel machine only
# When we conducted the experiment, only Intel CPUs were observed
conns = await connect_ts_sockets(service_url, n_conns,
                                 extra_filter=lambda c: c.greeting.rdseed_support and c.greeting.is_intel)
mgroups = group_by_attrib(conns, func=lambda c: c.greeting.machine)
validator = FinpValidator(dict(), emitter_factory=RdseedEmitter)
grps, failed = await validator.validate(conns)
await disconnect_ts_sockets(conns)
print(f'{len(grps)} groups, {len(failed)} failed conns')
res = [{c.greeting for c in grp} for grp in grps]
val_res = res

[INFO] 2024-04-29 08:50:42,183@MainThread: A burst of 800 HTTP requests to https://eaao-t2dkbl7zea-uc.a.run.app/ takes 12.956s
[INFO] 2024-04-29 08:50:52,489@MainThread: Filtered out 142 SocketIO connections
[INFO] 2024-04-29 08:50:52,491@MainThread: https://eaao-t2dkbl7zea-uc.a.run.app/: 574 healthy SocketIO connections so far.
[INFO] 2024-04-29 08:51:05,801@MainThread: Filtered out 39 SocketIO connections
[INFO] 2024-04-29 08:51:05,803@MainThread: https://eaao-t2dkbl7zea-uc.a.run.app/: 691 healthy SocketIO connections so far.
[INFO] 2024-04-29 08:51:18,958@MainThread: Filtered out 20 SocketIO connections
[INFO] 2024-04-29 08:51:18,960@MainThread: https://eaao-t2dkbl7zea-uc.a.run.app/: 743 healthy SocketIO connections so far.
[INFO] 2024-04-29 08:51:32,105@MainThread: Filtered out 11 SocketIO connections
[INFO] 2024-04-29 08:51:32,108@MainThread: https://eaao-t2dkbl7zea-uc.a.run.app/: 775 healthy SocketIO connections so far.
[INFO] 2024-04-29 08:51:45,241@MainThread: Filtered out 3 So

In [3]:
# Data processing: helpers that score the clustering results
from sklearn.metrics.cluster import fowlkes_mallows_score, contingency_matrix

@dataclass
class ClusteringScore:
    """Pair-counting summary of how well predicted machines match the oracle groups.

    Instances are produced by ``calc_score``. ``TsResp`` is quoted in the
    annotations so that defining the class does not require the name to be
    resolvable at definition time.
    """
    # Fowlkes-Mallows index of the predicted labelling against the oracle.
    fmi: float
    # Number of responses that were scored.
    samples: int
    # Number of unordered response pairs, i.e. samples * (samples - 1) / 2.
    pairs: int
    # Pairs in the same oracle group AND on the same predicted machine.
    TP: int
    # Pairs on the same predicted machine but in different oracle groups.
    FP: int
    # Pairs that are separated in both labellings.
    TN: int
    # Pairs in the same oracle group but on different predicted machines.
    FN: int
    # Keyword arguments that were forwarded to ``get_machine`` for this score.
    config: Dict
    # Predicted-machine groups that span more than one oracle group.
    FP_grps: List[List["TsResp"]] = field(default_factory=list)
    # Oracle groups that were split across more than one predicted machine.
    FN_grps: List[List["TsResp"]] = field(default_factory=list)
    # Individual false-negative / false-positive pairs (filled only on request).
    FN_pairs: List[Tuple["TsResp", "TsResp"]] = field(default_factory=list)
    FP_pairs: List[Tuple["TsResp", "TsResp"]] = field(default_factory=list)

    @property
    def precision(self):
        """TP / (TP + FP); NaN when no pair was predicted to be co-located."""
        predicted_pairs = self.TP + self.FP
        # 0/0 is undefined: return NaN explicitly instead of raising
        # ZeroDivisionError (Python ints) or warning (numpy ints).
        return self.TP / predicted_pairs if predicted_pairs else float('nan')

    @property
    def recall(self):
        """TP / (TP + FN); NaN when the oracle contains no co-located pair."""
        oracle_pairs = self.TP + self.FN
        return self.TP / oracle_pairs if oracle_pairs else float('nan')


def calc_score(grps: List[Set[TsResp]], ret_pairs: bool = False, **kwargs):
    """Score the machine fingerprint against oracle (validated) groups.

    Every response is labelled twice: by the index of the oracle group that
    contains it, and by the machine returned from ``r.get_machine(**kwargs)``.
    The two labellings are then compared by pair counting.

    Args:
        grps: Oracle groups; responses in the same set are treated as co-located.
        ret_pairs: When true, also enumerate every false-negative and
            false-positive pair (quadratic in the number of responses).
        **kwargs: Forwarded unchanged to ``get_machine`` and stored on the
            result as ``config``.

    Returns:
        A ``ClusteringScore`` with the Fowlkes-Mallows index, the TP/FP/TN/FN
        pair counts, the offending groups and (optionally) offending pairs.

    Note:
        ``get_machine`` is evaluated once per response and assumed to return
        the same value for the same response and ``kwargs``.
    """
    resps = [r for grp in grps for r in grp]
    oracle_label_map = {r: i for i, grp in enumerate(grps) for r in grp}
    olabels = [oracle_label_map[r] for r in resps]

    # Resolve each machine once instead of once per comparison.
    machines = [r.get_machine(**kwargs) for r in resps]
    machine_of = dict(zip(resps, machines))

    # Exact integer ids keyed by equality. Using hash(machine) as the label
    # would merge distinct machines whose hashes collide.
    machine_ids = {}
    plabels = [machine_ids.setdefault(m, len(machine_ids)) for m in machines]

    # Predicted machines whose members come from more than one oracle group.
    m_grps = group_by_attrib(resps, func=lambda r: machine_of[r])
    fp_grps = [members for members in m_grps.values()
               if len({oracle_label_map[r] for r in members}) > 1]

    # Oracle groups whose members were assigned to more than one machine.
    fn_grps = [grp for grp in grps
               if len({machine_of[r] for r in grp}) > 1]

    n_samples = len(resps)
    n_pairs = (n_samples - 1) * n_samples // 2

    fmi = fowlkes_mallows_score(olabels, plabels, sparse=True)
    # Pair counts from the contingency table: sum(n_ij^2), sum(col^2) and
    # sum(row^2), each reduced by n_samples and halved, count the pairs that
    # agree in both / in the prediction / in the oracle respectively.
    c = contingency_matrix(olabels, plabels, sparse=True).astype(np.int64, copy=False)
    tp = (np.dot(c.data, c.data) - n_samples) // 2
    fp = (np.sum(np.asarray(c.sum(axis=0)).ravel() ** 2) - n_samples) // 2 - tp
    fn = (np.sum(np.asarray(c.sum(axis=1)).ravel() ** 2) - n_samples) // 2 - tp
    tn = n_pairs - tp - fp - fn

    fn_pairs, fp_pairs = [], []
    if ret_pairs:
        labelled = zip(resps, olabels, machines)
        for (r1, o1, m1), (r2, o2, m2) in combinations(labelled, 2):
            if o1 == o2 and m1 != m2:
                fn_pairs.append((r1, r2))
            elif o1 != o2 and m1 == m2:
                fp_pairs.append((r1, r2))

    return ClusteringScore(fmi=fmi, samples=n_samples, pairs=n_pairs,
                           TP=tp, FP=fp, TN=tn, FN=fn, config=kwargs,
                           FP_grps=fp_grps, FN_grps=fn_grps,
                           FN_pairs=fn_pairs, FP_pairs=fp_pairs)


def get_score_mean_std(score_lists: List[List[ClusteringScore]],
                       attrib: str) -> Tuple[np.ndarray, np.ndarray]:
    """Aggregate one score attribute across several score lists.

    Each inner list becomes one row of a float64 table, so all inner lists
    must have the same length. The mean and the standard deviation (numpy's
    default, ddof=0) are taken down the rows, giving one value per position.

    Args:
        score_lists: Score lists to aggregate, one row each.
        attrib: Name of the attribute (or property) read from every score.

    Returns:
        ``(mean, std)`` arrays, each as long as an inner list.
    """
    rows = []
    for scores in score_lists:
        rows.append([getattr(score, attrib) for score in scores])
    table = np.array(rows, dtype=np.float64)
    return table.mean(axis=0), table.std(axis=0)

In [4]:
# Score the validated groups with default get_machine settings and report the FMI.
validation_score = calc_score(val_res)
print(f'FMI: {validation_score.fmi}')

FMI: 1.0
