# Helper notebook for parsing and analyzing results from the experiments

In [None]:
from tqdm import tqdm 
from sklearn.metrics import roc_curve
import numpy as np
from scipy.stats import mode
import random

def compute_eer(label, pred):
    """
    Compute the equal error rate (EER) from labels and prediction scores.

    The ROC curve is obtained with `roc_curve(label, pred)`. The EER is read
    at the ROC point where the false-negative rate and the false-positive
    rate are closest to each other.

    Code inspired by https://github.com/YuanGongND/python-compute-eer
    """
    # roc_curve returns arrays; the thresholds are not needed here.
    false_pos_rate, true_pos_rate, _ = roc_curve(label, pred)
    false_neg_rate = 1 - true_pos_rate

    # Index of the operating point where |FNR - FPR| is smallest.
    closest = np.nanargmin(np.absolute(false_neg_rate - false_pos_rate))

    # In theory FPR and FNR coincide at the EER point; on a discrete curve
    # they can differ slightly, so their average is returned.
    return (false_pos_rate[closest] + false_neg_rate[closest]) / 2


def calculate_eer_in_sliding_window(y_test, res, window_size=1, threshold_step=10):
    '''
    Computes the EER using majority voting over sliding windows of size `window_size`.

    Every sample up to and including the last one labelled 1 in `y_test` is
    treated as a genuine-user score; everything after it is treated as an
    intruder score (so the genuine samples are expected to come first).
    Assumes `res` is a 1-D sequence of scores aligned with `y_test`.

    Parameters:
        y_test: labels; 1 marks the genuine user.
        res: scores, higher meaning "more likely the genuine user".
        window_size: number of consecutive scores voting in each window.
        threshold_step: stride used when picking candidate thresholds from
            the scores sorted in descending order (every `threshold_step`-th
            score is tried).

    Returns:
        `(eer, thresholds)` where `eer` is the FAR at the first candidate
        threshold after which `FAR - FRR` changes sign and `thresholds` is the
        array of all candidate thresholds at which such a sign change occurs.
        `(1, 0)` is returned when no sign change is found.

    Raises:
        ValueError: if `y_test` contains no sample labelled 1.
    '''
    y_test = np.asarray(y_test)
    res = np.asarray(res)

    genuine = np.flatnonzero(y_test == 1)
    if genuine.size == 0:
        raise ValueError('y_test contains no sample labelled 1')
    # +1 so the last genuine sample stays on the user side of the split.
    split = genuine[-1] + 1
    user_results = res[:split]
    intruder_results = res[split:]

    offsets = np.arange(window_size)

    def windows(scores):
        # All len(scores) - window_size + 1 overlapping windows, one per row.
        count = max(len(scores) - window_size + 1, 0)
        return scores[np.arange(count)[:, None] + offsets[None, :]]

    user_windows = windows(user_results)
    intruder_windows = windows(intruder_results)

    thresholds = np.sort(res)[::-1][::threshold_step]
    frrs = np.empty(len(thresholds))
    fars = np.empty(len(thresholds))
    for k, threshold in enumerate(thresholds):
        # Strict majority: a tie counts as "no majority", which is what taking
        # the mode of the boolean votes gives (the smaller value, False, wins).
        accepted = (user_windows >= threshold).sum(axis=1) * 2 > window_size
        rejected = (intruder_windows < threshold).sum(axis=1) * 2 > window_size
        frrs[k] = 1 - np.mean(accepted)   # genuine windows not accepted
        fars[k] = 1 - np.mean(rejected)   # intruder windows not rejected

    # Positions where FAR - FRR changes sign between consecutive thresholds.
    crossings = np.flatnonzero(np.diff(np.sign(fars - frrs)))
    if crossings.size > 0:
        return fars[crossings][0], thresholds[crossings]
    return 1, 0

def calculate_eer_with_intruders(y_test, res, window_size=1, intruders=1, threshold=0):
    '''
    Measures how often majority-vote windows are accepted when the last
    `intruders` scores of every genuine window are replaced by intruder scores.

    Every sample up to and including the last one labelled 1 in `y_test` is
    treated as a genuine-user score; everything after it is treated as an
    intruder score. Sliding windows of size `window_size` are built from the
    genuine scores, then a randomly positioned run of consecutive intruder
    windows (of width `intruders`) overwrites the tail of each genuine window.
    Assumes `res` is a 1-D sequence of scores aligned with `y_test`.

    Parameters:
        y_test: labels; 1 marks the genuine user.
        res: scores, higher meaning "more likely the genuine user".
        window_size: number of scores voting in each window.
        intruders: how many trailing scores per window are replaced
            (0 <= intruders <= window_size).
        threshold: acceptance threshold; it has to be provided, e.g. one
            obtained from `calculate_eer_in_sliding_window`. A score votes
            "accept" when it is >= threshold, the same comparison used there.

    Returns:
        The fraction of polluted windows in which a strict majority of scores
        is accepted. Note this is an acceptance rate at the given threshold,
        not an EER computed from scratch.

    Raises:
        ValueError: if `y_test` has no sample labelled 1, if `intruders` is
            outside [0, window_size], or if there are too few intruder scores
            to pollute every genuine window.
    '''
    y_test = np.asarray(y_test)
    res = np.asarray(res)

    if not 0 <= intruders <= window_size:
        raise ValueError('intruders must be between 0 and window_size')

    genuine = np.flatnonzero(y_test == 1)
    if genuine.size == 0:
        raise ValueError('y_test contains no sample labelled 1')
    # +1 so the last genuine sample stays on the user side of the split.
    split = genuine[-1] + 1
    user_results = res[:split]
    intruder_results = res[split:]

    # Fancy indexing yields a copy, so overwriting columns below leaves `res` intact.
    n_user_windows = max(len(user_results) - window_size + 1, 0)
    user_windows = user_results[
        np.arange(n_user_windows)[:, None] + np.arange(window_size)[None, :]
    ]

    if intruders != 0:
        n_intruder_windows = len(intruder_results) - intruders + 1
        if n_intruder_windows < n_user_windows:
            raise ValueError('not enough intruder samples to pollute every user window')
        # Pick a random run of consecutive intruder windows, one per user window.
        start_index = random.randint(0, n_intruder_windows - n_user_windows)
        rows = np.arange(start_index, start_index + n_user_windows)[:, None]
        user_windows[:, -intruders:] = intruder_results[rows + np.arange(intruders)[None, :]]

    # Strict majority: a tie counts as "not accepted", which is what taking the
    # mode of the boolean votes gives (the smaller value, False, wins).
    accepted = (user_windows >= threshold).sum(axis=1) * 2 > window_size
    return np.mean(accepted)

# Calculate results from a specific experiment (for all iterations)

In [None]:
import pickle, glob

# One EER per pickled result file; the mean over all files is printed.
# NOTE: pickle.load executes arbitrary code from the file, so only point
# this at result files you produced yourself.
results = []
for result_path in tqdm(glob.glob('test_results/*/*')):
    with open(result_path, 'rb') as handle:
        # Only the first two pickled entries (labels, scores) are used.
        y_test, res, *_ = pickle.load(handle)
    file_eer = compute_eer(y_test, res)
    results.append(file_eer)

print(np.mean(results, axis=0))

# Calculate EER in sliding window - majority vote

Calculate the EER in a sliding window using a majority-vote approach. Multiprocessing (a `multiprocessing.Pool`) is employed due to the large amount of data.

In [None]:
import glob
import numpy as np
import pickle
from multiprocessing import Pool

def calc_eer_with_winow(file_path):
    """
    Compute sliding-window EERs for one pickled result file and store them.

    The labels and scores are read from `file_path`, and
    `calculate_eer_in_sliding_window` is evaluated for window sizes
    3, 5, 7, 9, 11 and 13. The list of results (in that order) is pickled to
    `touchalytics_area_windows/` under the same file name as the input.
    """
    with open(file_path, 'rb') as source:
        # Only the first two pickled entries (labels, scores) are used.
        y_test, res, *_ = pickle.load(source)

    per_window = [
        calculate_eer_in_sliding_window(y_test, res, size)
        for size in (3, 5, 7, 9, 11, 13)
    ]

    with open(f'touchalytics_area_windows/{file_path.split("/")[-1]}', 'wb') as sink:
        pickle.dump(per_window, sink)


# Resume support: if a previous run was interrupted, only the inputs that do
# not yet have an output file are processed. Inputs and outputs are matched
# on the integer that follows the last '_' in the file name (the final four
# characters, presumably the extension, are stripped before parsing).
source_paths = list(glob.glob('results/touchalytics_with_area/*'))
finished_ids = {
    int(path.split('_')[-1][:-4])
    for path in glob.glob('touchalytics_area_windows/*')
}

a = [
    path for path in source_paths
    if int(path.split('_')[-1][:-4]) not in finished_ids
]

# Fan the remaining files out over 32 worker processes.
with Pool(32) as p:
    p.map(calc_eer_with_winow, a)


# Calculate intruder effect on EER - majority vote windows

Calculates the intruder effects on EER.
Thresholds have to be generated before for each window size, using the `calculate_eer_in_sliding_window` method.

In [None]:
# For every user file: the acceptance figure returned by
# calculate_eer_with_intruders for 0..10 intruder samples in a window of 11,
# each averaged over 100 random placements of the intruder samples.
#
# NOTE(review): `r` is not defined in this cell; it is presumably a mapping
# from user id to the per-window-size results saved by the sliding-window
# step - confirm it is loaded before running this cell.
# NOTE(review): index -1 selects the last stored window size; verify that it
# is the entry for window size 11 used below.
result = []

for fn in tqdm(sorted(glob.glob('results/touchalytics_with_area/*'))):
    uid = int(fn.split('_')[-1][:-4])
    with open(fn, 'rb') as f:
        # Only the first two pickled entries (labels, scores) are used.
        y_test, res, *_ = pickle.load(f)

    # First threshold stored for this user.
    uid_threshold = r[uid][-1][1][0]
    tr = [
        np.mean([
            calculate_eer_with_intruders(y_test, res, 11, i, uid_threshold)
            for _ in range(100)
        ])
        for i in range(0, 11)
    ]
    result.append(tr)

result = np.array(result)

# Calculate mean and std of features in same-session and different-session windows

Calculates the std of means, and the std of stds, for each feature in same-session windows (non-overlapping windows of size 400 are expected to have similar values).

The following cell performs the same calculations with windows from different sessions (windows over the entire sessions are expected to have values larger than the previous ones).

Data has to be loaded from the pickled BrainRun file.

In [None]:
# Within-session variability: each sufficiently long session is cut into
# consecutive chunks of `window_split` rows; per chunk the feature-wise mean
# and std are taken, and the spread (std) of those statistics across the
# chunks is averaged over all features.
intra_session_results = []
window_split = 400

for user_id in range(len(users)):
    for session in users[user_id]['devices'][0]['sessions']:
        # Sessions shorter than one chunk are skipped.
        if len(session) < window_split:
            continue

        # NOTE(review): when len(session) is not a multiple of window_split
        # the last chunk is shorter than the others - confirm this is intended.
        chunks = [
            session[start:start + window_split]
            for start in range(0, len(session), window_split)
        ]
        splits_mean = np.array([np.mean(chunk, axis=0) for chunk in chunks])
        splits_std = np.array([np.std(chunk, axis=0) for chunk in chunks])

        intra_session_results.append((
            np.mean(np.std(splits_mean, axis=0)),   # mean over features of std-of-means
            np.mean(np.std(splits_std, axis=0)),    # mean over features of std-of-stds
        ))

intra_session_results = np.array(intra_session_results)
np.mean(intra_session_results, axis=0)


In [None]:
# Across-session variability: the feature-wise mean and std of every session
# with at least `window_split` rows are collected, and the spread (std) of
# those per-session statistics is averaged over all features.
inter_session_results = []
window_split = 1000

long_sessions = [
    session
    for user_id in range(len(users))
    for session in users[user_id]['devices'][0]['sessions']
    if len(session) >= window_split
]

sess_m = [np.mean(session, axis=0) for session in long_sessions]
sess_s = [np.std(session, axis=0) for session in long_sessions]

print(np.mean(np.std(sess_m, axis=0)))
print(np.mean(np.std(sess_s, axis=0)))