### Classification, Authentication, and Identification of users based on free-text analysis.

In [None]:
# imports
import os
import pickle
import numpy as np
import pandas as pd 

# Smallest number of shared n-graphs for which calculate_r_distance computes a
# real distance; below this it returns 1.
minimum_profile_length_r = 10
# Same cut-off, applied by calculate_a_distance.
minimum_profile_length_a = 10     


#### Convert raw data to n-graph profiles

In [None]:
def keystrokes_to_digraphs(keystroke_array):
    """Build one digraph per pair of consecutive keystrokes.

    Args:
        keystroke_array: sequence of ``(key, timestamp)`` entries.

    Returns:
        List of ``("k1-k2", duration)`` tuples, where ``duration`` is the
        timestamp difference between the second and the first keystroke,
        rounded to 5 decimals. Fewer than two keystrokes yield ``[]``.
    """
    return [
        (
            "-".join((str(first[0]), str(second[0]))),
            np.round(second[1] - first[1], 5),
        )
        for first, second in zip(keystroke_array, keystroke_array[1:])
    ]

def keystrokes_to_trigraphs(keystroke_array):
    """Build one trigraph per window of three consecutive keystrokes.

    Args:
        keystroke_array: sequence of ``(key, timestamp)`` entries.

    Returns:
        List of ``("k1-k2-k3", duration)`` tuples, where ``duration`` is the
        timestamp difference between the third and the first keystroke,
        rounded to 5 decimals. Fewer than three keystrokes yield ``[]``.
    """
    windows = zip(keystroke_array, keystroke_array[1:], keystroke_array[2:])
    return [
        (
            "-".join((str(first[0]), str(second[0]), str(third[0]))),
            np.round(third[1] - first[1], 5),
        )
        for first, second, third in windows
    ]

def keystrokes_to_fourgraphs(keystroke_array):
    """Build one fourgraph per window of four consecutive keystrokes.

    Args:
        keystroke_array: sequence of ``(key, timestamp)`` entries.

    Returns:
        List of ``("k1-k2-k3-k4", duration)`` tuples, where ``duration`` is
        the timestamp difference between the fourth and the first keystroke,
        rounded to 5 decimals. Fewer than four keystrokes yield ``[]``.
    """
    windows = zip(
        keystroke_array,
        keystroke_array[1:],
        keystroke_array[2:],
        keystroke_array[3:],
    )
    return [
        (
            "-".join(str(stroke[0]) for stroke in window),
            np.round(window[3][1] - window[0][1], 5),
        )
        for window in windows
    ]


def calculate_mean_for_duplicates(ngraphs):
    """Collapse repeated n-graphs into a single entry carrying their mean time.

    Args:
        ngraphs: iterable of ``(key, time)`` tuples, possibly with repeated
            keys.

    Returns:
        List of ``(key, time)`` tuples with unique keys, in order of each
        key's first appearance. A key seen more than once gets the mean of
        all its times rounded to 5 decimals; a key seen once keeps its
        original time unchanged.
    """
    # Group in one pass; dict insertion order preserves first-appearance order.
    timings_by_key = {}
    for key, time in ngraphs:
        timings_by_key.setdefault(key, []).append(time)

    return [
        (key, np.round(np.mean(times), 5)) if len(times) > 1 else (key, times[0])
        for key, times in timings_by_key.items()
    ]

def create_user_profile(keystroke_sequence):
    """Derive the di-, tri- and fourgraph lists of one keystroke sample.

    Each n-graph list is passed through calculate_mean_for_duplicates so that
    every n-graph key occurs once.

    Returns:
        Tuple ``(digraphs, trigraphs, fourgraphs)`` of ``(key, time)`` lists.
    """
    extractors = (
        keystrokes_to_digraphs,
        keystrokes_to_trigraphs,
        keystrokes_to_fourgraphs,
    )
    digraphs, trigraphs, fourgraphs = (
        calculate_mean_for_duplicates(extract(keystroke_sequence))
        for extract in extractors
    )
    return digraphs, trigraphs, fourgraphs


def read_file(complete: pd.DataFrame, user: int, set: int) -> list[tuple[str, int]]:
    """Extract the keystrokes of one user/set combination from the full table.

    Args:
        complete: table with at least the columns ``user``, ``set``, ``key``
            and ``timestamp``.
        user: value to match in the ``user`` column.
        set: value to match in the ``set`` column (the name shadows the
            builtin but is kept for keyword-argument compatibility).

    Returns:
        ``(key, timestamp)`` tuples in row order, with the key converted to
        ``str``. The timestamp keeps whatever scalar type the ``timestamp``
        column holds. An empty list is returned when nothing matches.
    """
    # Filter once and read both columns from the same selection.
    selected = complete.loc[(complete['user'] == user) & (complete['set'] == set)]
    key_codes = selected['key'].to_list()
    timestamps = selected['timestamp'].to_list()

    return [(str(key), timestamp) for key, timestamp in zip(key_codes, timestamps)]

def read_user_data(complete):
    """Collect the keystroke samples of users 1..31, sets 1..15 each.

    Args:
        complete: table accepted by read_file.

    Returns:
        List with one entry per user (ascending user id); each entry is a
        list with one keystroke list per set (ascending set id).
    """
    return [
        [read_file(complete, user_id, set_id) for set_id in range(1, 16)]
        for user_id in range(1, 32)
    ]

def get_user_profiles(user_data):
    """Convert raw keystroke samples into per-user n-graph profiles.

    Args:
        user_data: list with one entry per user; each entry is a list of
            keystroke samples accepted by create_user_profile.

    Returns:
        List with one dict per user, in input order. Each dict has the keys
        ``"digraphs"``, ``"trigraphs"`` and ``"fourgraphs"``, each mapping to
        a list with one ``{n-graph: time}`` dict per sample.
    """
    user_profiles = []
    for u_data in user_data:
        digraphs = []
        trigraphs = []
        fourgraphs = []
        for sample in u_data:
            tmp_digraphs, tmp_trigraphs, tmp_fourgraphs = create_user_profile(sample)
            digraphs.append(dict(tmp_digraphs))
            trigraphs.append(dict(tmp_trigraphs))
            fourgraphs.append(dict(tmp_fourgraphs))

        # Every user is stored. This append used to be commented out, so the
        # function always returned an empty list. Poorly performing users are
        # dropped later by index (classify_dataset_28 / authenticate_dataset_28),
        # which requires the complete list here.
        user_profiles.append({"digraphs": digraphs, "trigraphs": trigraphs, "fourgraphs": fourgraphs})

    return user_profiles

def create_user_profiles(path_to_userdata, filename):
    """Read a keystroke CSV, build all user profiles and pickle them.

    Args:
        path_to_userdata: CSV path handed to ``pd.read_csv``.
        filename: path of the pickle file to write (overwritten if present).
    """
    raw_table = pd.read_csv(path_to_userdata)
    profiles = get_user_profiles(read_user_data(raw_table))
    with open(filename, "wb") as out_file:
        pickle.dump(profiles, out_file)
    

#### Methods for calculating R- and A-distances

In [None]:
def find_matching_ngraphs(ngraphs_a, ngraphs_b):
    """Restrict two n-graph dicts to the keys they have in common.

    Returns:
        Tuple ``(subset_a, subset_b)``: both dicts contain exactly the shared
        keys, with the values taken from ``ngraphs_a`` and ``ngraphs_b``
        respectively.
    """
    shared_keys = ngraphs_a.keys() & ngraphs_b.keys()
    subset_a = {key: ngraphs_a[key] for key in shared_keys}
    subset_b = {key: ngraphs_b[key] for key in shared_keys}
    return subset_a, subset_b


def calculate_r_distance(reference_ngraphs, evaluation_ngraphs):
    """Compute the normalised rank disorder between two n-graph dicts.

    Both dicts are ordered by ascending duration; the result is the sum of
    the absolute rank differences of every key, divided by the maximum
    disorder for that many keys.

    Args:
        reference_ngraphs: ``{n-graph: time}`` dict.
        evaluation_ngraphs: ``{n-graph: time}`` dict. Every key of
            ``reference_ngraphs`` must also be present here (callers pass the
            output of get_matching_subsets); a missing key raises
            ``KeyError``.

    Returns:
        ``1`` when ``evaluation_ngraphs`` has fewer than
        ``minimum_profile_length_r`` entries or when there is nothing to
        compare; otherwise the distance rounded to 6 decimals.
    """
    number_of_keys = len(evaluation_ngraphs)
    if number_of_keys < minimum_profile_length_r:
        return 1

    order_reference = [
        key for key, _ in sorted(reference_ngraphs.items(), key=lambda item: item[1])
    ]
    order_evaluation = [
        key for key, _ in sorted(evaluation_ngraphs.items(), key=lambda item: item[1])
    ]
    if not order_reference:
        print("WARN: No R distance")
        return 1

    # Rank lookup table instead of list.index(), which rescans the list for every key.
    rank_in_evaluation = {key: rank for rank, key in enumerate(order_evaluation)}
    order_distance = [
        abs(rank_in_evaluation[key] - rank) for rank, key in enumerate(order_reference)
    ]

    # Maximum disorder of an array of n elements: n^2/2 (n even), (n^2-1)/2 (n odd).
    if number_of_keys % 2 == 0:
        maximum_disorder = (number_of_keys * number_of_keys) / 2
    else:
        maximum_disorder = ((number_of_keys * number_of_keys) - 1) / 2
    return np.round(np.sum(order_distance) / maximum_disorder, 6)

def calculate_a_distance(reference_ngraphs, evaluation_ngraphs, threshold):
    """Compute the share of n-graphs whose timings are NOT similar.

    An n-graph counts as similar when the absolute difference between its
    evaluation and reference time is at most ``threshold``.

    Args:
        reference_ngraphs: ``{n-graph: time}`` dict; must contain every key
            of ``evaluation_ngraphs``.
        evaluation_ngraphs: ``{n-graph: time}`` dict.
        threshold: largest timing difference still regarded as similar.

    Returns:
        ``1`` when ``evaluation_ngraphs`` has fewer than
        ``minimum_profile_length_a`` entries; otherwise
        ``1 - similar / total`` rounded to 6 decimals.
    """
    total = len(evaluation_ngraphs)
    if total < minimum_profile_length_a:
        return 1

    similar_timings = sum(
        1
        for key, observed in evaluation_ngraphs.items()
        if abs(observed - reference_ngraphs[key]) <= threshold
    )
    return np.round(1 - (similar_timings / total), 6)

def get_matching_subsets(ngraphs_a, ngraphs_b):
    """Return both n-graph dicts reduced to their shared keys.

    Thin wrapper around find_matching_ngraphs; the result is the tuple
    ``(subset_a, subset_b)``.
    """
    matched = find_matching_ngraphs(ngraphs_a, ngraphs_b)
    return matched[0], matched[1]

def get_a_distance(ngraph_profile, ngraph_sample):
    """Average A-distance between a sample and every sample of a profile.

    Args:
        ngraph_profile: iterable of ``{n-graph: time}`` dicts (the profile).
        ngraph_sample: ``{n-graph: time}`` dict to compare against them.

    Returns:
        ``np.mean`` of the individual A-distances, each computed on the shared
        n-graphs with a threshold of 1.05.
    """
    per_sample = [
        calculate_a_distance(*get_matching_subsets(stored, ngraph_sample), 1.05)
        for stored in ngraph_profile
    ]
    return np.mean(per_sample)

def get_r_distance(ngraph_profile, ngraph_sample):
    """Average R-distance between a sample and every sample of a profile.

    Args:
        ngraph_profile: iterable of ``{n-graph: time}`` dicts (the profile).
        ngraph_sample: ``{n-graph: time}`` dict to compare against them.

    Returns:
        ``1`` when no distance was collected; otherwise the mean of the
        individual R-distances rounded to 5 decimals.
    """
    candidates = (
        calculate_r_distance(*get_matching_subsets(stored, ngraph_sample))
        for stored in ngraph_profile
    )
    # 99 is treated as a "skip" marker; calculate_r_distance as defined in this
    # file never returns it, so in practice every value is kept.
    kept = [distance for distance in candidates if distance != 99]
    if not kept:
        return 1
    return np.round(np.mean(kept), 5)

def get_mean_r_distance(samples_digraphs, samples_trigraps, samples_fourgraphs):
    """Mean pairwise R-distance among the samples of one user.

    For each n-graph size, every ordered pair of distinct samples is compared
    on its shared n-graphs.

    Args:
        samples_digraphs: list of ``{digraph: time}`` dicts.
        samples_trigraps: list of ``{trigraph: time}`` dicts.
        samples_fourgraphs: list of ``{fourgraph: time}`` dicts.

    Returns:
        Tuple ``(mean_digraphs, mean_trigraphs, mean_fourgraphs)``.
    """

    def mean_pairwise(samples):
        # Ordered pairs: (s1, s2) and (s2, s1) are both evaluated.
        distances = [
            calculate_r_distance(*get_matching_subsets(first, second))
            for idx_first, first in enumerate(samples)
            for idx_second, second in enumerate(samples)
            if idx_first != idx_second
        ]
        return np.mean(distances)

    return (
        mean_pairwise(samples_digraphs),
        mean_pairwise(samples_trigraps),
        mean_pairwise(samples_fourgraphs),
    )

def get_mean_a_distance(samples_digraphs, samples_trigraps, samples_fourgraphs):
    """Mean pairwise A-distance among the samples of one user.

    For each n-graph size, every ordered pair of distinct samples is compared
    on its shared n-graphs with a threshold of 1.05.

    Args:
        samples_digraphs: list of ``{digraph: time}`` dicts.
        samples_trigraps: list of ``{trigraph: time}`` dicts.
        samples_fourgraphs: list of ``{fourgraph: time}`` dicts.

    Returns:
        Tuple ``(mean_digraphs, mean_trigraphs, mean_fourgraphs)``.
    """

    def mean_pairwise(samples):
        # Ordered pairs: (s1, s2) and (s2, s1) are both evaluated.
        distances = [
            calculate_a_distance(*get_matching_subsets(first, second), 1.05)
            for idx_first, first in enumerate(samples)
            for idx_second, second in enumerate(samples)
            if idx_first != idx_second
        ]
        return np.mean(distances)

    return (
        mean_pairwise(samples_digraphs),
        mean_pairwise(samples_trigraps),
        mean_pairwise(samples_fourgraphs),
    )



def get_a_distances(user_profile_digraph, user_profile_trigraph, user_profile_fourgraph, sample_digraph, sample_trigraph, sample_fourgraph):
    """Return the A-distances ``(a2, a3, a4)`` of a sample to a profile.

    Each profile argument is a list of per-sample n-graph dicts; each sample
    argument is the single n-graph dict of the same size to compare with it.
    """
    pairs = (
        (user_profile_digraph, sample_digraph),
        (user_profile_trigraph, sample_trigraph),
        (user_profile_fourgraph, sample_fourgraph),
    )
    return tuple(get_a_distance(profile, sample) for profile, sample in pairs)

def get_r_distances(user_profile_digraph, user_profile_trigraph, user_profile_fourgraph, sample_digraph, sample_trigraph, sample_fourgraph):
    """Return the R-distances ``(r2, r3, r4)`` of a sample to a profile.

    Each profile argument is a list of per-sample n-graph dicts; each sample
    argument is the single n-graph dict of the same size to compare with it.
    """
    pairs = (
        (user_profile_digraph, sample_digraph),
        (user_profile_trigraph, sample_trigraph),
        (user_profile_fourgraph, sample_fourgraph),
    )
    return tuple(get_r_distance(profile, sample) for profile, sample in pairs)

def calculate_distances(user_profile_digraph, user_profile_trigraph, user_profile_fourgraph, sample_digraph, sample_trigraph, sample_fourgraph):
    """Compute every A/R distance combination between a sample and a profile.

    Returns:
        Dict keyed by combination name (``"a2"`` ... ``"r234_a234"``). A name
        such as ``"r23_a2"`` is the sum R2 + R3 + A2, where the digit is the
        n-graph size.
    """
    profile_and_sample = (
        user_profile_digraph, user_profile_trigraph, user_profile_fourgraph,
        sample_digraph, sample_trigraph, sample_fourgraph,
    )
    a2, a3, a4 = get_a_distances(*profile_and_sample)
    r2, r3, r4 = get_r_distances(*profile_and_sample)

    # Partial sums are chained strictly left to right so the floating-point
    # results equal a flat "x + y + z + ..." expression.
    a23 = a2 + a3
    r23 = r2 + r3
    r234 = r23 + r4
    r2_a2 = r2 + a2
    r2_a23 = r2_a2 + a3
    r23_a2 = r23 + a2
    r23_a23 = r23_a2 + a3
    r234_a2 = r234 + a2
    r234_a23 = r234_a2 + a3

    return {
        "a2": a2,
        "a3": a3,
        "a4": a4,
        "a23": a23,
        "a24": a2 + a4,
        "a34": a3 + a4,
        "a234": a23 + a4,
        "r2": r2,
        "r3": r3,
        "r4": r4,
        "r23": r23,
        "r24": r2 + r4,
        "r34": r3 + r4,
        "r234": r234,
        "r2_a2": r2_a2,
        "r2_a23": r2_a23,
        "r2_a24": r2_a2 + a4,
        "r2_a234": r2_a23 + a4,

        "r23_a2": r23_a2,
        "r23_a23": r23_a23,
        "r23_a24": r23_a2 + a4,
        "r23_a234": r23_a23 + a4,

        "r234_a2": r234_a2,
        "r234_a23": r234_a23,
        "r234_a24": r234_a2 + a4,
        "r234_a234": r234_a23 + a4,
    }


In [None]:
def classify_user_experiment(user_profiles_training, user_profile_evaluation, user):
    """Count correct classifications of one user's 15 samples per distance scheme.

    For every sample ``j`` (0..14) of ``user`` in the evaluation profiles, the
    distance to each training profile is computed under 26 distance schemes.
    When scoring the training profile at index ``user``, its own sample ``j``
    is left out. A scheme scores a hit when the training profile with the
    smallest distance (``np.argmin``) is ``user``.

    Args:
        user_profiles_training: list of user dicts with the keys
            ``"digraphs"``, ``"trigraphs"`` and ``"fourgraphs"``, each a list
            of per-sample ``{n-graph: time}`` dicts.
        user_profile_evaluation: list of user dicts of the same layout; only
            the entry at index ``user`` is read.
        user: index of the user under test in both lists.

    Returns:
        Tuple of 26 hit counts in the order
        a2, a3, a4, a23, a24, a34, a234,
        r2, r3, r4, r23, r24, r34, r234,
        r2_a2, r2_a23, r2_a24, r2_a234,
        r23_a2, r23_a23, r23_a24, r23_a234,
        r234_a2, r234_a23, r234_a24, r234_a234.
    """
    # Each scheme lists the base distances that are added, left to right, to
    # score a profile. The tuple order fixes the position in the result, and
    # the term order fixes the floating-point summation order.
    schemes = (
        ("a2",), ("a3",), ("a4",),
        ("a2", "a3"), ("a2", "a4"), ("a3", "a4"), ("a2", "a3", "a4"),

        ("r2",), ("r3",), ("r4",),
        ("r2", "r3"), ("r2", "r4"), ("r3", "r4"), ("r2", "r3", "r4"),

        ("r2", "a2"), ("r2", "a2", "a3"), ("r2", "a2", "a4"),
        ("r2", "a2", "a3", "a4"),

        ("r2", "r3", "a2"), ("r2", "r3", "a2", "a3"), ("r2", "r3", "a2", "a4"),
        ("r2", "r3", "a2", "a3", "a4"),

        ("r2", "r3", "r4", "a2"), ("r2", "r3", "r4", "a2", "a3"),
        ("r2", "r3", "r4", "a2", "a4"), ("r2", "r3", "r4", "a2", "a3", "a4"),
    )
    kinds = ("digraphs", "trigraphs", "fourgraphs")
    true_positives = [0] * len(schemes)

    for j in range(15):
        # scores[k][u] = distance of sample j to training profile u under scheme k.
        scores = [[] for _ in schemes]

        for u, profile in enumerate(user_profiles_training):
            # Leave sample j out of the tested user's own profile.
            held_in = [
                [entry for i, entry in enumerate(profile[kind]) if not (u == user and i == j)]
                for kind in kinds
            ]
            sample = [user_profile_evaluation[user][kind][j] for kind in kinds]

            a2, a3, a4 = get_a_distances(*held_in, *sample)
            r2, r3, r4 = get_r_distances(*held_in, *sample)
            base = {"a2": a2, "a3": a3, "a4": a4, "r2": r2, "r3": r3, "r4": r4}

            for column, terms in zip(scores, schemes):
                total = base[terms[0]]
                for term in terms[1:]:
                    total = total + base[term]
                column.append(total)

        for k, column in enumerate(scores):
            if user == np.argmin(np.array(column)):
                true_positives[k] += 1

    return tuple(true_positives)


In [None]:
def classify_user_for_authentication(user_profiles, user_sample, user_index, profile_index):
    """Return the index of the profile closest to a sample (R234 + A23).

    Args:
        user_profiles: list of user dicts with the keys ``"digraphs"``,
            ``"trigraphs"`` and ``"fourgraphs"`` (lists of per-sample dicts).
        user_sample: dict with the same three keys, each holding the single
            n-graph dict of the sample to classify.
        user_index: index of the user the sample was taken from; together
            with ``profile_index`` it selects the one stored sample to leave
            out. Pass ``-1`` for both to leave nothing out.
        profile_index: index of that sample within the user's profile.

    Returns:
        ``np.argmin`` over the per-profile sums R2 + R3 + R4 + A2 + A3.
    """
    scores = []
    for u, profile in enumerate(user_profiles):
        # De Morgan form of "not (same user and same sample)".
        digraphs, trigraphs, fourgraphs = (
            [
                entry
                for i, entry in enumerate(profile[kind])
                if user_index != u or profile_index != i
            ]
            for kind in ("digraphs", "trigraphs", "fourgraphs")
        )
        a2, a3, _ = get_a_distances(digraphs, trigraphs, fourgraphs, user_sample["digraphs"], user_sample["trigraphs"], user_sample["fourgraphs"])
        r2, r3, r4 = get_r_distances(digraphs, trigraphs, fourgraphs, user_sample["digraphs"], user_sample["trigraphs"], user_sample["fourgraphs"])
        scores.append(r2 + r3 + r4 + a2 + a3)
    return np.argmin(np.array(scores))

In [None]:
def authentication_test_legal_connection(user_profiles_training, user_profiles_evaluation):
    """Count false rejections when users authenticate with their own samples.

    Each of the 15 samples of every evaluation user is one legal attempt. It
    is rejected when
      * the sample is not classified as its owner
        (classify_user_for_authentication), or
      * for some other training user B the acceptance test fails, i.e.
        ``md(A, X) >= m(A) + 0.5 * (md(B, X) - m(A))``,
        with all three quantities measured as R234 + A23.

    Args:
        user_profiles_training: list of user dicts (keys ``"digraphs"``,
            ``"trigraphs"``, ``"fourgraphs"``) used for classification and as
            the competing users B.
        user_profiles_evaluation: list of user dicts of the same layout that
            supplies the test samples and the owner's remaining samples.

    Returns:
        Tuple ``(attempts, false_rejects)``; ``false_rejects`` is the sum of
        classification and distance rejections.
    """
    false_reject_classification = 0
    false_reject_distance = 0
    attempt = 0
    kinds = ("digraphs", "trigraphs", "fourgraphs")

    for index_test_user, test_user in enumerate(user_profiles_evaluation):
        print("Start legal attempt user "+str(index_test_user))

        for sample_test_user in range(0, 15):
            attempt += 1
            sample = {kind: test_user[kind][sample_test_user] for kind in kinds}
            classified_user = classify_user_for_authentication(user_profiles_training, sample, index_test_user, sample_test_user)
            if classified_user != index_test_user:
                false_reject_classification += 1
                continue

            # The owner's profile without the sample under test (built once).
            remaining = [
                [entry for i, entry in enumerate(test_user[kind]) if i != sample_test_user]
                for kind in kinds
            ]
            m_d_a = get_mean_a_distance(*remaining)
            m_d_r = get_mean_r_distance(*remaining)
            # m(A) must use the same measure as the distances it is compared
            # with ("r234_a23" = R2+R3+R4+A2+A3), hence A indices 0 and 1.
            mean_distances_user_A = m_d_r[0] + m_d_r[1] + m_d_r[2] + m_d_a[0] + m_d_a[1]

            distance_to_own_profile = calculate_distances(
                *remaining, sample["digraphs"], sample["trigraphs"], sample["fourgraphs"]
            )["r234_a23"]

            for index_user_B, user_B in enumerate(user_profiles_training):
                if index_user_B == index_test_user:
                    continue
                distance_to_user_B = calculate_distances(
                    user_B["digraphs"], user_B["trigraphs"], user_B["fourgraphs"],
                    sample["digraphs"], sample["trigraphs"], sample["fourgraphs"],
                )["r234_a23"]
                if distance_to_own_profile >= mean_distances_user_A + (0.5 * (distance_to_user_B - mean_distances_user_A)):
                    false_reject_distance += 1
                    break  # one failing competitor is enough to reject

    print("Attempts: "+str(attempt)+" FR_Classification: "+str(false_reject_classification)+" FR_Distance: "+str(false_reject_distance))
    return (attempt, false_reject_classification+false_reject_distance)

In [None]:
def authentication_test_attack(user_profiles_training, user_profile_evaluation):
    """Count false acceptances when a user's samples are used as attacks.

    Every one of the attacker's 15 samples is tried against every other user
    in ``user_profile_evaluation``; each (sample, attacked user) pair is one
    attempt. An attack on user A succeeds when the sample is classified as A
    among the attacked users and, for some other attacked user B,
    ``md(A, X) < m(A) + 0.5 * (md(B, X) - m(A))`` holds, with all three
    quantities measured as R234 + A23.

    Args:
        user_profiles_training: list of user dicts (keys ``"digraphs"``,
            ``"trigraphs"``, ``"fourgraphs"``) supplying the attacker samples.
        user_profile_evaluation: list of user dicts of the same layout
            supplying the attacked profiles.

    Returns:
        Tuple ``(attempts, false_accept)``.
    """
    false_accept = 0
    attempts = 0
    kinds = ("digraphs", "trigraphs", "fourgraphs")

    # NOTE(review): the [0:1] slice means only the first training user ever
    # acts as attacker. Kept as-is to preserve the experiment; confirm whether
    # all users were meant to attack.
    for index_attack_user, attack_user in enumerate(user_profiles_training[0:1]):
        print("Start attack on user " + str(index_attack_user))

        attacked_users = [user for i, user in enumerate(user_profile_evaluation) if i != index_attack_user]
        if not attacked_users:
            continue  # nobody to attack, hence no attempts

        for sample_attack in range(0, 15):
            attack_sample = {kind: attack_user[kind][sample_attack] for kind in kinds}

            # The classification does not depend on the attacked user, so it is
            # computed once per sample instead of once per (sample, victim).
            classified_user = classify_user_for_authentication(attacked_users, attack_sample, -1, -1)
            attempts += len(attacked_users)

            # Only the user the sample was classified as can be falsely accepted.
            attacked_user = attacked_users[classified_user]
            attacked_profile = [attacked_user[kind] for kind in kinds]

            m_d_a = get_mean_a_distance(*attacked_profile)
            m_d_r = get_mean_r_distance(*attacked_profile)
            # m(A) must use the same measure as the distances it is compared
            # with ("r234_a23" = R2+R3+R4+A2+A3), hence A indices 0 and 1.
            mean_distances_user_A = m_d_r[0] + m_d_r[1] + m_d_r[2] + m_d_a[0] + m_d_a[1]

            distance_to_attacked_profile = calculate_distances(
                *attacked_profile,
                attack_sample["digraphs"], attack_sample["trigraphs"], attack_sample["fourgraphs"],
            )["r234_a23"]

            for index_user_B, user_B in enumerate(attacked_users):
                if index_user_B == classified_user:
                    continue
                distance_to_user_B = calculate_distances(
                    user_B["digraphs"], user_B["trigraphs"], user_B["fourgraphs"],
                    attack_sample["digraphs"], attack_sample["trigraphs"], attack_sample["fourgraphs"],
                )["r234_a23"]
                if distance_to_attacked_profile < mean_distances_user_A + (0.5 * (distance_to_user_B - mean_distances_user_A)):
                    false_accept += 1
                    break

    return (attempts, false_accept)

In [None]:
def classify_dataset_28(path_to_dataset_training, path_to_dataset_evaluation, filename):
    """Run the classification experiment on the 28-user subset and save CSVs.

    Loads both pickled profile lists, drops the users at list indices 13, 18
    and 26, classifies every remaining user and, after each user, rewrites
    ``./__DATA_28/<filename>_classification.csv`` (hits per user and scheme)
    and ``./__DATA_28/<filename>_classification_performance.csv`` (totals,
    misclassifications and error percentage so far).

    Args:
        path_to_dataset_training: pickle file with the training profiles.
        path_to_dataset_evaluation: pickle file with the evaluation profiles.
        filename: prefix of the output file names.
    """
    with open(path_to_dataset_training, "rb") as training_file:
        user_profiles_training = pickle.load(training_file)
    with open(path_to_dataset_evaluation, "rb") as evaluation_file:
        user_profiles_evaluation = pickle.load(evaluation_file)

    # remove row 13, 18, 26
    excluded_rows = (13, 18, 26)
    user_profiles_training = [
        profile for row, profile in enumerate(user_profiles_training) if row not in excluded_rows
    ]
    user_profiles_evaluation = [
        profile for row, profile in enumerate(user_profiles_evaluation) if row not in excluded_rows
    ]
    assert len(user_profiles_training) == len(user_profiles_evaluation) == 28

    scheme_columns = [
        'a2', 'a3', 'a4', 'a23', 'a24', 'a34', 'a234',
        'r2', 'r3', 'r4', 'r23', 'r24', 'r34', 'r234',
        'r2_a2', 'r2_a23', 'r2_a24', 'r2_a234',
        'r23_a2', 'r23_a23', 'r23_a24', 'r23_a234',
        'r234_a2', 'r234_a23', 'r234_a24', 'r234_a234',
    ]
    output_prefix = './__DATA_28/' + filename

    classifications = []
    print("start classfication:")
    for i in range(len(user_profiles_training)):
        print("start classification for user " + str(i))

        classifications.append(classify_user_experiment(user_profiles_training, user_profiles_evaluation, i))

        # Results are written after every user, so partial results exist on
        # disk while the run is still in progress.
        samples_so_far = 15 * (i + 1)
        df = pd.DataFrame(classifications, columns=scheme_columns)
        sums = df.sum().to_frame()
        sums["Missclassifications"] = samples_so_far - sums.iloc[:, 0]
        sums["Error"] = (100 / samples_so_far) * sums["Missclassifications"]
        sums.to_csv(output_prefix + '_classification_performance.csv')
        df.to_csv(output_prefix + '_classification.csv')


In [None]:
def authenticate_dataset_28(path_to_dataset_training, path_to_dataset_evaluation, filename):
    """Run both authentication tests on the 28-user subset and save a CSV.

    Loads both pickled profile lists, drops the users at list indices 13, 18
    and 26, runs the legal-connection and the attack test, and writes the
    attempt and error counts to ``./__DATA_28/<filename>_authentication.csv``.

    Args:
        path_to_dataset_training: pickle file with the training profiles.
        path_to_dataset_evaluation: pickle file with the evaluation profiles.
        filename: prefix of the output file name.
    """
    with open(path_to_dataset_training, "rb") as training_file:
        user_profiles_training = pickle.load(training_file)
    with open(path_to_dataset_evaluation, "rb") as evaluation_file:
        user_profiles_evaluation = pickle.load(evaluation_file)

    # remove row 13, 18, 26
    excluded_rows = (13, 18, 26)
    user_profiles_training = [
        profile for row, profile in enumerate(user_profiles_training) if row not in excluded_rows
    ]
    user_profiles_evaluation = [
        profile for row, profile in enumerate(user_profiles_evaluation) if row not in excluded_rows
    ]
    assert len(user_profiles_training) == len(user_profiles_evaluation) == 28

    legal_total, false_rejects = authentication_test_legal_connection(user_profiles_training, user_profiles_evaluation)
    attack_total, false_accepts = authentication_test_attack(user_profiles_training, user_profiles_evaluation)

    summary = pd.DataFrame(data={
        'Type': ['False Reject', 'False Accept'],
        'Attempts': [legal_total, attack_total],
        'Result': [false_rejects, false_accepts],
    })
    summary.to_csv('./__DATA_28/' + filename + "_authentication.csv", index=False)



## Execute experiments

In [None]:
# read in original data
# CSV with the raw keystrokes of the original dataset.
original_set = './FreeText-Dataset-31-USERS.csv'
# Pickle file in which the profiles built from that CSV are cached; the cells
# below pass it as the training set of every experiment.
original_data_profiles = './__DATA/original_data_profiles'

In [None]:
# original data
# Build the profile pickle only when it is not on disk yet.
if not os.path.isfile(original_data_profiles):
    create_user_profiles(original_set, original_data_profiles)

# Baseline: the original profiles serve as both training and evaluation set;
# output files use the "original" prefix.
authenticate_dataset_28(original_data_profiles, original_data_profiles, "original")
classify_dataset_28(original_data_profiles, original_data_profiles, "original")

In [None]:
# chrome isolated
# Input CSV and profile pickle path of the chrome/isolated dataset.
chrome_isolated_set = './chrome/isolated/complete.csv'
chrome_isolated_data_profiles = './__DATA/chrome_isolated_data_profiles'

# Build the profile pickle only when it is not on disk yet.
if not os.path.isfile(chrome_isolated_data_profiles):
    create_user_profiles(chrome_isolated_set, chrome_isolated_data_profiles)

# Train on the original profiles, evaluate on this dataset.
authenticate_dataset_28(original_data_profiles, chrome_isolated_data_profiles, "chrome_isolated")
classify_dataset_28(original_data_profiles, chrome_isolated_data_profiles, "chrome_isolated")

In [None]:
# chrome unisolated
# Input CSV and profile pickle path of the chrome/unisolated dataset.
chrome_unisolated_set = './chrome/unisolated/complete.csv'
chrome_unisolated_data_profiles = './__DATA/chrome_unisolated_data_profiles'

# Build the profile pickle only when it is not on disk yet.
if not os.path.isfile(chrome_unisolated_data_profiles):
    create_user_profiles(chrome_unisolated_set, chrome_unisolated_data_profiles)

# Train on the original profiles, evaluate on this dataset.
authenticate_dataset_28(original_data_profiles, chrome_unisolated_data_profiles, "chrome_unisolated")
classify_dataset_28(original_data_profiles, chrome_unisolated_data_profiles, "chrome_unisolated")

In [None]:
# chromium isolated
# Input CSV and profile pickle path of the chromium/isolated dataset.
chromium_isolated_set = './chromium/isolated/complete.csv'
chromium_isolated_data_profiles = './__DATA/chromium_isolated_data_profiles'

# Build the profile pickle only when it is not on disk yet.
if not os.path.isfile(chromium_isolated_data_profiles):
    create_user_profiles(chromium_isolated_set, chromium_isolated_data_profiles)

# Train on the original profiles, evaluate on this dataset.
authenticate_dataset_28(original_data_profiles, chromium_isolated_data_profiles, "chromium_isolated")
classify_dataset_28(original_data_profiles, chromium_isolated_data_profiles, "chromium_isolated")

In [None]:
# chromium unisolated
# Input CSV and profile pickle path of the chromium/unisolated dataset.
chromium_unisolated_set = './chromium/unisolated/complete.csv'
chromium_unisolated_data_profiles = './__DATA/chromium_unisolated_data_profiles'

# Build the profile pickle only when it is not on disk yet.
if not os.path.isfile(chromium_unisolated_data_profiles):
    create_user_profiles(chromium_unisolated_set, chromium_unisolated_data_profiles)

# Train on the original profiles, evaluate on this dataset.
authenticate_dataset_28(original_data_profiles, chromium_unisolated_data_profiles, "chromium_unisolated")
classify_dataset_28(original_data_profiles, chromium_unisolated_data_profiles, "chromium_unisolated")

In [None]:
# edge isolated
# Input CSV and profile pickle path of the edge/isolated dataset.
edge_isolated_set = './edge/isolated/complete.csv'
edge_isolated_data_profiles = './__DATA/edge_isolated_data_profiles'

# Build the profile pickle only when it is not on disk yet.
if not os.path.isfile(edge_isolated_data_profiles):
    create_user_profiles(edge_isolated_set, edge_isolated_data_profiles)

# Train on the original profiles, evaluate on this dataset.
authenticate_dataset_28(original_data_profiles, edge_isolated_data_profiles, "edge_isolated")
classify_dataset_28(original_data_profiles, edge_isolated_data_profiles, "edge_isolated")

In [None]:
# edge unisolated
# Input CSV and profile pickle path of the edge/unisolated dataset.
edge_unisolated_set = './edge/unisolated/complete.csv'
edge_unisolated_data_profiles = './__DATA/edge_unisolated_data_profiles'

# Build the profile pickle only when it is not on disk yet.
if not os.path.isfile(edge_unisolated_data_profiles):
    create_user_profiles(edge_unisolated_set, edge_unisolated_data_profiles)

# Train on the original profiles, evaluate on this dataset.
authenticate_dataset_28(original_data_profiles, edge_unisolated_data_profiles, "edge_unisolated")
classify_dataset_28(original_data_profiles, edge_unisolated_data_profiles, "edge_unisolated")

In [None]:
# tor unisolated
# Input CSV and profile pickle path of the tor/unisolated dataset.
tor_unisolated_set = './tor/unisolated/complete.csv'
tor_unisolated_data_profiles = './__DATA/tor_unisolated_data_profiles'

# Build the profile pickle only when it is not on disk yet.
if not os.path.isfile(tor_unisolated_data_profiles):
    create_user_profiles(tor_unisolated_set, tor_unisolated_data_profiles)

# Train on the original profiles, evaluate on this dataset.
authenticate_dataset_28(original_data_profiles, tor_unisolated_data_profiles, "tor_unisolated")
classify_dataset_28(original_data_profiles, tor_unisolated_data_profiles, "tor_unisolated")

In [None]:
# firefox isolated
# Input CSV and profile pickle path of the firefox/isolated dataset.
firefox_isolated_set = './firefox/isolated/complete.csv'
firefox_isolated_data_profiles = './__DATA/firefox_isolated_data_profiles'

# Build the profile pickle only when it is not on disk yet.
if not os.path.isfile(firefox_isolated_data_profiles):
    create_user_profiles(firefox_isolated_set, firefox_isolated_data_profiles)

# Train on the original profiles, evaluate on this dataset.
authenticate_dataset_28(original_data_profiles, firefox_isolated_data_profiles, "firefox_isolated")
classify_dataset_28(original_data_profiles, firefox_isolated_data_profiles, "firefox_isolated")

In [None]:
# firefox unisolated
# Input CSV and profile pickle path of the firefox/unisolated dataset.
firefox_unisolated_set = './firefox/unisolated/complete.csv'
firefox_unisolated_data_profiles = './__DATA/firefox_unisolated_data_profiles'

# Build the profile pickle only when it is not on disk yet.
if not os.path.isfile(firefox_unisolated_data_profiles):
    create_user_profiles(firefox_unisolated_set, firefox_unisolated_data_profiles)

# Train on the original profiles, evaluate on this dataset.
authenticate_dataset_28(original_data_profiles, firefox_unisolated_data_profiles, "firefox_unisolated")
classify_dataset_28(original_data_profiles, firefox_unisolated_data_profiles, "firefox_unisolated")

In [None]:
# firefox isolated resistFP
# Input CSV and profile pickle path of the firefox_rf/isolated dataset.
firefox_resistFP_isolated_set = './firefox_rf/isolated/complete.csv'
firefox_resistFP_isolated_data_profiles = './__DATA/firefox_resistFP_isolated_data_profiles'

# Build the profile pickle only when it is not on disk yet.
if not os.path.isfile(firefox_resistFP_isolated_data_profiles):
    create_user_profiles(firefox_resistFP_isolated_set, firefox_resistFP_isolated_data_profiles)

# Train on the original profiles, evaluate on this dataset.
authenticate_dataset_28(original_data_profiles, firefox_resistFP_isolated_data_profiles, "firefox_resistFP_isolated")
classify_dataset_28(original_data_profiles, firefox_resistFP_isolated_data_profiles, "firefox_resistFP_isolated")

In [None]:
# firefox unisolated resistFP
# Input CSV and profile pickle path of the firefox_rf/unisolated dataset.
firefox_resistFP_unisolated_set = './firefox_rf/unisolated/complete.csv'
firefox_resistFP_unisolated_data_profiles = './__DATA/firefox_resistFP_unisolated_data_profiles'

# Build the profile pickle only when it is not on disk yet.
if not os.path.isfile(firefox_resistFP_unisolated_data_profiles):
    create_user_profiles(firefox_resistFP_unisolated_set, firefox_resistFP_unisolated_data_profiles)

# Train on the original profiles, evaluate on this dataset.
authenticate_dataset_28(original_data_profiles, firefox_resistFP_unisolated_data_profiles, "firefox_resistFP_unisolated")
classify_dataset_28(original_data_profiles, firefox_resistFP_unisolated_data_profiles, "firefox_resistFP_unisolated")

In [None]:
# Firefox with resistFP (20ms variant), unisolated configuration.
# Raw CSV for this configuration and the profiles file checked/used below.
firefox_resistFP_20ms_unisolated_set = "./firefox_rf_20/unisolated/complete.csv"
firefox_resistFP_20ms_unisolated_data_profiles = "./__DATA/firefox_resistFP_20ms_unisolated_data_profiles"

# Only call create_user_profiles when no file exists yet at the profiles path,
# so an existing profiles file is reused on later runs.
if not os.path.isfile(firefox_resistFP_20ms_unisolated_data_profiles):
    create_user_profiles(
        firefox_resistFP_20ms_unisolated_set,
        firefox_resistFP_20ms_unisolated_data_profiles,
    )

# Run authentication, then classification, against original_data_profiles.
authenticate_dataset_28(
    original_data_profiles,
    firefox_resistFP_20ms_unisolated_data_profiles,
    "firefox_resistFP_20ms_unisolated",
)
classify_dataset_28(
    original_data_profiles,
    firefox_resistFP_20ms_unisolated_data_profiles,
    "firefox_resistFP_20ms_unisolated",
)

In [None]:
# Firefox with resistFP (33ms variant), unisolated configuration.
# Raw CSV for this configuration and the profiles file checked/used below.
firefox_resistFP_33ms_unisolated_set = "./firefox_rf_33/unisolated/complete.csv"
firefox_resistFP_33ms_unisolated_data_profiles = "./__DATA/firefox_resistFP_33ms_unisolated_data_profiles"

# Only call create_user_profiles when no file exists yet at the profiles path,
# so an existing profiles file is reused on later runs.
if not os.path.isfile(firefox_resistFP_33ms_unisolated_data_profiles):
    create_user_profiles(
        firefox_resistFP_33ms_unisolated_set,
        firefox_resistFP_33ms_unisolated_data_profiles,
    )

# Run authentication, then classification, against original_data_profiles.
authenticate_dataset_28(
    original_data_profiles,
    firefox_resistFP_33ms_unisolated_data_profiles,
    "firefox_resistFP_33ms_unisolated",
)
classify_dataset_28(
    original_data_profiles,
    firefox_resistFP_33ms_unisolated_data_profiles,
    "firefox_resistFP_33ms_unisolated",
)

In [None]:
# Firefox with resistFP (40ms variant), unisolated configuration.
# Raw CSV for this configuration and the profiles file checked/used below.
firefox_resistFP_40ms_unisolated_set = "./firefox_rf_40/unisolated/complete.csv"
firefox_resistFP_40ms_unisolated_data_profiles = "./__DATA/firefox_resistFP_40ms_unisolated_data_profiles"

# Only call create_user_profiles when no file exists yet at the profiles path,
# so an existing profiles file is reused on later runs.
if not os.path.isfile(firefox_resistFP_40ms_unisolated_data_profiles):
    create_user_profiles(
        firefox_resistFP_40ms_unisolated_set,
        firefox_resistFP_40ms_unisolated_data_profiles,
    )

# Run authentication, then classification, against original_data_profiles.
authenticate_dataset_28(
    original_data_profiles,
    firefox_resistFP_40ms_unisolated_data_profiles,
    "firefox_resistFP_40ms_unisolated",
)
classify_dataset_28(
    original_data_profiles,
    firefox_resistFP_40ms_unisolated_data_profiles,
    "firefox_resistFP_40ms_unisolated",
)

In [None]:
# Firefox with resistFP (60ms variant), unisolated configuration.
# Raw CSV for this configuration and the profiles file checked/used below.
firefox_resistFP_60ms_unisolated_set = "./firefox_rf_60/unisolated/complete.csv"
firefox_resistFP_60ms_unisolated_data_profiles = "./__DATA/firefox_resistFP_60ms_unisolated_data_profiles"

# Only call create_user_profiles when no file exists yet at the profiles path,
# so an existing profiles file is reused on later runs.
if not os.path.isfile(firefox_resistFP_60ms_unisolated_data_profiles):
    create_user_profiles(
        firefox_resistFP_60ms_unisolated_set,
        firefox_resistFP_60ms_unisolated_data_profiles,
    )

# Run authentication, then classification, against original_data_profiles.
authenticate_dataset_28(
    original_data_profiles,
    firefox_resistFP_60ms_unisolated_data_profiles,
    "firefox_resistFP_60ms_unisolated",
)
classify_dataset_28(
    original_data_profiles,
    firefox_resistFP_60ms_unisolated_data_profiles,
    "firefox_resistFP_60ms_unisolated",
)

In [None]:
# Firefox with resistFP (80ms variant), unisolated configuration.
# Raw CSV for this configuration and the profiles file checked/used below.
firefox_resistFP_80ms_unisolated_set = "./firefox_rf_80/unisolated/complete.csv"
firefox_resistFP_80ms_unisolated_data_profiles = "./__DATA/firefox_resistFP_80ms_unisolated_data_profiles"

# Only call create_user_profiles when no file exists yet at the profiles path,
# so an existing profiles file is reused on later runs.
if not os.path.isfile(firefox_resistFP_80ms_unisolated_data_profiles):
    create_user_profiles(
        firefox_resistFP_80ms_unisolated_set,
        firefox_resistFP_80ms_unisolated_data_profiles,
    )

# Run authentication, then classification, against original_data_profiles.
authenticate_dataset_28(
    original_data_profiles,
    firefox_resistFP_80ms_unisolated_data_profiles,
    "firefox_resistFP_80ms_unisolated",
)
classify_dataset_28(
    original_data_profiles,
    firefox_resistFP_80ms_unisolated_data_profiles,
    "firefox_resistFP_80ms_unisolated",
)

In [None]:
# Firefox with resistFP (100ms variant), unisolated configuration.
# Raw CSV for this configuration and the profiles file checked/used below.
firefox_resistFP_100ms_unisolated_set = "./firefox_rf_100/unisolated/complete.csv"
firefox_resistFP_100ms_unisolated_data_profiles = "./__DATA/firefox_resistFP_100ms_unisolated_data_profiles"

# Only call create_user_profiles when no file exists yet at the profiles path,
# so an existing profiles file is reused on later runs.
if not os.path.isfile(firefox_resistFP_100ms_unisolated_data_profiles):
    create_user_profiles(
        firefox_resistFP_100ms_unisolated_set,
        firefox_resistFP_100ms_unisolated_data_profiles,
    )

# Run authentication, then classification, against original_data_profiles.
authenticate_dataset_28(
    original_data_profiles,
    firefox_resistFP_100ms_unisolated_data_profiles,
    "firefox_resistFP_100ms_unisolated",
)
classify_dataset_28(
    original_data_profiles,
    firefox_resistFP_100ms_unisolated_data_profiles,
    "firefox_resistFP_100ms_unisolated",
)

In [None]:
# Firefox with resistFP (120ms variant), unisolated configuration.
# Raw CSV for this configuration and the profiles file checked/used below.
firefox_resistFP_120ms_unisolated_set = "./firefox_rf_120/unisolated/complete.csv"
firefox_resistFP_120ms_unisolated_data_profiles = "./__DATA/firefox_resistFP_120ms_unisolated_data_profiles"

# Only call create_user_profiles when no file exists yet at the profiles path,
# so an existing profiles file is reused on later runs.
if not os.path.isfile(firefox_resistFP_120ms_unisolated_data_profiles):
    create_user_profiles(
        firefox_resistFP_120ms_unisolated_set,
        firefox_resistFP_120ms_unisolated_data_profiles,
    )

# Run authentication, then classification, against original_data_profiles.
authenticate_dataset_28(
    original_data_profiles,
    firefox_resistFP_120ms_unisolated_data_profiles,
    "firefox_resistFP_120ms_unisolated",
)
classify_dataset_28(
    original_data_profiles,
    firefox_resistFP_120ms_unisolated_data_profiles,
    "firefox_resistFP_120ms_unisolated",
)

In [None]:
# Firefox with resistFP (140ms variant), unisolated configuration.
# Raw CSV for this configuration and the profiles file checked/used below.
firefox_resistFP_140ms_unisolated_set = "./firefox_rf_140/unisolated/complete.csv"
firefox_resistFP_140ms_unisolated_data_profiles = "./__DATA/firefox_resistFP_140ms_unisolated_data_profiles"

# Only call create_user_profiles when no file exists yet at the profiles path,
# so an existing profiles file is reused on later runs.
if not os.path.isfile(firefox_resistFP_140ms_unisolated_data_profiles):
    create_user_profiles(
        firefox_resistFP_140ms_unisolated_set,
        firefox_resistFP_140ms_unisolated_data_profiles,
    )

# Run authentication, then classification, against original_data_profiles.
authenticate_dataset_28(
    original_data_profiles,
    firefox_resistFP_140ms_unisolated_data_profiles,
    "firefox_resistFP_140ms_unisolated",
)
classify_dataset_28(
    original_data_profiles,
    firefox_resistFP_140ms_unisolated_data_profiles,
    "firefox_resistFP_140ms_unisolated",
)

In [None]:
# Firefox with resistFP (160ms variant), unisolated configuration.
# Raw CSV for this configuration and the profiles file checked/used below.
firefox_resistFP_160ms_unisolated_set = "./firefox_rf_160/unisolated/complete.csv"
firefox_resistFP_160ms_unisolated_data_profiles = "./__DATA/firefox_resistFP_160ms_unisolated_data_profiles"

# Only call create_user_profiles when no file exists yet at the profiles path,
# so an existing profiles file is reused on later runs.
if not os.path.isfile(firefox_resistFP_160ms_unisolated_data_profiles):
    create_user_profiles(
        firefox_resistFP_160ms_unisolated_set,
        firefox_resistFP_160ms_unisolated_data_profiles,
    )

# Run authentication, then classification, against original_data_profiles.
authenticate_dataset_28(
    original_data_profiles,
    firefox_resistFP_160ms_unisolated_data_profiles,
    "firefox_resistFP_160ms_unisolated",
)
classify_dataset_28(
    original_data_profiles,
    firefox_resistFP_160ms_unisolated_data_profiles,
    "firefox_resistFP_160ms_unisolated",
)

In [None]:
# Firefox with resistFP (180ms variant), unisolated configuration.
# Raw CSV for this configuration and the profiles file checked/used below.
firefox_resistFP_180ms_unisolated_set = "./firefox_rf_180/unisolated/complete.csv"
firefox_resistFP_180ms_unisolated_data_profiles = "./__DATA/firefox_resistFP_180ms_unisolated_data_profiles"

# Only call create_user_profiles when no file exists yet at the profiles path,
# so an existing profiles file is reused on later runs.
if not os.path.isfile(firefox_resistFP_180ms_unisolated_data_profiles):
    create_user_profiles(
        firefox_resistFP_180ms_unisolated_set,
        firefox_resistFP_180ms_unisolated_data_profiles,
    )

# Run authentication, then classification, against original_data_profiles.
authenticate_dataset_28(
    original_data_profiles,
    firefox_resistFP_180ms_unisolated_data_profiles,
    "firefox_resistFP_180ms_unisolated",
)
classify_dataset_28(
    original_data_profiles,
    firefox_resistFP_180ms_unisolated_data_profiles,
    "firefox_resistFP_180ms_unisolated",
)

In [None]:
# Firefox with resistFP (200ms variant), unisolated configuration.
# Raw CSV for this configuration and the profiles file checked/used below.
firefox_resistFP_200ms_unisolated_set = "./firefox_rf_200/unisolated/complete.csv"
firefox_resistFP_200ms_unisolated_data_profiles = "./__DATA/firefox_resistFP_200ms_unisolated_data_profiles"

# Only call create_user_profiles when no file exists yet at the profiles path,
# so an existing profiles file is reused on later runs.
if not os.path.isfile(firefox_resistFP_200ms_unisolated_data_profiles):
    create_user_profiles(
        firefox_resistFP_200ms_unisolated_set,
        firefox_resistFP_200ms_unisolated_data_profiles,
    )

# Run authentication, then classification, against original_data_profiles.
authenticate_dataset_28(
    original_data_profiles,
    firefox_resistFP_200ms_unisolated_data_profiles,
    "firefox_resistFP_200ms_unisolated",
)
classify_dataset_28(
    original_data_profiles,
    firefox_resistFP_200ms_unisolated_data_profiles,
    "firefox_resistFP_200ms_unisolated",
)