In [None]:
# imports
import os
import pickle
import numpy as np
import pandas as pd 
from typing import Self
from copy import deepcopy
from collections import defaultdict

# Minimum number of shared n-graphs two samples must have before a distance is
# computed; with fewer shared n-graphs the distance functions return 1.
# NOTE(review): `_r` is presumably meant for r_distance and `_a` for a_distance —
# confirm which function actually reads which constant.
minimum_profile_length_r: int = 5
minimum_profile_length_a: int = 5
# key of the distance combination (as produced by d()) that the authentication tests use
auth_distance_measure: str = 'r234_a23'

# control log level
# debug: classification_experiment records misclassifications in results_class_global
debug: bool = True
# verbose: print per-attempt progress and failure messages
verbose: bool = False
# trace: print fine-grained messages (insufficient n-graphs, successful classification)
trace: bool = False

In [None]:
# create and read user profiles
def keystrokes_to_digraphs(keystroke_array):
    '''
    Builds the digraphs of a keystroke sequence.

        Parameters:
                keystroke_array: sequence of (key, timestamp) entries

        Returns:
                list of ("k1-k2", duration) tuples, one per pair of consecutive
                keystrokes; duration is the timestamp difference between the
                second and the first keystroke, rounded to 5 decimals
    '''
    return [
        (
            "-".join((str(keystroke_array[pos][0]), str(keystroke_array[pos + 1][0]))),
            np.round(keystroke_array[pos + 1][1] - keystroke_array[pos][1], 5),
        )
        for pos in range(len(keystroke_array) - 1)
    ]

def keystrokes_to_trigraphs(keystroke_array):
    '''
    Builds the trigraphs of a keystroke sequence.

        Parameters:
                keystroke_array: sequence of (key, timestamp) entries

        Returns:
                list of ("k1-k2-k3", duration) tuples, one per window of three
                consecutive keystrokes; duration is the timestamp difference
                between the last and the first keystroke of the window,
                rounded to 5 decimals
    '''
    window = 3
    trigraphs = []
    for start in range(len(keystroke_array) - window + 1):
        strokes = [keystroke_array[start + offset] for offset in range(window)]
        label = "-".join(str(stroke[0]) for stroke in strokes)
        trigraphs.append((label, np.round(strokes[-1][1] - strokes[0][1], 5)))
    return trigraphs

def keystrokes_to_fourgraphs(keystroke_array):
    '''
    Builds the fourgraphs of a keystroke sequence.

        Parameters:
                keystroke_array: sequence of (key, timestamp) entries

        Returns:
                list of ("k1-k2-k3-k4", duration) tuples, one per window of four
                consecutive keystrokes; duration is the timestamp difference
                between the last and the first keystroke of the window,
                rounded to 5 decimals
    '''
    window = 4
    fourgraphs = []
    for start in range(len(keystroke_array) - window + 1):
        strokes = [keystroke_array[start + offset] for offset in range(window)]
        label = "-".join(str(stroke[0]) for stroke in strokes)
        fourgraphs.append((label, np.round(strokes[-1][1] - strokes[0][1], 5)))
    return fourgraphs


def calculate_mean_for_duplicates(ngraphs):
    '''
    Collapses repeated n-graphs into a single entry holding their mean duration.

        Parameters:
                ngraphs: list of (key, duration) tuples, possibly with repeated keys

        Returns:
                list of (key, duration) tuples with unique keys, ordered by the
                first occurrence of each key. A key that occurred several times
                carries the mean of its durations rounded to 5 decimals; a key
                that occurred once keeps its original duration untouched.
    '''
    # group the durations per key in one pass; dicts keep insertion order,
    # so the keys stay in order of first occurrence
    durations_by_key = {}
    for key, time in ngraphs:
        durations_by_key.setdefault(key, []).append(time)

    cleaned_ngraphs = []
    for key, durations in durations_by_key.items():
        if len(durations) > 1:
            cleaned_ngraphs.append((key, np.round(np.mean(durations), 5)))
        else:
            # single occurrence: keep the value as is (no rounding, no type change)
            cleaned_ngraphs.append((key, durations[0]))
    return cleaned_ngraphs

def create_user_profile(keystroke_sequence):
    '''
    Derives the averaged di-, tri- and fourgraphs of one keystroke sample.

        Parameters:
                keystroke_sequence: sequence of (key, timestamp) entries

        Returns:
                (digraphs, trigraphs, fourgraphs): each the n-graph list of the
                sample after calculate_mean_for_duplicates was applied to it
    '''
    extractors = (keystrokes_to_digraphs, keystrokes_to_trigraphs, keystrokes_to_fourgraphs)
    return tuple(
        calculate_mean_for_duplicates(extract(keystroke_sequence))
        for extract in extractors
    )


def read_file(complete: pd.DataFrame, user: int, set: int) -> list[tuple[str, int]]:
    '''
    Extracts the keystrokes of one typing sample from the complete data frame.

        Parameters:
                complete (pd.DataFrame): frame with the columns 'user', 'set', 'key' and 'timestamp'
                user (int): value of the 'user' column to select
                set (int): value of the 'set' column to select (the name shadows the
                           builtin; it is kept so keyword callers keep working)

        Returns:
                keystrokes (list[tuple[str, int]]): (key, timestamp) tuples in row
                order, with the key converted to str
    '''
    # build the row mask once and select both columns with a single .loc lookup
    rows = (complete['user'] == user) & (complete['set'] == set)
    selected = complete.loc[rows, ['key', 'timestamp']]

    key_codes = selected['key'].to_list()
    timestamps = selected['timestamp'].to_list()

    return [(str(k), t) for (k, t) in zip(key_codes, timestamps)]

def read_user_data(complete, n_users: int = 31, n_sets: int = 15):
    '''
    Reads the keystrokes of every user and every set from the complete data frame.

        Parameters:
                complete (pd.DataFrame): frame passed on to read_file
                n_users (int): number of users; user ids 1..n_users are read (default 31)
                n_sets (int): number of sets per user; set ids 1..n_sets are read (default 15)

        Returns:
                users (list): one list per user, containing one keystroke list per set
    '''
    return [
        [read_file(complete, user, set_id) for set_id in range(1, n_sets + 1)]
        for user in range(1, n_users + 1)
    ]

def get_user_profiles(user_data):
    '''
    Converts the raw keystrokes of all users into n-graph profiles.

        Parameters:
                user_data (list): one list per user, containing one keystroke list per sample

        Returns:
                user_profiles (list[dict]): one dict per user with the keys
                'digraphs', 'trigraphs' and 'fourgraphs'; each value is a list
                holding one {n-graph: duration} dict per sample
    '''
    user_profiles = []
    for u_data in user_data:
        digraphs, trigraphs, fourgraphs = [], [], []
        for sample in u_data:
            sample_digraphs, sample_trigraphs, sample_fourgraphs = create_user_profile(sample)
            # the (key, duration) tuple lists become lookup tables per sample
            digraphs.append(dict(sample_digraphs))
            trigraphs.append(dict(sample_trigraphs))
            fourgraphs.append(dict(sample_fourgraphs))

        user_profiles.append({"digraphs": digraphs, "trigraphs": trigraphs, "fourgraphs": fourgraphs})
    return user_profiles

def create_user_profiles(path_to_userdata, filename):
    '''
    Builds the profiles of all users from a keystroke CSV file and pickles them.

        Parameters:
                path_to_userdata: path of the CSV file, read with pd.read_csv
                filename: path of the pickle file the profiles are written to
    '''
    keystroke_frame = pd.read_csv(path_to_userdata)
    profiles = get_user_profiles(read_user_data(keystroke_frame))
    with open(filename, "wb") as profile_file:
        pickle.dump(profiles, profile_file)

In [None]:
# abstractions
class Sample:
    def __init__(self, digraphs: dict[str, float], trigraphs: dict[str, float], fourgraphs: dict[str, float]):
        self.digraphs = digraphs
        self.trigraphs = trigraphs
        self.fourgraphs = fourgraphs
        
    def __str__(self):
        return f'digraphs: {self.digraphs} trigraphs: {self.trigraphs} fourgraphs: {self.fourgraphs}'

    def get_intersection(self, other: Self) -> Self:
        intersection_digraphs = self.digraphs.keys() & other.digraphs.keys()
        intersection_trigraphs = self.trigraphs.keys() & other.trigraphs.keys()
        intersection_fourgraphs = self.fourgraphs.keys() & other.fourgraphs.keys()


        s_digraphs = {k: v for k, v in self.digraphs.items() if k in intersection_digraphs}
        s_trigraphs = {k: v for k, v in self.trigraphs.items() if k in intersection_trigraphs}
        s_fourgraphs = {k: v for k, v in self.fourgraphs.items() if k in intersection_fourgraphs}

        return Sample(s_digraphs, s_trigraphs, s_fourgraphs)

    def get_digraphs(self) -> dict[str, float]:
        return self.digraphs
    
    def get_trigraphs(self) -> dict[str, float]:
        return self.trigraphs
    
    def get_fourgraphs(self) -> dict[str, float]:
        return self.fourgraphs


class UserProfile:
    '''
    All typing samples of one user, stored as three parallel lists:
    index i of `digraphs`, `trigraphs` and `fourgraphs` together forms sample i.
    '''
    def __init__(self, profile: dict[str, list[dict]]):
        '''
                Parameters:
                        profile: dict with the keys 'digraphs', 'trigraphs' and
                                 'fourgraphs', each a list with one n-graph dict per sample
        '''
        # the three lists are indexed in parallel, so they must have the same length
        assert len(profile['digraphs']) == len(profile['trigraphs']) == len(profile['fourgraphs'])

        # the lists are stored by reference, they are not copied
        self.digraphs = profile['digraphs']
        self.trigraphs = profile['trigraphs']
        self.fourgraphs = profile['fourgraphs']

    def __str__(self):
        return f'digraphs: {self.digraphs} trigraphs: {self.trigraphs} fourgraphs: {self.fourgraphs}'

    def get_sample_count(self) -> int:
        '''Returns the number of samples in this profile.'''
        return len(self.digraphs)

    def get_sample(self, index: int) -> Sample:
        '''Returns sample `index` as a Sample wrapping the stored n-graph dicts.'''
        return Sample(self.digraphs[index], self.trigraphs[index], self.fourgraphs[index])

    def get_without_sample(self, index: int) -> Self:
        '''Returns a deep copy of this profile with sample `index` removed; self is left unchanged.'''
        out = deepcopy(self)

        del out.digraphs[index]
        del out.trigraphs[index]
        del out.fourgraphs[index]
        return out

    def get_samples(self) -> list[Sample]:
        '''Returns all samples of this profile in index order.'''
        return [self.get_sample(i) for i in range(self.get_sample_count())]

    def m(self) -> dict[str, float]:
        '''
        Calculates the mean distance between the samples of this profile.

                Returns:
                        mean distances (dict[str, float]): for each distance
                        combination produced by d(), the mean over all ordered
                        pairs of distinct samples; empty if the profile holds
                        fewer than two samples
        '''
        samples: list[Sample] = self.get_samples()

        distances: dict[str, list[float]] = defaultdict(list)

        # calculate distances between every ordered pair of distinct samples
        for i, sample_A in enumerate(samples):
            for j, sample_B in enumerate(samples):
                # distance from a sample to itself does not have to be calculated
                if j == i:
                    continue

                # append each distance combination to distances
                for key, value in d(sample_A, sample_B).items():
                    distances[key].append(value)

        # calculate mean for each distance and return
        return {k: np.array(v).mean() for k, v in distances.items()}
        


In [None]:
# basic distances
def a_distance(sample_A_ngraphs: dict[str, float], sample_B_ngraphs: dict[str, float], threshold: float = 1.05) -> float:
    '''
    Calculates the A-distance between two samples over their shared n-graphs.

            Parameters:
                    sample_A_ngraphs (dict[str, float]): n-graph durations of the first sample
                    sample_B_ngraphs (dict[str, float]): n-graph durations of the second sample,
                                                         expected to hold the same n-graphs
                    threshold (float): largest duration ratio still counted as similar

            Returns:
                    distance (float): 1 - (similar n-graphs / shared n-graphs), rounded to
                    6 decimals; 1 if fewer than minimum_profile_length_a n-graphs are shared
    '''
    assert len(sample_A_ngraphs) == len(sample_B_ngraphs)

    # guard: too few shared n-graphs count as maximal distance
    number_of_shared_ngraphs = len(sample_A_ngraphs)
    if number_of_shared_ngraphs < minimum_profile_length_a:
        if trace: print(f'[TRACE]: Insufficient number of n-graphs: {number_of_shared_ngraphs}')
        return 1

    def _non_zero(duration: float) -> float:
        # a duration of 0 is replaced by a very small number to prevent division by 0
        return 0.0000001 if duration == 0 else duration

    def _is_similar(n_graph: str) -> bool:
        first = _non_zero(sample_A_ngraphs[n_graph])
        second = _non_zero(sample_B_ngraphs[n_graph])
        ratio = max(first, second) / min(first, second)
        # 1 < max(d1, d2)/min(d1, d2) <= t  (equal durations are not counted)
        return 1 < ratio and ratio <= threshold

    similar_ngraphs = sum(1 for n_graph in sample_A_ngraphs if _is_similar(n_graph))

    return np.round(1 - (similar_ngraphs / number_of_shared_ngraphs), 6)

def r_distance(sample_A_ngraphs: dict[str, float], sample_B_ngraphs: dict[str, float]) -> float:
    '''
    Calculates the R-distance (normalized degree of disorder) between two samples.

    Both samples are ordered by n-graph duration; the distance is the sum of the
    position differences of each n-graph in the two orderings, divided by the
    maximum possible disorder.

            Parameters:
                    sample_A_ngraphs (dict[str, float]): n-graph durations of the first sample
                    sample_B_ngraphs (dict[str, float]): n-graph durations of the second sample;
                                                         must hold the same n-graphs (a missing
                                                         n-graph raises KeyError)

            Returns:
                    distance (float): disorder / maximum disorder, rounded to 6 decimals;
                    1 if fewer than minimum_profile_length_r n-graphs are shared
    '''
    assert len(sample_A_ngraphs) == len(sample_B_ngraphs)

    # check that a minimal number of n-graphs are shared
    # (uses the R-specific threshold; the A threshold was read here before)
    number_of_shared_ngraphs = len(sample_A_ngraphs)
    if number_of_shared_ngraphs < minimum_profile_length_r:
        if trace: print(f'[TRACE]: Insufficient number of n-graphs: {number_of_shared_ngraphs}')
        return 1

    # order the n-graphs of each sample by duration (stable: ties keep dict order)
    sample_A_ngraphs_sorted = sorted(sample_A_ngraphs, key=sample_A_ngraphs.get)
    sample_B_ngraphs_sorted = sorted(sample_B_ngraphs, key=sample_B_ngraphs.get)

    # position lookup replaces repeated list.index() scans
    position_in_A = {n_graph: position for position, n_graph in enumerate(sample_A_ngraphs_sorted)}

    # distance between the positions of each n-graph in the two orderings
    ordered_distances = [abs(position_in_A[n_graph] - position) for position, n_graph in enumerate(sample_B_ngraphs_sorted)]

    # maximum degree of disorder: |V|^2 / 2 if |V| is even, (|V|^2 - 1) / 2 if |V| is odd
    maximum_disorder = (number_of_shared_ngraphs * number_of_shared_ngraphs - number_of_shared_ngraphs % 2) / 2

    # calculate r-distance
    distance = np.sum(ordered_distances) / maximum_disorder

    return np.round(distance, 6)

In [None]:
# generic d(distance) and md(mean distance) functions
def d(sample_A: Sample, sample_B: Sample) -> dict[str, float]:
    '''
    Calculates all distance combinations between two samples.

            Parameters:
                    sample_A (Sample): first sample
                    sample_B (Sample): second sample

            Returns:
                    out (dict[str, float]): the basic A- and R-distances over the shared
                    digraphs, trigraphs and fourgraphs ('a2'..'a4', 'r2'..'r4') and the
                    sums of these named by their components (e.g. 'r234_a23')
    '''
    # restrict both samples to the n-graphs they share
    common_A = sample_A.get_intersection(sample_B)
    common_B = sample_B.get_intersection(sample_A)

    # (A, B) duration dicts for digraphs, trigraphs and fourgraphs
    pairs = (
        (common_A.get_digraphs(), common_B.get_digraphs()),
        (common_A.get_trigraphs(), common_B.get_trigraphs()),
        (common_A.get_fourgraphs(), common_B.get_fourgraphs()),
    )
    for own, other in pairs:
        assert own.keys() == other.keys()

    # basic distances: all a-distances first, then all r-distances
    a2, a3, a4 = [a_distance(own, other) for own, other in pairs]
    r2, r3, r4 = [r_distance(own, other) for own, other in pairs]

    # all combinations of a- and r-distances
    return {
        'a2': a2,
        'a3': a3,
        'a4': a4,

        'r2': r2,
        'r3': r3,
        'r4': r4,

        'a23': a2 + a3,
        'a34': a3 + a4,
        'a234': a2 + a3 + a4,

        'r23': r2 + r3,
        'r34': r3 + r4,
        'r234': r2 + r3 + r4,

        'r2_a2': r2 + a2,
        'r3_a3': r3 + a3,
        'r4_a4': r4 + a4,

        'r23_a23': r2 + r3 + a2 + a3,
        'r34_a34': r3 + r4 + a3 + a4,
        'r24_a24': r2 + r4 + a2 + a4,

        'r234_a234': r2 + r3 + r4 + a2 + a3 + a4,

        'r2_a3': r2 + a3,
        'r2_a4': r2 + a4,

        'r3_a2': r3 + a2,
        'r4_a2': r4 + a2,

        'r23_a2': r2 + r3 + a2,
        'r23_a3': r2 + r3 + a3,
        'r23_a4': r2 + r3 + a4,

        'r234_a2': r2 + r3 + r4 + a2,
        'r234_a3': r2 + r3 + r4 + a3,
        'r234_a4': r2 + r3 + r4 + a4,

        'r234_a23': r2 + r3 + r4 + a2 + a3,

        'r2_a234': r2 + a2 + a3 + a4,
        'r3_a234': r3 + a2 + a3 + a4,
        'r4_a234': r4 + a2 + a3 + a4,

        'r23_a234': r2 + r3 + a2 + a3 + a4,
        'r34_a234': r3 + r4 + a2 + a3 + a4,
    }

def md(user: UserProfile, sample: Sample) -> dict[str, float]:
    '''
    Calculates the mean distances between the samples of a user profile and a sample.

            Parameters:
                    user (UserProfile): profile whose samples the distance is measured to
                    sample (Sample): sample the distance is measured from

            Returns:
                    mean_distances (dict[str, float]): for each distance combination of d(),
                    the mean over all samples of the profile
    '''
    assert isinstance(user, UserProfile), f'Wrong input type: {type(user)}'
    assert isinstance(sample, Sample), f'Wrong input type: {type(sample)}'

    # one dict of distance combinations per sample of the profile
    per_sample = [d(user_sample, sample) for user_sample in user.get_samples()]

    # regroup: distance combination -> values over all profile samples
    collected: dict[str, list[float]] = defaultdict(list)
    for combinations in per_sample:
        for name, value in combinations.items():
            collected[name].append(value)

    return {name: np.array(values).mean() for name, values in collected.items()}

In [None]:
# user classification
def user_classification(distances: dict[str, list[float]], distance_measure: str) -> int:
    '''
    Picks the user with the smallest distance for one distance measure.

            Parameters:
                    distances (dict[str, list[float]]): per distance measure, one distance per user
                    distance_measure (str): key of the measure to classify with

            Returns:
                    index (int): position of the smallest distance, as returned by np.argmin
    '''
    per_user = list(distances[distance_measure])
    return np.argmin(per_user)

In [None]:
# authentication test: legitimate users
def authentication_test_legal_connection(user_profiles_training: list[UserProfile], user_profiles_evaluation: list[UserProfile]) -> tuple[int, int]:
    '''
    Tries to authenticate every evaluation sample as its own user and counts the false rejects.

    A sample X of user A is accepted if A is the closest training profile and
    md(A, X) < m(A) + 0.5 * (md(B, X) - m(A)) holds for every entry B of the
    computed distances.

            Parameters:
                    user_profiles_training (list[UserProfile]): profiles the samples are compared to
                    user_profiles_evaluation (list[UserProfile]): profiles supplying the test samples;
                                                                  index i must belong to the same user
                                                                  as training index i

            Returns:
                    (attempts, false_rejects) (tuple[int, int]): number of tested samples and number of
                    samples rejected by classification or by the distance check
    '''
    # keep track of results
    false_reject_classification: int = 0
    false_reject_distance: int = 0
    attempt: int = 0

    assert type(user_profiles_evaluation) == list,  type(user_profiles_evaluation)

    # for each user, try to authenticate with each sample
    for test_user_index, test_user in enumerate(user_profiles_evaluation):
        print("Start legal attempt user " + str(test_user_index))

        assert type(test_user) == UserProfile,  type(test_user)

        # m(A) only depends on the user's profile, not on the tested sample:
        # compute it at most once per user, on the first successful classification
        # NOTE(review): m(A) is taken from the evaluation profile, while the attack
        # test takes it from the attacked (training) profile — confirm this is intended.
        m_A = None

        # try to classify user with each set as test sample
        for sample_index in range(0, test_user.get_sample_count()):
            attempt += 1

            # get sample to test
            test_sample = test_user.get_sample(sample_index)

            # the tested sample is NOT removed from the training profiles; if training
            # and evaluation data are identical, the sample is also compared with itself
            user_profiles = user_profiles_training

            # calculate distances from sample to user profiles
            distances: list[dict[str, float]] = [md(user_profile, test_sample) for user_profile in user_profiles]

            # convert list of dicts to dict of lists
            distances_c: dict[str, list[float]] = defaultdict(list)
            for entry in distances:
                for key, value in entry.items():
                    distances_c[key].append(value)

            # find the user with the smallest distance
            classified_user_index = user_classification(distances_c, auth_distance_measure)

            # if wrongly classified, continue with next set
            if classified_user_index != test_user_index:
                false_reject_classification += 1
                continue

            # classification was successful
            if trace: print(f"[TRACE] Sucessfully classified: Set {sample_index}")

            if m_A is None:
                m_A = test_user.m()[auth_distance_measure]
            md_A_X = distances[test_user_index][auth_distance_measure]

            # check that the distance is close enough
            # NOTE(review): the loop also visits A itself (md_B_X == md_A_X), which
            # reduces the rule to md(A, X) < m(A) — confirm whether B should exclude A.
            for user_B_distances in distances:
                md_B_X = user_B_distances[auth_distance_measure]

                # md(A, X) < m(A) + 0.5 * (md(B,X) - m(A))
                if md_A_X >= m_A + 0.5 * (md_B_X - m_A):
                    false_reject_distance += 1
                    if verbose: print("[WARN ] Authentification failure: distance too big")
                    break
            else:
                # no entry violated the rule: the sample is authenticated
                if verbose: print(f"[INFO ] Successfully authenticated: Set {sample_index}")

    print("Attempts: "+ str(attempt) + " FR_Classification: "+ str(false_reject_classification) + " FR_Distance: " + str(false_reject_distance))
    return (attempt, false_reject_classification + false_reject_distance)

In [None]:
# authentication test: impostors
def authentication_test_attack(attacked_user_profiles: list[UserProfile], attacker_user_profiles: list[UserProfile]) -> tuple[int, int]:
    '''
    Tries to authenticate every attacker sample as every other user and counts the false accepts.

    A sample X is (falsely) accepted as user A if A is the closest attacked profile
    and md(A, X) < m(A) + 0.5 * (md(B, X) - m(A)) holds for every entry B of the
    computed distances.

            Parameters:
                    attacked_user_profiles (list[UserProfile]): profiles the attackers try to authenticate as
                    attacker_user_profiles (list[UserProfile]): profiles supplying the attack samples; an
                                                                attacker never attacks the profile with
                                                                its own index

            Returns:
                    (attempts, false_accept) (tuple[int, int]): number of attack attempts and number of
                    attempts that were authenticated
    '''
    false_accept: int = 0
    attempts: int = 0

    # m(A) only depends on the attacked profile: computed lazily, once per attacked user
    m_cache: dict[int, float] = {}

    # with each sample from attacker, try to authenticate as each user
    for attacker_user_index, attacker_user in enumerate(attacker_user_profiles):
        print(f"Start attack from user: {attacker_user_index}")

        # distances and classification only depend on the attacker sample, not on
        # the attacked user: computed lazily, once per sample of this attacker
        sample_cache: dict[int, tuple[list[dict[str, float]], int]] = {}

        for attacked_user_index, attacked_user in enumerate(attacked_user_profiles):
            if verbose: print(f"Attacking user: {attacked_user_index}")

            # user shouldn't attack himself
            if attacked_user_index == attacker_user_index: continue

            # try to authenticate on attacked profile with each sample from attacker
            for attacker_sample_index, attacker_sample in enumerate(attacker_user.get_samples()):
                if verbose: print(f"Attack using set: {attacker_sample_index}")
                attempts += 1

                if attacker_sample_index not in sample_cache:
                    # calculate distances from sample to user profiles
                    sample_distances: list[dict[str, float]] = [md(user_profile, attacker_sample) for user_profile in attacked_user_profiles]

                    # transform from list of dicts to dict of lists
                    distances_c: dict[str, list[float]] = defaultdict(list)
                    for entry in sample_distances:
                        for key, value in entry.items():
                            distances_c[key].append(value)

                    # find user with smallest distance
                    sample_cache[attacker_sample_index] = (
                        sample_distances,
                        user_classification(distances_c, auth_distance_measure),
                    )

                distances, classified_user_index = sample_cache[attacker_sample_index]

                # if wrongly classified, continue with next sample
                if classified_user_index != attacked_user_index:
                    if verbose: print("[WARN ] Authentification failure: classification failed")
                    continue

                # classification was successful
                if trace: print(f"[TRACE] Sucessfully classified:\n  Attacker: {attacker_user_index} Set:{attacker_sample_index} Attacked: {attacked_user_index}")

                if attacked_user_index not in m_cache:
                    m_cache[attacked_user_index] = attacked_user.m()[auth_distance_measure]
                m_A = m_cache[attacked_user_index]
                md_A_X = distances[attacked_user_index][auth_distance_measure]

                # check that the distance is close enough
                # NOTE(review): the loop also visits A itself (md_B_X == md_A_X), which
                # reduces the rule to md(A, X) < m(A) — confirm whether B should exclude A.
                for user_B_distances in distances:
                    md_B_X = user_B_distances[auth_distance_measure]

                    # md(A, X) < m(A) + 0.5 * (md(B,X) - m(A))
                    if md_A_X >= m_A + 0.5 * (md_B_X - m_A):
                        # rule violated: the attack is rejected
                        if verbose: print("[WARN ] Authentification failure: distance too big")
                        break
                else:
                    # no entry violated the rule: the attacker was authenticated.
                    # (The success flag used to be set on the failure branch,
                    # so rejected attacks were counted as false accepts.)
                    false_accept += 1
                    if verbose: print(f"[INFO ] Successfully authenticated:\n    Attacker: {attacker_user_index} Set:{attacker_sample_index} Attacked: {attacked_user_index}")

    return (attempts, false_accept)

In [None]:
# authenticate experiment
def authenticate_dataset(path_to_dataset_training: str, path_to_dataset_evaluation: str, output: str, filter:list = []):
    '''
    Runs the legitimate and the impostor authentication test and writes the result as CSV.

            Parameters:
                    path_to_dataset_training (str): pickle file with the training profiles
                    path_to_dataset_evaluation (str): pickle file with the evaluation profiles
                    output (str): prefix of the result file './__DATA/<output>_authentication.csv'
                    filter (list): indices of profiles to EXCLUDE from both datasets
                                   (note: classify_dataset treats its `filter` as the indices to keep);
                                   the default list is only read, never modified
    '''
    # make sure the output directory exists before the long-running experiments,
    # so the results are not lost when writing the CSV at the very end
    os.makedirs('./__DATA/', exist_ok=True)

    # SECURITY: pickle.load executes arbitrary code from the file - only open trusted files
    # open training data set
    with open(path_to_dataset_training, "rb") as fp:
        user_profiles_training = pickle.load(fp)

    # open eval data sets
    with open(path_to_dataset_evaluation, "rb") as fp:
        user_profiles_evaluation = pickle.load(fp)

    # drop the profiles whose index is listed in `filter` and wrap the rest as UserProfile
    user_profiles_training = [UserProfile(j) for i, j in enumerate(user_profiles_training) if i not in filter]
    user_profiles_evaluation = [UserProfile(j) for i, j in enumerate(user_profiles_evaluation) if i not in filter]

    # try legitimate auth
    legal_attempts = authentication_test_legal_connection(user_profiles_training, user_profiles_evaluation)

    # try fraudulent auth
    attacks = authentication_test_attack(user_profiles_training, user_profiles_evaluation)

    # write results to file
    df = pd.DataFrame(data={
        'Type':['False Reject', 'False Accept'],
        'Attempts':[legal_attempts[0], attacks[0]],
        'Result':[legal_attempts[1], attacks[1]]
    })
    df.to_csv('./__DATA/' + output + "_authentication.csv", index=False)

In [None]:
# debug records, filled while classification_experiment runs
# results_class_global is indexed as: users -> sample -> key -> data
# (classification_experiment stores the wrongly classified user index as data when `debug` is set)
results_class_global:list[list[dict[str, tuple[int,int]]]] = []
# NOTE(review): presumably meant to receive the mean distances computed in
# classification_experiment — verify that the assignment there targets this module-level name.
results_md_global:list[dict[str, float]] = []



def classification_experiment(user_profiles_training: list[UserProfile], user_profiles_evaluation: list[UserProfile]) -> list[dict[str, int]]:
    '''
    Classifies every evaluation sample with every distance measure and counts the hits.

            Parameters:
                    user_profiles_training (list[UserProfile]): profiles the samples are compared to
                    user_profiles_evaluation (list[UserProfile]): profiles supplying the test samples;
                                                                  index i must belong to the same user
                                                                  as training index i

            Returns:
                    users_scores (list[dict[str, int]]): per evaluation user, the number of correctly
                    classified samples for each distance measure

            Side effects:
                    results_md_global is set to the distances of the last classified sample;
                    if `debug` is set, results_class_global is reset and filled with the
                    misclassifications of this run (user -> sample -> measure -> classified index)
    '''
    # without this declaration the assignment below would only bind a local name
    global results_md_global

    users_scores: list[dict[str, int]] = []

    # start from an empty log, otherwise the entries of a previous run stay in front
    if debug: results_class_global.clear()

    # for each user / userprofile := (digraphs, trigraphs, fourgraphs)
    for test_user_index, test_user in enumerate(user_profiles_evaluation):
        print(f"Start classifcation for user {test_user_index}")

        user_score: dict[str, int] = defaultdict(int)

        # misclassification log of this user, one dict per sample
        user_log: list[dict[str, int]] = []
        if debug: results_class_global.append(user_log)

        # try to classify user with each set as test sample
        for sample_index in range(0, test_user.get_sample_count()):

            sample_log: dict[str, int] = {}
            if debug: user_log.append(sample_log)

            # get sample to test
            test_sample = test_user.get_sample(sample_index)

            # the tested sample is NOT removed from the training profiles
            user_profiles: list[UserProfile] = user_profiles_training

            # calculate distances from sample to user profiles
            distances: list[dict[str, float]] = [md(user_profile, test_sample) for user_profile in user_profiles]

            results_md_global = distances

            # convert list of dicts to dict of lists
            distances_c: dict[str, list[float]] = defaultdict(list)
            for entry in distances:
                for key, value in entry.items():
                    distances_c[key].append(value)

            for key in distances_c.keys():
                # find the user with the smallest distance for this measure
                classified_user_index = user_classification(distances_c, key)

                if classified_user_index == test_user_index:
                    user_score[key] += 1
                else:
                    if verbose: print(f"[DEBUG] Classification failure {key}: user: {test_user_index} sample: {sample_index} classified user: {classified_user_index}")
                    if debug: sample_log[key] = classified_user_index

        users_scores.append(user_score)

    return users_scores

In [None]:
# classification experiment
def classify_dataset(path_to_dataset_training: str, path_to_dataset_evaluation: str, output: str, filter:list = list(range(31))):
    '''
    Runs the classification experiment and writes the scores and error rates as CSV.

            Parameters:
                    path_to_dataset_training (str): pickle file with the training profiles
                    path_to_dataset_evaluation (str): pickle file with the evaluation profiles
                    output (str): prefix of the result files './__DATA/<output>_classification.csv'
                                  and './__DATA/<output>_classification_performance.csv'
                    filter (list): indices of profiles to KEEP from both datasets, default 0..30
                                   (note: authenticate_dataset treats its `filter` as the indices to drop);
                                   the default list is only read, never modified
    '''
    # make sure the output directory exists before the long-running experiment,
    # so the results are not lost when writing the CSV files at the very end
    os.makedirs('./__DATA/', exist_ok=True)

    # SECURITY: pickle.load executes arbitrary code from the file - only open trusted files
    # open training data set
    with open(path_to_dataset_training, "rb") as fp:
        user_profiles_training = pickle.load(fp)

    # open eval data sets
    with open(path_to_dataset_evaluation, "rb") as fp:
        user_profiles_evaluation = pickle.load(fp)

    # map raw profile data to UserProfile, keeping only the indices listed in `filter`
    user_profiles_training = [UserProfile(p) for i,p in enumerate(user_profiles_training) if i in filter]
    user_profiles_evaluation = [UserProfile(p) for i,p in enumerate(user_profiles_evaluation) if i in filter]

    results: list[dict[str, int]] = classification_experiment(user_profiles_training, user_profiles_evaluation)

    df = pd.DataFrame(results)

    # get sum of correct classifications
    sums = df.sum().to_frame(name="Sucessfull")

    # total number of tried classifications: one per evaluation sample
    # (counted from the data instead of assuming 15 samples per user)
    total = sum(profile.get_sample_count() for profile in user_profiles_evaluation)

    # calculate missclassification as (total - sucessfull classifications)
    sums["Missclassifications"] = total - sums.iloc[:,0]

    # calculate error rate in percent
    sums["Error"] = (sums["Missclassifications"] / total) * 100

    # write results to file
    sums.to_csv('./__DATA/'+ output + '_classification_performance.csv')
    df.to_csv('./__DATA/' + output + '_classification.csv')

In [None]:
# read in original data
# source CSV of the original free-text dataset and the pickle file caching its profiles
original_set = './../freetext/FreeText-Dataset-31-USERS.csv'
original_data_profiles = './__DATA/original_data_profiles'

# build and pickle the profiles only if the cache file does not exist yet
if not os.path.isfile(original_data_profiles):
    create_user_profiles(original_set, original_data_profiles)

# experiments are disabled; uncomment to run them on this dataset
#classify_dataset(original_data_profiles, original_data_profiles, "original")
#authenticate_dataset(original_data_profiles, original_data_profiles, "original", filter=[])

In [None]:
# chromium remote otc ws
# source CSV of this dataset and the pickle file caching its profiles
remote_otc_ws_set = "./otc_ws/complete.csv"
remote_otc_ws_data_profiles = "./__DATA/chromium_remote_otc_ws_profiles"

# build and pickle the profiles only if the cache file does not exist yet
if not os.path.isfile(remote_otc_ws_data_profiles):
    create_user_profiles(remote_otc_ws_set, remote_otc_ws_data_profiles)

# experiments are disabled; uncomment to run them on this dataset
#classify_dataset(remote_otc_ws_data_profiles, remote_otc_ws_data_profiles, "chromium_remote_otc_ws")
#authenticate_dataset(remote_otc_ws_data_profiles, remote_otc_ws_data_profiles, "chromium_remote_otc_ws", filter=[])

In [None]:
# chromium remote aws ws
# source CSV of this dataset and the pickle file caching its profiles
remote_aws_ws_set = "./aws_ws/complete.csv"
remote_aws_ws_data_profiles = "./__DATA/chromium_remote_aws_ws_profile"


# build and pickle the profiles only if the cache file does not exist yet
if not os.path.isfile(remote_aws_ws_data_profiles):
    create_user_profiles(remote_aws_ws_set, remote_aws_ws_data_profiles)

# experiments are disabled; uncomment to run them on this dataset
#classify_dataset(remote_aws_ws_data_profiles, remote_aws_ws_data_profiles, "chromium_remote_aws_ws")
#authenticate_dataset(remote_aws_ws_data_profiles, remote_aws_ws_data_profiles, "chromium_remote_aws_ws", filter=[])

In [None]:
# chromium remote aws http
# source CSV of this dataset and the pickle file caching its profiles
remote_aws_http_set = "./aws_http/complete.csv"
remote_aws_http_data_profiles = "./__DATA/chromium_remote_aws_http_profile"


# build and pickle the profiles only if the cache file does not exist yet
if not os.path.isfile(remote_aws_http_data_profiles):
    create_user_profiles(remote_aws_http_set, remote_aws_http_data_profiles)

# experiments are disabled; uncomment to run them on this dataset
#classify_dataset(remote_aws_http_data_profiles, remote_aws_http_data_profiles, "chromium_remote_aws_http")
#authenticate_dataset(remote_aws_http_data_profiles, remote_aws_http_data_profiles, "chromium_remote_aws_http", filter=[])

In [None]:
# chromium remote otc http
# source CSV of this dataset and the pickle file caching its profiles
remote_otc_http_set = "./otc_http/complete.csv"
remote_otc_http_data_profiles = "./__DATA/chromium_remote_otc_http_profile"


# build and pickle the profiles only if the cache file does not exist yet
if not os.path.isfile(remote_otc_http_data_profiles):
    create_user_profiles(remote_otc_http_set, remote_otc_http_data_profiles)

# experiments are disabled; uncomment to run them on this dataset
#classify_dataset(remote_otc_http_data_profiles, remote_otc_http_data_profiles, "chromium_remote_otc_http")
#authenticate_dataset(remote_otc_http_data_profiles, remote_otc_http_data_profiles, "chromium_remote_otc_http", filter=[])

In [None]:
# chromium isolated
# source CSV of this dataset and the pickle file caching its profiles
isolated_set = "./complete_chromium_isolated.csv"
isolated_data_profiles = "./__DATA/chromium_isolated_profiles"


# build and pickle the profiles only if the cache file does not exist yet
if not os.path.isfile(isolated_data_profiles):
    create_user_profiles(isolated_set, isolated_data_profiles)

# experiments are disabled; uncomment to run them on this dataset
#classify_dataset(isolated_data_profiles, isolated_data_profiles, "chromium_isolated")
#authenticate_dataset(isolated_data_profiles, isolated_data_profiles, "chromium_isolated", filter=[])