# One class SVM model using automatically extracted features - Model 1

This notebook contains the code to run experiments for intruder detection using the first CNN model (Model 1) in order to automatically
extract features from gestures. The model should extract both single-gesture features, as well as inter-gesture features relating to the
time difference in a sliding window. 

The experiments are run similarly to the ones for the 1-class SVM with manually engineered features. However, this notebook requires a `models` folder
containing the trained models and the lists of users used for training them (so that those users can be excluded). Links to these resources can be found in the `README.md` file. 

Experiments run in parallel using multiple processes and save the output in the `OUTPUT_DIR` folder. Set the `N_THREADS` variable to the number of parallel processes you want to use (recommended 4-8 on a normal machine).

In [1]:
import multiprocessing as mp
import os
import pickle
import time

import numpy as np
import tensorflow as tf
from sklearn.metrics import roc_curve
from sklearn.model_selection import ParameterGrid, TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.svm import OneClassSVM
from tensorflow import keras
from tensorflow.keras import models
from tqdm import tqdm

from config import *

%matplotlib inline

%load_ext autoreload
%autoreload 2
 

## Parameters for the experiments
Set the parameters with which the experiment will be run. Below is a description for each of them.
* `DATASET`: Specify which dataset to be used. Choose between BRAINRUN and TOUCHALYTICS. The paths for the dataset are assumed to be at `./datasets`, and can be changed in the `config.py` file.
* `N_THREADS`: Number of threads to use for the experiment.
* `OUTPUT_DIR`: Directory to which the results will be written.
* `MODELS_DIR`: Directory containing the trained models.

Here we also set the parameters for data cleanup and parsing, although these are best left constant.
* `MIN_SESSION_GESTURES`: Minimum number of gestures a session must contain to be considered. Left at 140, as this value has been shown to produce good results while still including a large number of users.
* `SCREENS`: The screens to use from the BrainRun dataset when performing the experiment (ignored for the Touchalytics dataset). The experiments were originally performed using either one or both of the screens *MathisisGame* or *FocusGame*, as they contain predominantly swipe data.
* `WINDOW_SIZE`: Needs to be the same as the size of the input to the model (constant 11 throughout the experiments). 

In [2]:
# Dataset the experiment runs on. BRAINRUN / TOUCHALYTICS are not defined in this
# notebook; presumably they come from `from config import *` - TODO confirm.
DATASET = TOUCHALYTICS # Choose between BRAINRUN and TOUCHALYTICS
# Upper bound on the number of worker processes that are alive at the same time
N_THREADS = 32
# Root folder for the results; each iteration writes into its own sub-folder of it
OUTPUT_DIR = 'test_results2/'
# Folder holding the per-iteration user splits (`results/`) and trained CNNs (`models/`)
MODELS_DIR = 'models/model_1'

# Sessions with fewer gestures than this (after filtering) are dropped by `parse_user`
MIN_SESSION_GESTURES = 140
# Screens whose swipes are kept; only consulted when DATASET == BRAINRUN
SCREENS = ['MathisisGame', 'FocusGame']

# WARNING: Window size has to be set to the size of the model input layer
# It is also used as the gap between train/validation/test index ranges.
WINDOW_SIZE = 11

## Utility methods for extracting features and splitting the data

In [3]:
def gesture_to_points(data):
    '''
    Convert a gesture to a flat vector made of its first 5 and its last 5 points.

    `data` is the list of samples of a single gesture. The starting point is read
    from the `x0`/`y0` keys of the first sample, and every sample then contributes
    its `moveX`/`moveY` position. When the gesture has fewer than 5 points, each
    half is padded with trailing zeros (the two halves then overlap).

    Returns a float64 array of 20 values: the x/y pairs of the first 5 points
    followed by the x/y pairs of the last 5 points. No NaN clean-up is performed.
    Raises IndexError when `data` is empty.
    '''
    points_per_end = 5
    points = np.array([(data[0]['x0'], data[0]['y0'])] + [(p['moveX'], p['moveY']) for p in data])

    def flatten_and_pad(selected):
        # Copy into a fixed-size zero buffer instead of resizing an array in place:
        # same result, but it cannot fail because of outstanding references.
        padded = np.zeros(2 * points_per_end)
        values = selected.flatten()
        padded[:values.size] = values
        return padded

    return np.concatenate([
        flatten_and_pad(points[:points_per_end]),
        flatten_and_pad(points[-points_per_end:]),
    ])
    
def gesture_to_data(c):
    '''
    Turn one gesture into a flat feature row.

    The row starts with the gesture's `t_start` and `t_stop` values (kept only so
    that inter-gesture timings can be derived later, then dropped), followed by the
    gesture duration (`t_stop - t_start`, divided by 1000) and the point features
    produced by `gesture_to_points`.
    '''
    start, stop = c['t_start'], c['t_stop']
    duration = (stop - start) / 1000
    timing = [start, stop, duration]

    point_features = gesture_to_points(c['data'])
    return np.concatenate([timing, point_features])

def window_to_datapoint(window):
    '''
    Build the model input for one window of consecutive gestures.

    `window` is a 2D array with one row per gesture whose first two columns are the
    gesture start and stop times. Those two columns are dropped and replaced by two
    inter-gesture columns (both divided by 1000, both 0 for the first row):
      * start of the gesture minus the stop of the first gesture in the window
      * start of the gesture minus the stop of the previous gesture

    The result keeps the number of columns of `window` and gets a trailing axis of
    size 1, i.e. it has shape (rows, columns, 1).
    '''
    n_rows, n_cols = window.shape
    starts = window[:, 0]
    stops = window[:, 1]

    # Time elapsed since the first gesture of the window ended
    since_first = np.concatenate([[0], (starts[1:] - stops[0]).ravel() / 1000])
    # Time elapsed since the previous gesture ended
    since_previous = np.concatenate([[0], (starts[1:] - stops[:-1]).ravel() / 1000])

    features = np.concatenate(
        [
            window[:, 2:],  # Everything except start and stop time
            since_first.reshape(n_rows, 1),
            since_previous.reshape(n_rows, 1),
        ],
        axis=1,
    )
    return features.reshape(n_rows, n_cols, 1)

def session_to_datapoints(s):
    '''
    Convert a session into one datapoint per sliding window of gestures.

    Every gesture in `s['gestures']` is featurized, windows of `WINDOW_SIZE`
    consecutive gestures are formed (`len(gestures) - WINDOW_SIZE` of them) and each
    window is passed through the global `deep_model`, whose output is returned.
    '''
    gesture_rows = np.array([gesture_to_data(gesture) for gesture in s['gestures']])

    # Row i of the index matrix is [i, i + 1, ..., i + WINDOW_SIZE - 1]
    window_starts = np.arange(len(gesture_rows) - WINDOW_SIZE)[:, np.newaxis]
    offsets = np.arange(WINDOW_SIZE)[np.newaxis, :]
    window_indices = window_starts + offsets

    model_input = np.array([window_to_datapoint(w) for w in gesture_rows[window_indices]])
    return deep_model.predict(model_input)


def get_train_indices(size, test_size = 0.2, gap = WINDOW_SIZE, max_size = np.inf):
    '''
    Returns the indices of the training part of a session with `size` datapoints.

    The session (capped at `max_size` datapoints) is split at the `1 - test_size`
    fraction, and the last `gap` indices before the split point are left out so that
    train and test data are separated.
    '''
    usable = min(size, max_size)
    split_point = min(int(usable * (1 - test_size)), max_size)
    return np.arange(split_point - gap)

def get_test_indices(size, test_size = 0.2, gap = WINDOW_SIZE, max_size = np.inf):
    '''
    Returns the indices of the test part of a session with `size` datapoints, i.e.
    everything from the `1 - test_size` split point up to the (capped) session size.
    `gap` is accepted for symmetry with the other helpers but is not used here.
    '''
    usable = min(size, max_size)
    return np.arange(int(usable * (1 - test_size)), usable)

def get_intruder_size(size, test_size = 0.2, gap = WINDOW_SIZE, max_size = np.inf):
    '''
    Returns a `(validation_indices, test_indices)` pair for an intruder session.

    Only the last `test_size` fraction of the session is used: its first half is the
    validation range (shortened by `gap` so it does not touch the test range) and
    its second half is the test range.
    '''
    usable = min(size, max_size)
    validation_start = int(usable * (1 - test_size))
    test_start = int(usable * (1 - test_size / 2))

    validation_indices = np.arange(validation_start, test_start - gap)
    test_indices = np.arange(test_start, usable)
    return validation_indices, test_indices

## Methods for filtering and parsing the data

In [4]:
from sklearn.neighbors import LocalOutlierFactor

def prefilter_session(s):
    '''
    Cleans up the gestures of a session in place.

    Gestures are sorted by `t_start`, then only those whose duration
    (`t_stop - t_start`) lies strictly between 70 and the dataset limit (1000 for
    BrainRun, 2000 otherwise) are kept. For BrainRun the gesture must additionally
    be a swipe performed on one of the screens listed in `SCREENS`.
    '''
    is_brainrun = DATASET == BRAINRUN
    max_duration = 1000 if is_brainrun else 2000

    def keep(gesture):
        duration = gesture['t_stop'] - gesture['t_start']
        if not (duration > 70 and duration < max_duration):
            return False
        if not is_brainrun:
            return True
        # Screen names may carry a suffix after a space; only the first token is compared
        return gesture['screen'].split(' ')[0] in SCREENS and gesture['type'] == 'swipe'

    s['gestures'].sort(key = lambda gesture: gesture['t_start'])
    s['gestures'] = [gesture for gesture in s['gestures'] if keep(gesture)]

def parse_user(user_id):
    '''
    Parses, in place, all sessions of the first device of `users[user_id]`.

    Every session is pre-filtered; sessions left with fewer than
    `MIN_SESSION_GESTURES` gestures are removed from the list. The remaining ones
    are replaced by their datapoints (see `session_to_datapoints`), from which the
    rows flagged as outliers by a LocalOutlierFactor are dropped.
    '''
    sessions = users[user_id]['devices'][0]['sessions']

    position = 0
    while position < len(sessions):
        session = sessions[position]
        prefilter_session(session)

        if len(session['gestures']) < MIN_SESSION_GESTURES:
            # Deleting shifts the next session into `position`, so do not advance
            del sessions[position]
            continue

        datapoints = session_to_datapoints(session)

        # Outlier detection: fit_predict marks inliers with 1
        detector = LocalOutlierFactor(n_neighbors=20, contamination=0.1)
        inliers = detector.fit_predict(datapoints) == 1
        sessions[position] = datapoints[inliers]
        position += 1

def get_users_over_gestures(number_of_gestures = 140):
    '''
    Returns an array with the indices of all users whose parsed sessions (first
    device only) contain, in total, more than `number_of_gestures` rows.
    '''
    totals = np.array(
        [
            sum(session.shape[0] for session in users[index]['devices'][0]['sessions'])
            for index in range(len(users))
        ],
        dtype=float,
    )
    return np.flatnonzero(totals > number_of_gestures)

def compute_eer(label, pred):
    """
    Computes the equal error rate (EER) for the given labels and prediction scores.

    The operating point where the false positive and false negative rates are
    closest is located on the ROC curve. In theory both rates are identical there;
    in practice they can differ slightly, so their mean is returned.

    Code inspired by https://github.com/YuanGongND/python-compute-eer
    """
    # fpr and tpr are np.arrays with one entry per threshold; thresholds are not needed
    false_positive_rate, true_positive_rate, _ = roc_curve(label, pred)
    false_negative_rate = 1 - true_positive_rate

    crossing = np.nanargmin(np.absolute(false_negative_rate - false_positive_rate))
    return (false_positive_rate[crossing] + false_negative_rate[crossing]) / 2


# Code for multithreaded experiments

Since the datasets are large and an iteration is done for each user, the experiments take a significant amount of time to run on a normal machine. Multiprocessing was used to run the experiments in parallel on a powerful machine, using 32 cores (this reduces the time to about an hour). Below is the code used to run the experiments using different processes. 

In [5]:
# Hyperparameter grid searched for every user's one-class SVM (RBF kernel only)
parameters = [
    {
        'kernel': ['rbf'],
        'gamma': [1000, 100, 10, 1, 0.1, 0.01, 0.001, 0.0001, 0.00001],
        'nu': [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
    },
]

# Finite stand-in for +/-inf decision scores, applied before computing the EER.
# The same value is used for every user so that all users are scored the same way.
_INF_SCORE_CLIP = 100000


def _build_svm(hyper_pair):
    '''
    Builds a OneClassSVM from one hyperparameter combination; `degree` and `gamma`
    fall back to 0 when the combination does not define them.
    '''
    return OneClassSVM(
        kernel=hyper_pair['kernel'],
        nu=hyper_pair['nu'],
        degree=hyper_pair.get('degree', 0),
        gamma=hyper_pair.get('gamma', 0),
    )


def _user_train_test_split(users, user_id, max_train_per_session=np.inf):
    '''
    Concatenates the train part and the test part of every session of a user.
    '''
    sessions = users[int(user_id)]['devices'][0]['sessions']
    X_train = np.concatenate([
        session[get_train_indices(session.shape[0], max_size=max_train_per_session)]
        for session in sessions
    ])
    X_test = np.concatenate([session[get_test_indices(session.shape[0])] for session in sessions])
    return X_train, X_test


def _score_user_and_intruders(clf, scaler, genuine, users, valid_users, user_id, intruder_split):
    '''
    Scores the genuine datapoints and the datapoints of all other valid users.

    `intruder_split` selects which range of `get_intruder_size` is used for the
    intruders (0 = validation, 1 = test). Returns `(labels, scores)`, where genuine
    rows are labelled 1 and intruder rows -1.
    '''
    scores = [clf.decision_function(scaler.transform(genuine))]
    for other_uid in valid_users[valid_users != user_id]:
        for session in users[other_uid]['devices'][0]['sessions']:
            indices = get_intruder_size(session.shape[0])[intruder_split]
            if indices.size == 0:
                # Very short sessions can have an empty range; the scaler rejects empty input
                continue
            scores.append(clf.decision_function(scaler.transform(session[indices])))

    res = np.concatenate(scores)
    labels = np.concatenate([np.ones(genuine.shape[0]), -np.ones(res.shape[0] - genuine.shape[0])])
    return labels, res


def _cross_validated_eer(user_id, hyper_pair, users, valid_users, X_train):
    '''
    Returns the mean EER of one hyperparameter combination over a 4-fold time series
    split of the user's training data (folds separated by `WINDOW_SIZE` datapoints).
    '''
    cv = TimeSeriesSplit(n_splits=4, gap=WINDOW_SIZE)
    fold_eers = []
    for train, test in cv.split(X_train):
        clf = _build_svm(hyper_pair)
        # The scaler is fitted on the training fold only and reused for everything else
        scaler = StandardScaler()
        clf.fit(scaler.fit_transform(X_train[train]))

        labels, res = _score_user_and_intruders(
            clf, scaler, X_train[test], users, valid_users, user_id, intruder_split=0)

        # Set inf and -inf predictions to a reasonable number
        res[np.isneginf(res)] = -_INF_SCORE_CLIP
        res[np.isposinf(res)] = _INF_SCORE_CLIP
        fold_eers.append(compute_eer(labels, res))
    return np.mean(fold_eers)


def _find_hyper(user_id, hyper_pair, users, valid_users, X_train, hyperparameters):
    '''
    Process target: evaluates one hyperparameter combination for a user and appends
    `(mean_eer, hyper_pair)` to the shared `hyperparameters` list.
    '''
    hyperparameters.append(
        (_cross_validated_eer(user_id, hyper_pair, users, valid_users, X_train), hyper_pair))


def _fit_evaluate_and_save(user_id, users, valid_users, X_train, X_test, hyperparameters, file_path):
    '''
    Fits the SVM with the best (lowest mean EER) hyperparameters on the training
    data, scores the user's test data and the intruders' test data, and pickles
    `(labels, scores, hyperparameters)` to `<file_path>/user_<user_id>.pkl`.
    '''
    if not hyperparameters:
        raise RuntimeError(f'No hyperparameter results available for user {user_id}')
    best_hyperparameters = sorted(hyperparameters, key=lambda x: x[0])[0][1]
    svm = _build_svm(best_hyperparameters)

    # Normalize the data using StandardScaler - fit only on train data and use the same scaler for both train and test
    scaler = StandardScaler()
    svm.fit(scaler.fit_transform(X_train))

    # User labels are set as 1, intruder labels are set as -1
    y_test, res = _score_user_and_intruders(
        svm, scaler, X_test, users, valid_users, user_id, intruder_split=1)

    # Save results with pickle to a file
    with open(f'{file_path}/user_{user_id}.pkl', 'wb') as f:
        pickle.dump((y_test, res, hyperparameters), f)


def _run_experiment_for_user(user_id, users, valid_users, parameters, file_path):
    '''
    Process target: runs the hyperparameter search and the final evaluation for one user.
    '''
    X_train, X_test = _user_train_test_split(users, user_id)

    # Default hyperparameters; their score of 100 loses against any real EER in [0, 1]
    hyperparameters = [(100, {'kernel':'rbf', 'gamma':0.8, 'nu': 0.3})]
    for hyper_pair in ParameterGrid(parameters):
        hyperparameters.append(
            (_cross_validated_eer(user_id, hyper_pair, users, valid_users, X_train), hyper_pair))

    _fit_evaluate_and_save(user_id, users, valid_users, X_train, X_test, hyperparameters, file_path)


def _run_in_processes(target, jobs, n_workers):
    '''
    Runs `target(*args)` in a separate process for every args tuple in `jobs`,
    keeping at most `n_workers` processes alive. The progress bar advances when a
    job finishes; jobs that exit with a non-zero code are reported.

    NOTE(review): arguments are handed to `mp.Process` directly, which avoids
    serializing the large `users` structure only with the `fork` start method -
    confirm on the target platform.
    '''
    n_workers = max(1, n_workers)
    running = []
    next_job = 0
    pbar = tqdm(total=len(jobs))
    try:
        while next_job < len(jobs) or running:
            while len(running) < n_workers and next_job < len(jobs):
                process = mp.Process(target=target, args=jobs[next_job])
                process.start()
                running.append(process)
                next_job += 1

            # Build a new list instead of removing from the one being iterated
            still_running = []
            for process in running:
                if process.is_alive():
                    still_running.append(process)
                    continue
                process.join()
                if process.exitcode != 0:
                    tqdm.write(f'Worker process {process.pid} exited with code {process.exitcode}')
                pbar.update(1)
            running = still_running

            if running:
                time.sleep(1)
    finally:
        pbar.close()


def run_experiment_multithreaded(output_path, dataset, valid_users):
    '''
    Runs the experiment on the data in the global `users` variable for every user in
    `valid_users`, and writes one pickle per user to `<output_path>/<dataset>/`.

    `dataset` is only used as the name of the output sub-folder. Despite the name,
    the work is distributed over processes (at most `N_THREADS` at a time), not threads.

    Users 0 and 1 of the BrainRun dataset contain significantly more data than the
    others, so for them each hyperparameter combination is evaluated in its own
    process. All other users are handled with one process per user, as the overhead
    of splitting further is not justified.
    '''
    file_path = f'{output_path}/{dataset}'
    os.makedirs(file_path, exist_ok=True)

    valid_users = np.asarray(valid_users)
    heavy_candidates = [0, 1] if DATASET == BRAINRUN else []
    heavy_users = sorted(set(valid_users).intersection(heavy_candidates))
    regular_users = sorted(set(valid_users).difference(heavy_candidates))

    for user_id in heavy_users:
        max_train_per_session = np.inf

        # Uncomment this line to test the effect of different number of gestures
        # max_train_per_session = int(140 / len(users[int(user_id)]['devices'][0]['sessions']))

        X_train, X_test = _user_train_test_split(users, user_id, max_train_per_session)

        # The manager is shut down as soon as the search results have been copied out
        with mp.Manager() as manager:
            shared_results = manager.list()
            jobs = [
                (user_id, hyper_pair, users, valid_users, X_train, shared_results)
                for hyper_pair in ParameterGrid(parameters)
            ]
            _run_in_processes(_find_hyper, jobs, N_THREADS)
            hyperparameters = list(shared_results)

        # Fit and test the model (same process as for the other users)
        _fit_evaluate_and_save(user_id, users, valid_users, X_train, X_test, hyperparameters, file_path)

    jobs = [(user_id, users, valid_users, parameters, file_path) for user_id in regular_users]
    _run_in_processes(_run_experiment_for_user, jobs, N_THREADS)
    

In [None]:
# Run the 10 iterations; each one uses the feature extractor trained in that
# iteration and (for BrainRun) only the users it was not trained on
for iteration in range(10):
    split_path = f'{MODELS_DIR}/results/iteration_{iteration}.pkl'
    with open(split_path, 'rb') as f:
        u_training, u_testing, *_ = pickle.load(f)

    # Only the testing users are used below
    USERS_USED_FOR_TRAINING_FEAT_EXTRACTOR = u_training
    USERS_USED_FOR_TESTING_GENERALIZATION = u_testing

    # Load the model and cut it at the third layer from the end, so that it
    # outputs the embedding instead of the final prediction
    model_path = f'{MODELS_DIR}/models/simple_cnn_128_embedding_{iteration}.h5'
    deep_model = models.load_model(model_path)
    deep_model = models.Model(inputs = deep_model.input, outputs = deep_model.layers[-3].output)

    # Load data
    if DATASET == BRAINRUN:
        with open('brainrun_full_not_parsed.pkl', 'rb') as f:
            users = pickle.load(f)
        # Keep only the users on which the model wasn't trained on
        users = [
            user for i, user in enumerate(users)
            if i in USERS_USED_FOR_TESTING_GENERALIZATION
        ]
    if DATASET == TOUCHALYTICS:
        with open('touchalytics_full_not_parsed.pkl', 'rb') as f:
            users = pickle.load(f)

    # Parse data (filters the sessions and replaces them by their embeddings)
    for user in tqdm(range(len(users))):
        parse_user(user)

    valid_users = get_users_over_gestures(140)

    # Results of this iteration go to the `<OUTPUT_DIR>/<iteration>` folder
    run_experiment_multithreaded(OUTPUT_DIR, iteration, valid_users)
        