# CNN model 1

In this CNN model, the data representation is similar to the one used in [this paper](https://ieeexplore.ieee.org/document/7458136) and to the representations used in the HAR field with motion sensors.
The first and last 5 points of a gesture are flattened and concatenated into a feature vector, to which time information is added.

In [1]:
import pickle
import time

import numpy as np
import tensorflow as tf
from keras.utils import np_utils
from sklearn.preprocessing import LabelEncoder
from tensorflow import keras
from tensorflow.keras import datasets, layers, models
from tqdm import tqdm

import keras_tuner as kt
from config import *

%matplotlib inline

%load_ext autoreload
%autoreload 2


## Parameters for the experiments
Set the parameters with which the experiment will be run. Below is a description for each of them.
* `DATASET`: Specify which dataset to use. Only `BRAINRUN` is available for these experiments. The paths for the dataset are assumed to be at `./datasets`, and can be changed in the `config.py` file.
* `MODELS_DIR`: Directory where to save trained models to be used in user identification.

Here parameters for data cleanup and parsing are also set, although these are best left constant.
* `MIN_SESSION_GESTURES`: Minimum number of gestures a session must contain to be considered. Left at 140, as this has been shown to produce good results while still including a large number of users.
* `SCREENS`: The screens to use from the BrainRun dataset when performing the experiment (ignored for the Touchalytics dataset). The experiments were originally performed using either one or both of the screens *MathisisGame* or *FocusGame*, as they contain predominantly swipe data.
* `WINDOW_SIZE`: Needs to be the size of the CNN input layer


In [2]:
# Training only performed on the BrainRun dataset
# (BRAINRUN is not defined in this notebook; presumably it comes from `from config import *`)
DATASET = BRAINRUN
# Directory under which the trained models and the per-iteration results are saved
MODELS_DIR = 'model_1'

# Sessions left with fewer gestures than this after filtering are dropped
MIN_SESSION_GESTURES = 140
# Only gestures recorded on these screens are kept when DATASET is BRAINRUN
SCREENS = ['MathisisGame', 'FocusGame']

# WARNING: Window size has to be set to the size of the model input layer (default 11)
WINDOW_SIZE = 11

## Utility methods for parsing the data

In [3]:
def extract_features(data):
    '''
    Convert gesture to a vector of length 20, representing the coordinates of the first 5 and last 5 points.

    The point sequence is the initial touch position (`x0`, `y0` of the first sample) followed by the
    (`moveX`, `moveY`) position of every sample. Each half of the result holds 5 flattened (x, y) pairs;
    when the gesture has fewer than 5 points the half is zero-padded to 10 values.

    data: non-empty sequence of dicts; the first one must provide `x0` and `y0`, all of them `moveX` and `moveY`.
    Returns a float64 array of shape (20,).
    '''
    points = np.array(
        [(data[0]['x0'], data[0]['y0'])] + [(sample['moveX'], sample['moveY']) for sample in data]
    )

    def pad_to_ten(chunk):
        # Flatten the (x, y) pairs and zero-pad so that short gestures still yield 10 values
        flat = chunk.flatten()
        padded = np.zeros(10)
        padded[:flat.size] = flat
        return padded

    return np.concatenate([pad_to_ten(points[:5]), pad_to_ten(points[-5:])])
    
def gesture_to_data(c):
    '''
    Build the feature vector of a single gesture.

    The result is laid out as [t_start, t_stop, duration, coordinate features...], where the duration is
    (t_stop - t_start) / 1000 and the coordinate features are the 20 values returned by extract_features,
    giving 23 values in total.
    '''
    start, stop = c['t_start'], c['t_stop']
    timing = [start, stop, (stop - start) / 1000]
    return np.concatenate([timing, extract_features(c['data'])])

def window_to_datapoint(window):
    '''
    Turn a window of featurized gestures (one row per gesture) into a single training datapoint.

    Columns 0 and 1 (gesture start and stop time) are removed, as the user may be identified uniquely by
    them. Two relative timing columns are appended in their place, so the number of columns is unchanged:
    * time from the stop of the first gesture in the window to the start of each gesture
    * time from the stop of the previous gesture to the start of each gesture
    Both are divided by 1000 and are 0 for the first gesture. A trailing axis of size 1 is added.
    '''
    n_gestures, n_features = window.shape
    starts = window[:, 0]
    stops = window[:, 1]

    since_first_stop = np.concatenate([[0], (starts[1:] - stops[0]) / 1000])
    since_previous_stop = np.concatenate([[0], (starts[1:] - stops[:-1]) / 1000])

    datapoint = np.concatenate(
        [window[:, 2:], since_first_stop[:, np.newaxis], since_previous_stop[:, np.newaxis]],
        axis=1,
    )
    return datapoint.reshape(n_gestures, n_features, 1)

def session_to_datapoints(s):
    '''
    Convert a session into an array of datapoints, one per sliding window of WINDOW_SIZE consecutive gestures.

    Windows start at offsets 0 .. len(gestures) - WINDOW_SIZE - 1, i.e. len(gestures) - WINDOW_SIZE windows
    are produced (the window ending on the very last gesture is not included).
    '''
    gesture_rows = np.array([gesture_to_data(gesture) for gesture in s['gestures']])
    window_count = len(gesture_rows) - WINDOW_SIZE

    return np.array([
        window_to_datapoint(gesture_rows[first:first + WINDOW_SIZE])
        for first in range(window_count)
    ])

## Methods for filtering and parsing the data

In [4]:
# Limit the size of the first and second user to the maximum number of gestures in
# remaining dataset to avoid imbalance
# NOTE(review): parse_user applies this cap to every user, not only to the first two
LIMIT_GESTURES_PER_USER = 6825

def prefilter_session(s):
    '''
    Cleans up the gestures of a session in place.

    The gestures are sorted by start time and only those lasting more than 70 and less than 1000
    (2000 for datasets other than BrainRun) are kept. For BrainRun, a gesture must additionally be a
    swipe recorded on one of the SCREENS (matched on the first word of its screen name).
    '''
    is_brainrun = DATASET == BRAINRUN
    max_duration = 1000 if is_brainrun else 2000

    def keep(gesture):
        duration = gesture['t_stop'] - gesture['t_start']
        if not 70 < duration < max_duration:
            return False
        if not is_brainrun:
            return True
        return gesture['screen'].split(' ')[0] in SCREENS and gesture['type'] == 'swipe'

    s['gestures'].sort(key = lambda gesture: gesture['t_start'])
    s['gestures'] = [gesture for gesture in s['gestures'] if keep(gesture)]

def parse_user(user_id):
    '''
    Parses all the sessions for a user with the given id, in place, in the global `users` structure.

    Every session of the user's first device is filtered; sessions left with fewer than
    MIN_SESSION_GESTURES gestures are deleted and the remaining ones are replaced by their datapoints.
    The user is then capped to LIMIT_GESTURES_PER_USER datapoints in total, spread as evenly as possible
    over the sessions.
    '''
    sessions = users[user_id]['devices'][0]['sessions']

    i = 0
    while i < len(sessions):
        prefilter_session(sessions[i])

        if len(sessions[i]['gestures']) < MIN_SESSION_GESTURES:
            # Deleting shifts the next session into position i, so the index is not advanced
            del sessions[i]
        else:
            sessions[i] = session_to_datapoints(sessions[i])
            i += 1

    # Visit the sessions from shortest to longest and give each an equal share of what is left of the
    # budget. Only the datapoints actually kept are subtracted, so the share a short session cannot
    # fill is passed on to the longer ones instead of being lost.
    gestures_remaining = LIMIT_GESTURES_PER_USER
    sessions_remaining = len(sessions)
    for inx in np.argsort([len(x) for x in sessions]):
        quota = int(gestures_remaining / sessions_remaining)
        sessions[inx] = sessions[inx][:quota]
        gestures_remaining -= len(sessions[inx])
        sessions_remaining -= 1

def get_users_over_gestures(number_of_gestures = 140):
    '''
    Returns an array with the indices of all users whose sessions (first device only) contain, in total,
    more than number_of_gestures rows. Sessions are expected to be arrays already (their `shape[0]` is
    summed), so this has to run after the users were parsed.
    '''
    totals = np.array([
        sum(session.shape[0] for session in users[user_inx]['devices'][0]['sessions'])
        for user_inx in range(len(users))
    ], dtype=float)

    return np.where(totals > number_of_gestures)[0]

# Methods used for splitting the data

In [5]:
def get_train_indices(size, test_size = 0.2, val_size = 0.1, gap = WINDOW_SIZE, max_size = 50000):
    '''
    Returns the indices of the training part of a session with `size` datapoints (capped at max_size).

    The training part starts at 0 and stops `gap + 5` datapoints before the point where the first
    (1 - test_size - val_size) fraction of the session ends.
    '''
    capped = min(size, max_size)
    train_stop = int(capped * (1 - test_size - val_size)) - gap - 5
    return np.arange(train_stop)

def get_val_indices(size, test_size = 0.2, val_size = 0.1, gap = WINDOW_SIZE, max_size = 50000):
    '''
    Returns the indices of the validation part of a session with `size` datapoints (capped at max_size).

    The validation part starts 5 datapoints before the end of the first (1 - test_size - val_size)
    fraction of the session and stops `gap` datapoints before the start of the test part.
    '''
    capped = min(size, max_size)
    val_start = int(capped * (1 - test_size - val_size)) - 5
    val_stop = int(capped * (1 - test_size)) - gap
    return np.arange(val_start, val_stop)

def get_test_indices(size, test_size = 0.2, gap = WINDOW_SIZE, max_size = 50000):
    '''
    Returns the indices of the test part of a session with `size` datapoints (capped at max_size):
    the last test_size fraction of the session. The `gap` parameter is accepted but not used here.
    '''
    capped = min(size, max_size)
    return np.arange(int(capped * (1 - test_size)), capped)

def get_train_val_test_data_for_users(valid_users):
    '''
    Splits the data for a list of users into train, val and test, stratified for each session and merging
    the session data in the process.

    valid_users: iterable of user indices into the global `users` structure.
    Returns X_train, X_val, X_test, y_train, y_val, y_test. The labels are one-hot encoded and all three
    label arrays have one column per user seen in the training labels.
    '''
    index_getters = {'train': get_train_indices, 'val': get_val_indices, 'test': get_test_indices}
    X = {name: [] for name in index_getters}
    y = {name: [] for name in index_getters}

    for user_id in valid_users:
        sessions = users[int(user_id)]['devices'][0]['sessions']
        for name, get_indices in index_getters.items():
            # Split every session on its own, then merge the pieces of all sessions of the user
            user_x = np.concatenate([session[get_indices(session.shape[0])] for session in sessions])
            X[name].append(user_x)
            y[name].append(np.zeros(user_x.shape[0]) + user_id)

    X = {name: np.concatenate(parts) for name, parts in X.items()}
    y = {name: np.concatenate(parts) for name, parts in y.items()}

    # Map the user ids to consecutive class indices, based on the users present in the training data
    encoder = LabelEncoder()
    encoder.fit(y['train'])

    # Fix the one-hot width explicitly: left to itself, to_categorical infers it from the largest label
    # of each array, so a split lacking the last class would end up with fewer columns than y_train
    n_classes = len(encoder.classes_)
    y = {
        name: np_utils.to_categorical(encoder.transform(labels), num_classes=n_classes)
        for name, labels in y.items()
    }

    return X['train'], X['val'], X['test'], y['train'], y['val'], y['test']

# Methods for building the model

In [6]:
def build_model():
  '''
  Builds and compiles the CNN with the best hyperparameters found (128-unit dense layer).

  The input size is taken from the global X_train and the number of output classes from the global
  y_train, so both have to be assigned before calling this.
  '''
  dropout_rate = 0.1
  inputs = layers.Input(shape=(X_train.shape[1], X_train.shape[2], 1))

  # First convolution: batch normalization is applied before the ReLU, hence the linear activation
  features = layers.Conv2D(64, (3, 3), activation='linear')(inputs)
  features = layers.BatchNormalization()(features)
  features = layers.ReLU()(features)
  features = layers.Dropout(dropout_rate)(features)

  # Two identical convolution + dropout stages
  for _ in range(2):
    features = layers.Conv2D(32, (3, 3), activation='relu')(features)
    features = layers.Dropout(dropout_rate)(features)

  features = layers.Flatten()(features)
  embedding = layers.Dense(128, activation='relu')(features)
  logits = layers.Dense(y_train.shape[1])(embedding)

  # The softmax is part of the model, so the loss below works on probabilities (from_logits=False)
  probabilities = layers.Softmax()(logits)

  model = models.Model(inputs, probabilities)

  # Note that epsilon=10e-6 is 1e-5
  optimizer = keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, epsilon=10e-6)

  model.compile(optimizer=optimizer,
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
              metrics=['accuracy', 'top_k_categorical_accuracy'])

  return model

# Monitor training time
class MonitorTime(keras.callbacks.Callback):
    '''
    Keras callback that prints how long training took: the difference, in seconds, between the
    time.time() readings taken when training begins and when it ends.
    '''

    def __init__(self):
        super().__init__()

    def on_train_begin(self, *args):
        # Remember when training started
        self.start_time = time.time()

    def on_train_end(self, *args):
        elapsed = time.time() - self.start_time
        print(elapsed)

In [7]:
# Load data
# NOTE: pickle.load can execute arbitrary code, so only load dataset files from a trusted source
# (DATA_PATH is not defined in this notebook; presumably it comes from `from config import *`)
with open(f'{DATA_PATH}/brainrun_full_not_parsed.pkl', 'rb') as f:
    users = pickle.load(f)

# Filter the sessions of every user and convert them to windowed datapoints, in place
for user in tqdm(range(len(users))):
    parse_user(user)

100%|██████████| 225/225 [00:20<00:00, 11.13it/s]


# User identification experiment
Run the experiment to identify the users using the CNN model with the best parameters.

In [None]:
# Keep the users with more than 140 datapoints after parsing
valid_users = get_users_over_gestures(140)
X_train, X_val, X_test, y_train, y_val, y_test = get_train_val_test_data_for_users(valid_users)

# build_model reads the input and output sizes from the X_train / y_train globals assigned above
model = build_model()

# Stop early to avoid overfit
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

# MonitorTime prints the total training duration once fitting ends
model.fit(X_train, y_train, epochs=50, batch_size = 256, validation_data = (X_val, y_val), callbacks=[stop_early, MonitorTime()])
# Loss and metrics on the held-out test split
model.evaluate(X_test, y_test)

# Hyperparameter tuning
Select the best hyperparameters for a certain model architecture.
Only the best model hyperparameter tuning is presented.

In [None]:
def build_model_with_hp(hp):
  '''
  Builds and compiles the CNN for one hyperparameter tuning trial.

  hp: the hyperparameter container supplied by the tuner. The following values are tuned:
  * `dropout_rate`: dropout applied after every convolution
  * `layer_1_filters`, `layer_1_kernel_size`: filters and (square) kernel size of the first convolution
  * `layer_2_filters`, `layer_3_filters`: filters of the second and third convolution

  The input size is taken from the global X_train and the number of output classes from the global
  y_train, so both have to be assigned before the tuner calls this.
  '''

  input_shape = layers.Input(shape=(X_train.shape[1], X_train.shape[2], 1))
  dropout_rate = hp.Choice('dropout_rate', [0.0, 0.1, 0.2, 0.3, 0.4])

  # First convolution: batch normalization is applied before the ReLU, hence the linear activation
  layer_1_filters = hp.Choice('layer_1_filters', [32, 64, 128])
  layer_1_kernel_size = hp.Choice('layer_1_kernel_size', [3, 5])
  cnn = layers.Conv2D(layer_1_filters, (layer_1_kernel_size, layer_1_kernel_size), activation='linear')(input_shape)
  cnn = layers.BatchNormalization()(cnn)
  cnn = layers.ReLU()(cnn)
  cnn = layers.Dropout(dropout_rate)(cnn)

  layer_2_filters = hp.Choice('layer_2_filters', [32, 64, 128])
  cnn = layers.Conv2D(layer_2_filters, (3, 3), activation='relu')(cnn)
  cnn = layers.Dropout(dropout_rate)(cnn)

  # Needs its own hyperparameter name: reusing 'layer_2_filters' would tie this layer to the second one
  layer_3_filters = hp.Choice('layer_3_filters', [32, 64, 128])
  cnn = layers.Conv2D(layer_3_filters, (3, 3), activation='relu')(cnn)
  cnn = layers.Dropout(dropout_rate)(cnn)
  cnn = layers.Flatten()(cnn)

  dense = layers.Dense(128, activation='relu')(cnn)
  # One output unit per class: y_train is one-hot encoded, so its second axis is the number of classes
  dense = layers.Dense(y_train.shape[1])(dense)

  # The softmax is part of the model, so the loss below works on probabilities (from_logits=False)
  output = layers.Softmax()(dense)

  model = models.Model(input_shape, output)

  lr = 0.001
  decay = 0.9  # passed to Adam as beta_1
  epsilon = 10e-6  # i.e. 1e-5

  model.compile(optimizer=keras.optimizers.Adam(learning_rate=lr, beta_1=decay, epsilon=epsilon),
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
              metrics=['accuracy', 'top_k_categorical_accuracy'])

  return model

# Hyperband search over the hyperparameters declared in build_model_with_hp, selecting on validation
# accuracy. The tuner state is kept under model_hypertune/model_1.
tuner = kt.Hyperband(build_model_with_hp,
                     objective='val_accuracy',
                     max_epochs=50,
                     factor=2,
                     directory='model_hypertune',
                     project_name='model_1')


# Stop a trial once the validation loss has not improved for 10 epochs
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

# Uses the X_train / y_train / X_val / y_val globals produced by the identification experiment cell
tuner.search(X_train, y_train, epochs=50, batch_size = 256, validation_data = (X_val, y_val), callbacks=[stop_early])

# Train models for user authentication

Perform 10 iterations, each randomly selecting 90% of the users for training and the rest for testing. Save the users, the models and the results.

In [None]:
import os

# Make sure the output directories exist before anything is saved into them
os.makedirs(f'{MODELS_DIR}/models', exist_ok=True)
os.makedirs(f'{MODELS_DIR}/results', exist_ok=True)

for iteration in range(10):
  # Random 90% / 10% split of the eligible users for this iteration
  all_users = get_users_over_gestures(140)
  np.random.shuffle(all_users)
  train_users = all_users[:int(0.9 * len(all_users))]
  test_users = all_users[int(0.9 * len(all_users)):]

  # Train only on the users selected for this iteration; the held-out test_users must stay unseen.
  # X_train / y_train are globals on purpose: build_model reads its input and output sizes from them.
  X_train, X_val, X_test, y_train, y_val, y_test = get_train_val_test_data_for_users(train_users)

  model = build_model()

  # Stop early to avoid overfit
  stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

  history = model.fit(X_train, y_train, epochs=50, batch_size = 256, validation_data = (X_val, y_val), callbacks=[stop_early])
  test_results = model.evaluate(X_test, y_test)

  # Keep the model together with the user split and the metrics needed to reproduce the evaluation
  model.save(f'{MODELS_DIR}/models/simple_cnn_128_embedding_{iteration}.h5')
  with open(f'{MODELS_DIR}/results/iteration_{iteration}.pkl', 'wb') as f:
    pickle.dump([train_users, test_users, history.history, test_results], f)