# CNN model 1

In this CNN model, the data representation is similar to the one used in [this paper](https://ieeexplore.ieee.org/document/7458136), and in the HAR field using motion sensors. 
The first and last 5 points of a gesture are flattened and concatenated in a feature vector, to which time information is added. 

In [4]:
import pickle
import time

import cv2
import numpy as np
import tensorflow as tf
from keras.utils import np_utils
from sklearn.preprocessing import LabelEncoder
from tensorflow import keras
from tensorflow.keras import layers, models
from tqdm import tqdm

import keras_tuner as kt
from config import *

%matplotlib inline

%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Parameters for the experiments
Set the parameters with which the experiment will be run. Below is a description for each of them.
* `DATASET`: Specify which dataset to be used. Only `BRAINRUN` is available for these experiments. The paths for the dataset are assumed to be at `./datasets`, and can be changed in the `config.py` file.
* `MODELS_DIR`: Directory where to save trained models to be used in user identification.

Here parameters for data cleanup and parsing are also set, although these are best left constant.
* `MIN_SESSION_GESTURES`: Minimum number of gestures in a session to be considered. Left at 140, as it has been shown to produce good results and to include a large number of users.
* `SCREENS`: The screens to use from the BrainRun dataset when performing the experiment (ignored for the Touchalytics dataset). The experiments were originally performed using either one or both of the screens *MathisisGame* or *FocusGame*, as they contain predominantly swipe data.
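* `WINDOW_SIZE`: Number of consecutive gestures whose images are summed into a single CNN input image. It is also part of the name of the directory where models are saved.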


In [5]:
# Training only performed on the BrainRun dataset
DATASET = BRAINRUN
# Number of consecutive gesture images summed into one datapoint (see session_to_datapoints).
WINDOW_SIZE = 11
# Output directory for trained models and results; the window size is part of the name.
MODELS_DIR = f'models/model_2_ws_{WINDOW_SIZE}'

# Sessions with fewer gestures than this after filtering are dropped (see parse_user).
MIN_SESSION_GESTURES = 140
# Only gestures recorded on these screens are kept (see prefilter_session).
SCREENS = ['MathisisGame', 'FocusGame']

# Size of the input image (assume square). All experiments used an image of size 32x32.
IMAGE_SIZE = 32
# Size of the canvas to draw the gesture. All experiments used a canvas of size 128x128, and
# the resulting images were downsampled to 32x32.
CANVAS_SIZE = 128

## Utility methods for parsing the data

In [6]:
def draw_line(x0, y0, x1, y1, img_size, img, vel = 1):
    '''
    Rasterise the segment (x0, y0) -> (x1, y1) onto `img`, in place.

    The four coordinates are scaled by `img_size` and truncated to integers before
    being handed to `line_aa`. The per-pixel values returned by `line_aa` are
    multiplied by `vel` and written into `img`, overwriting whatever was there.

    If any input coordinate is greater than 1 the raw coordinates are printed as a
    diagnostic; drawing is still attempted.

    NOTE(review): `line_aa` is not imported in the visible part of this file --
    presumably `skimage.draw.line_aa`; confirm where it comes from.
    '''
    r0, c0 = int(x0 * img_size), int(y0 * img_size)
    r1, c1 = int(x1 * img_size), int(y1 * img_size)
    rows, cols, weights = line_aa(r0, c0, r1, c1)

    # Callers are expected to pass coordinates in [0, 1]; report anything above that.
    if any(coord > 1 for coord in (x0, y0, x1, y1)):
        print(x0, y0, x1, y1)

    img[rows, cols] = weights * vel

def points_to_image(points):
    '''
    Converts an array of points ((x,y) pairs) into an image of shape
    (`IMAGE_SIZE`, `IMAGE_SIZE`, 1).

    The path is drawn segment by segment on a `CANVAS_SIZE` x `CANVAS_SIZE` canvas.
    Each segment's intensity is the Euclidean distance between its two end points,
    min-max normalised over the whole path (the code calls this "velocity"; no
    timing information is used). The canvas is then downsampled with bicubic
    interpolation and standardised to zero mean and unit variance.

    A path with fewer than two points has no segments and yields an all-zero image.

    points: array of shape (n, 2); coordinates are expected to lie in [0, 1].
    '''
    canvas = np.zeros((CANVAS_SIZE, CANVAS_SIZE))

    if len(points) >= 2:
        # Distance between each pair of consecutive points, rescaled to [0, 1].
        # `or 1` avoids a division by zero when all segments have the same length.
        velocities = [np.linalg.norm(p1 - p2) for p1, p2 in zip(points[:-1], points[1:])]
        velocities = (velocities - np.min(velocities)) / (np.ptp(velocities) or 1)

        # Draw each line segment. The coordinates are passed swapped (index 1 first),
        # as in the original code. Scaling by CANVAS_SIZE - 1 keeps a coordinate of
        # 1.0 on the last valid canvas index, whatever the canvas size.
        x0, y0 = points[0][1], points[0][0]
        for datapoint, velocity in zip(points[1:], velocities):
            draw_line(x0, y0, datapoint[1], datapoint[0], CANVAS_SIZE - 1, canvas, vel = velocity)
            x0, y0 = datapoint[1], datapoint[0]

    # Resize the image to the desired size
    res = cv2.resize(canvas, dsize=(IMAGE_SIZE, IMAGE_SIZE), interpolation=cv2.INTER_CUBIC)

    # Standardise; `or 1` keeps a constant image at zero instead of dividing by zero.
    mean, std = res.mean(), res.std()
    res = (res - mean) / (std or 1)
    return res.reshape(IMAGE_SIZE, IMAGE_SIZE, 1)

def gesture_to_image(c):
    '''
    Render one gesture as an image of its path.

    The path starts at the (`x0`, `y0`) fields of the first entry in `c['data']`
    and continues through the (`moveX`, `moveY`) fields of every entry, the first
    one included. Every coordinate is clipped to [0, 1] before drawing.
    '''
    samples = c['data']
    first = samples[0]

    path = [[np.clip(first['x0'], 0, 1), np.clip(first['y0'], 0, 1)]]
    path.extend(
        [np.clip(sample['moveX'], 0, 1), np.clip(sample['moveY'], 0, 1)]
        for sample in samples
    )

    return points_to_image(np.array(path))

def window_to_datapoint(window):
    '''
    Collapse a window of gesture images into one image by summing them
    element-wise along the first axis.
    '''
    return np.asarray(window).sum(axis=0)
    
def session_to_datapoints(s):
    '''
    Turn a session into an array of datapoints, one per sliding window of
    WINDOW_SIZE consecutive gestures (stride 1).

    Every gesture in `s['gestures']` is first rendered to an image; the images in
    each window are then merged by `window_to_datapoint`.

    NOTE(review): the window start positions run over
    range(len(gestures) - WINDOW_SIZE), so the window ending on the very last
    gesture is not produced -- confirm whether that is intended.
    '''
    images = np.array([gesture_to_image(gesture) for gesture in s['gestures']])

    # Row i of `window_index` holds the gesture indices i, i + 1, ..., i + WINDOW_SIZE - 1.
    window_starts = np.arange(len(images) - WINDOW_SIZE)[:, np.newaxis]
    window_offsets = np.arange(WINDOW_SIZE)[np.newaxis, :]
    window_index = window_offsets + window_starts

    datapoints = [window_to_datapoint(window) for window in images[window_index]]
    return np.array(datapoints)

## Methods for filtering and parsing the data

In [7]:
# Upper bound on the number of datapoints kept per user (applied in parse_user).
# Chosen as the maximum number of gestures among the remaining users, so that the
# first and second user do not dominate the dataset and cause imbalance.
LIMIT_GESTURES_PER_USER = 6825

def prefilter_session(s):
    '''
    Clean up the gestures of a session, in place.

    The gestures are sorted by `t_start` and then filtered. A gesture is kept when
    its duration (`t_stop` - `t_start`) is strictly between 70 and the upper bound
    (1000 for BrainRun, 2000 otherwise). For BrainRun it must also be a swipe whose
    screen name (the text before the first space) is listed in SCREENS.
    '''
    is_brainrun = DATASET == BRAINRUN
    max_duration = 1000 if is_brainrun else 2000

    def keep(gesture):
        duration = gesture['t_stop'] - gesture['t_start']
        if not (duration > 70 and duration < max_duration):
            return False
        if not is_brainrun:
            return True
        # Screen and type are only inspected for BrainRun gestures that passed the duration test.
        return gesture['screen'].split(' ')[0] in SCREENS and gesture['type'] == 'swipe'

    s['gestures'].sort(key = lambda gesture: gesture['t_start'])
    s['gestures'] = [gesture for gesture in s['gestures'] if keep(gesture)]

def parse_user(user_id):
    '''
    Parses all the sessions of the first device of the user with the given id, in place.

    Each session is filtered with `prefilter_session`. Sessions left with fewer than
    MIN_SESSION_GESTURES gestures are removed; the others are replaced by their
    array of datapoints (see `session_to_datapoints`).

    The user is then capped at LIMIT_GESTURES_PER_USER datapoints in total. The
    budget is shared as evenly as possible across sessions, and each session keeps
    its first datapoints. Users below the cap keep all their data.
    '''
    sessions = users[user_id]['devices'][0]['sessions']

    parsed = []
    for session in sessions:
        prefilter_session(session)
        if len(session['gestures']) >= MIN_SESSION_GESTURES:
            parsed.append(session_to_datapoints(session))
    # Slice assignment keeps the same list object that `users` refers to.
    sessions[:] = parsed

    # Visit the sessions from shortest to longest. Each gets an equal share of what
    # is left of the budget; a session shorter than its share only consumes its own
    # length, so the unused part is handed on to the longer sessions.
    budget_remaining = LIMIT_GESTURES_PER_USER
    sessions_remaining = len(sessions)
    for idx in np.argsort([len(x) for x in sessions]):
        share = budget_remaining // sessions_remaining
        sessions[idx] = sessions[idx][:share]
        budget_remaining -= len(sessions[idx])
        sessions_remaining -= 1

def get_users_over_gestures(number_of_gestures = 140):
    '''
    Return the indices of the users whose sessions (first device only) hold, in
    total, strictly more than `number_of_gestures` rows.

    Sessions are expected to have been converted to arrays already (see
    `parse_user`); the size of a session is its `shape[0]`.
    '''
    totals = np.zeros((len(users), ))
    for idx in range(len(users)):
        totals[idx] = sum(session.shape[0] for session in users[idx]['devices'][0]['sessions'])

    return np.flatnonzero(totals > number_of_gestures)

# Methods used for splitting the data

In [8]:
def get_train_indices(size, test_size = 0.2, val_size = 0.1, gap = WINDOW_SIZE, max_size = 50000):
    '''
    Indices of the training part of a session with `size` datapoints.

    The session is capped at `max_size`. The training part is the leading
    (1 - test_size - val_size) fraction, minus its last `gap` indices, which are
    left out as a buffer before the validation part. The result is empty when the
    fraction is not longer than `gap`.
    '''
    capped = min(size, max_size)
    train_end = int(capped * (1 - test_size - val_size))
    return np.arange(0, train_end - gap)

def get_val_indices(size, test_size = 0.2, val_size = 0.1, gap = WINDOW_SIZE, max_size = 50000):
    '''
    Indices of the validation part of a session with `size` datapoints.

    The session is capped at `max_size`. The validation part starts where the
    training fraction ends and stops `gap` indices before the test part begins.
    The result is empty when that range is not longer than `gap`.
    '''
    capped = min(size, max_size)
    val_start = int(capped * (1 - test_size - val_size))
    val_stop = int(capped * (1 - test_size)) - gap
    return np.arange(val_start, val_stop)

def get_test_indices(size, test_size = 0.2, gap = WINDOW_SIZE, max_size = 50000):
    '''
    Indices of the test part of a session with `size` datapoints.

    The session is capped at `max_size`; the test part is the trailing `test_size`
    fraction of the capped session. `gap` is accepted so the signature matches the
    other split helpers, but it is not used here.
    '''
    capped = min(size, max_size)
    test_start = int(capped * (1 - test_size))
    return np.arange(test_start, capped)

def get_train_val_test_data_for_users(valid_users):
    '''
    Splits the data for a list of users into train, val and test, stratified for each
    session and merging the session data in the process.

    Each session of each user (first device only) is split with
    `get_train_indices`, `get_val_indices` and `get_test_indices`, and the parts
    are concatenated per split. Labels are the user ids, re-encoded to
    0..n_users-1 with an encoder fitted on the training labels and then one-hot
    encoded.

    Returns X_train, X_val, X_test, y_train, y_val, y_test. The three label
    matrices always have one column per training user, even when a split happens
    to contain no datapoint of the user with the highest label.
    '''
    index_getters = (get_train_indices, get_val_indices, get_test_indices)
    features = [[] for _ in index_getters]
    labels = [[] for _ in index_getters]

    for user_id in valid_users:
        sessions = users[int(user_id)]['devices'][0]['sessions']
        for split, get_indices in enumerate(index_getters):
            x_split = np.concatenate([session[get_indices(session.shape[0])] for session in sessions])
            features[split].append(x_split)
            labels[split].append(np.zeros(x_split.shape[0]) + user_id)

    X_train, X_val, X_test = [np.concatenate(parts) for parts in features]
    y_train, y_val, y_test = [np.concatenate(parts) for parts in labels]

    encoder = LabelEncoder()
    encoder.fit(y_train)

    # Fix the one-hot width explicitly: left to itself, to_categorical sizes its output
    # from the largest label present, which may differ between the three splits.
    num_classes = len(encoder.classes_)
    y_train = np_utils.to_categorical(encoder.transform(y_train), num_classes=num_classes)
    y_val = np_utils.to_categorical(encoder.transform(y_val), num_classes=num_classes)
    y_test = np_utils.to_categorical(encoder.transform(y_test), num_classes=num_classes)

    return X_train, X_val, X_test, y_train, y_val, y_test

# Methods for building the model

In [9]:
def build_model(num_classes=None):
    '''
    Build and compile the CNN with the selected hyperparameters.

    Architecture: three convolution blocks (128 filters 5x5 with batch
    normalisation, then 32 and 128 filters 3x3), each followed by max pooling and
    dropout of 0.3, then dense layers of 256 and 128 units and a softmax output.

    num_classes: number of output classes. When omitted, it is taken from the
        global `y_train` (its number of columns), which must then already exist.

    Returns the compiled model (Adam optimiser, categorical cross-entropy on
    probabilities, accuracy and top-k categorical accuracy metrics).
    '''
    if num_classes is None:
        num_classes = y_train.shape[1]

    dropout_rate = 0.3

    inputs = layers.Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 1))

    # Block 1: the convolution is linear so that ReLU is applied after batch normalisation.
    cnn = layers.Conv2D(128, (5, 5), activation='linear')(inputs)
    cnn = layers.BatchNormalization()(cnn)
    cnn = layers.ReLU()(cnn)
    cnn = layers.MaxPool2D()(cnn)
    cnn = layers.Dropout(dropout_rate)(cnn)

    # Blocks 2 and 3: dropout is applied both before and after pooling.
    cnn = layers.Conv2D(32, (3, 3), activation='relu')(cnn)
    cnn = layers.Dropout(dropout_rate)(cnn)
    cnn = layers.MaxPool2D()(cnn)
    cnn = layers.Dropout(dropout_rate)(cnn)

    cnn = layers.Conv2D(128, (3, 3), activation='relu')(cnn)
    cnn = layers.Dropout(dropout_rate)(cnn)
    cnn = layers.MaxPool2D()(cnn)
    cnn = layers.Dropout(dropout_rate)(cnn)

    cnn = layers.Flatten()(cnn)

    dense = layers.Dense(256, activation='relu')(cnn)
    dense = layers.Dense(128, activation='relu')(dense)
    dense = layers.Dense(num_classes)(dense)

    # The model outputs probabilities, hence from_logits=False in the loss below.
    dense = layers.Softmax()(dense)

    model = models.Model(inputs, dense)

    lr = 0.001
    decay = 0.8  # passed to Adam as beta_1
    epsilon = 1e-7

    model.compile(optimizer=keras.optimizers.Adam(learning_rate=lr, beta_1=decay, epsilon=epsilon),
                  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
                  metrics=['accuracy', 'top_k_categorical_accuracy'])
    return model

class MonitorTime(keras.callbacks.Callback):
    '''
    Keras callback that prints how long a training run took, in seconds.

    The clock is read when training begins and again when it ends; the difference
    is printed.
    '''

    def __init__(self):
        super().__init__()

    def on_train_begin(self, *args):
        self.start_time = time.time()

    def on_train_end(self, *args):
        elapsed = time.time() - self.start_time
        print(elapsed)

In [10]:
# Load data
# NOTE: unpickling can execute arbitrary code -- only load dataset files from a trusted source.
# DATA_PATH is not defined in this notebook; presumably it comes from `from config import *`.
with open(f'{DATA_PATH}/brainrun_full_not_parsed.pkl', 'rb') as f:
    users = pickle.load(f)

# Filter and featurise every user in place. `users` is indexed by position 0..len(users)-1.
for user in tqdm(range(len(users))):
    parse_user(user)

100%|██████████| 225/225 [02:55<00:00,  1.28it/s]


# User identification experiment
Run the experiment to identify the users using the CNN model with the best parameters.

In [None]:
# Keep the users with more than 140 datapoints and split their data into train/val/test.
valid_users = get_users_over_gestures(140)
X_train, X_val, X_test, y_train, y_val, y_test = get_train_val_test_data_for_users(valid_users)

# build_model() reads the global y_train to size its output layer, so it must run after the split.
model = build_model()

# Stop early to avoid overfit
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

# MonitorTime prints the total training time once fitting ends.
model.fit(X_train, y_train, epochs=50, batch_size = 256, validation_data = (X_val, y_val), callbacks=[stop_early, MonitorTime()])
# Last expression of the cell: the notebook displays the value returned by evaluate().
model.evaluate(X_test, y_test)

# Hyperparameter tuning
Select the best hyperparameters for a certain model architecture.
Only the best model hyperparameter tuning is presented.

In [None]:
def build_model_with_hp(hp):
    '''
    Model builder for keras-tuner: same layout as the fixed model, with the
    tunable values drawn from `hp`.

    Tuned values: dropout rate, filter count of each of the three convolutions,
    kernel size of the first convolution, size of the first dense layer, and the
    Adam learning rate, beta_1 (registered under the name 'decay') and epsilon.
    The second dense layer is fixed at 128 units. The output size is read from the
    global `y_train`.
    '''
    drop = hp.Choice('dropout', [0.0, 0.1, 0.2, 0.3, 0.4])

    image_in = layers.Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 1))

    # Block 1: linear convolution, batch normalisation, then ReLU.
    n_filters = hp.Choice('filters_1', [32, 64, 128])
    kernel = hp.Choice('kernel_size_1', [3, 5])
    x = layers.Conv2D(n_filters, (kernel, kernel), activation='linear')(image_in)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    x = layers.MaxPool2D()(x)
    x = layers.Dropout(drop)(x)

    # Blocks 2 and 3 share one layout: 3x3 convolution, dropout, pooling, dropout.
    for filters_name in ('filters_2', 'filters_3'):
        n_filters = hp.Choice(filters_name, [32, 64, 128])
        x = layers.Conv2D(n_filters, (3, 3), activation='relu')(x)
        x = layers.Dropout(drop)(x)
        x = layers.MaxPool2D()(x)
        x = layers.Dropout(drop)(x)

    x = layers.Flatten()(x)

    units = hp.Choice('dense_size', [128, 256, 512, 1024])
    x = layers.Dense(units, activation='relu')(x)
    x = layers.Dense(128, activation='relu')(x)
    logits = layers.Dense(y_train.shape[1])(x)

    probabilities = layers.Softmax()(logits)

    model = models.Model(image_in, probabilities)

    optimizer = keras.optimizers.Adam(
        learning_rate=hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4, 1e-5]),
        beta_1=hp.Choice('decay', values=[0.2, 0.3, 0.4, 0.6, 0.7, 0.8, 0.9]),
        epsilon=hp.Choice('epsilon', values=[1e-5, 1e-6, 1e-7, 1e-8, 1e-9]),
    )
    model.compile(
        optimizer=optimizer,
        loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
        metrics=['accuracy', 'top_k_categorical_accuracy'],
    )
    return model

# Hyperband search over the space declared in build_model_with_hp, with validation
# accuracy as the objective. Tuner state is stored under model_hypertune/model_2.
tuner = kt.Hyperband(build_model_with_hp,
                     objective='val_accuracy',
                     max_epochs=50,
                     factor=2,
                     directory='model_hypertune',
                     project_name='model_2')


# Early stopping watches val_loss with a patience of 10 epochs.
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

# Relies on the X_train / y_train / X_val / y_val globals created by the identification cell.
tuner.search(X_train, y_train, epochs=50, batch_size = 256, validation_data = (X_val, y_val), callbacks=[stop_early])

# Train models for user authentication

Perform 10 iterations, each randomly selecting 90% of the users for training and the rest for testing. Save the user splits, the models and the results.

In [None]:
import os

# Create the output directories up front; neither model.save nor open() creates them.
os.makedirs(f'{MODELS_DIR}/models', exist_ok=True)
os.makedirs(f'{MODELS_DIR}/results', exist_ok=True)

for iteration in range(10):
    # Random 90% / 10% split of the eligible users for this iteration.
    all_users = get_users_over_gestures(140)
    np.random.shuffle(all_users)
    split_at = int(0.9 * len(all_users))
    train_users = all_users[:split_at]
    test_users = all_users[split_at:]

    # Train on the training users only, so that test_users stay unseen by the model.
    X_train, X_val, X_test, y_train, y_val, y_test = get_train_val_test_data_for_users(train_users)

    # build_model() sizes its output layer from the y_train just created.
    model = build_model()

    # Stop early to avoid overfit
    stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

    history = model.fit(X_train, y_train, epochs=50, batch_size = 256, validation_data = (X_val, y_val), callbacks=[stop_early])
    test_results = model.evaluate(X_test, y_test)

    # Save the model together with the user split and metrics needed to reproduce the run.
    model.save(f'{MODELS_DIR}/models/simple_cnn_128_embedding_{iteration}.h5')
    with open(f'{MODELS_DIR}/results/iteration_{iteration}.pkl', 'wb') as f:
        pickle.dump([train_users, test_users, history.history, test_results], f)