In [6]:
import math
import multiprocessing
from datetime import datetime

import numpy as np
from keras import Input, Model
from keras import backend as K
from keras import layers
from keras.layers import Layer
from keras.preprocessing.sequence import TimeseriesGenerator
from keras.regularizers import L2
from sklearn.metrics import accuracy_score, classification_report
from keras.callbacks import ModelCheckpoint

from LobTransformer import TransformerBlock
import tensorflow as tf


# download FI2010 dataset from https://etsin.fairdata.fi/dataset/73eb48d7-4dbc-4a10-a52a-da745b47a649

FI2010_DIR='/home/vslaykovsky/Downloads/BenchmarkDatasets/'


In [8]:


class PositionalEncodingLayer(Layer):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def call(self, x, *args, **kwargs):
        steps, d_model = x.get_shape().as_list()[-2:]
        ps = np.zeros([steps, 1], dtype=K.floatx())
        for tx in range(steps):
            ps[tx, :] = [(2 / (steps - 1)) * tx - 1]

        ps_expand = K.expand_dims(K.constant(ps), axis=0)
        ps_tiled = K.tile(ps_expand, [K.shape(x)[0], 1, 1])

        x = K.concatenate([x, ps_tiled], axis=-1)
        return x


def translob_model(**kwargs):
    inputs = Input(shape=(kwargs.get('sequence_length', 100), 40))
    x = inputs
    max_conv_filters = kwargs.get('num_conv_filters', 14)
    max_conv_dilation = kwargs.get('max_conv_dilation', 16)
    for dilation in [2 ** v for v in list(range(math.ceil(math.log2(max_conv_dilation)) + 1))]:
        x = layers.Conv1D(
            max_conv_filters, kernel_size=2, dilation_rate=dilation, activation='relu', padding='causal'
        )(x)
    x = layers.LayerNormalization()(x)
    x = PositionalEncodingLayer()(x)
    tb = TransformerBlock('tb1', kwargs.get('num_attention_heads', 3), True)
    blocks = kwargs.get('num_transformer_blocks', 2)
    for block in range(blocks):
        if kwargs.get('transformer_blocks_share_weights', True):
            x = tb(x)
        else:
            x = TransformerBlock(f'transformer_block_{block}', kwargs.get('num_attention_heads', 3), True)(x)
    x = layers.Flatten()(x)
    x = layers.Dense(64, 
                     activation='relu', 
                     kernel_regularizer='l2', 
                     kernel_initializer='glorot_uniform')(x)
    x = layers.Dropout(kwargs.get('dropout_rate', 0.1))(x)
    out = layers.Dense(3, activation='softmax')(x)
    model = Model(inputs=inputs, outputs=out)
    model.summary()

    model.compile(
        tf.keras.optimizers.Adam(
            learning_rate=kwargs.get('lr', 0.0001),
            beta_1=kwargs.get('adam_beta1', 0.9),
            beta_2=kwargs.get('adam_beta2', 0.999),
            name="Adam",
        ),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=['sparse_categorical_accuracy'],
    )
    return model
                        

def train_translob(X_train, y_train, X_val, y_val, **kwargs):
    print('Train', X_train.shape, y_train.shape, 'Val', X_val.shape, y_val.shape)
    model = translob_model(**kwargs)

    length = kwargs.get('sequence_length', 100)
    train_gen = TimeseriesGenerator(X_train, y_train, length, shuffle=True, batch_size=kwargs.get('batch_size', 32))
    val_gen = TimeseriesGenerator(X_val, y_val, length, batch_size=kwargs.get('batch_size', 32))

    model.fit(
        train_gen,
        epochs=kwargs.get('epochs', 100),
        callbacks=[
            tf.keras.callbacks.TensorBoard(log_dir=(
                    "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S"))
            ),
            tf.keras.callbacks.EarlyStopping(monitor='val_sparse_categorical_accuracy', mode='max', patience=10, min_delta=0.0002),
#             ModelCheckpoint('mdl.hdf5', save_best_only=True, monitor='val_loss', mode='min')
        ],
        validation_data=val_gen
    )
    return model


def gen_data(data, horizon):
    x = data[:40, :].T  # 40 == 10 price + volume asks + 10 price + volume bids
    y = data[-5 + horizon, :].T  # 5
    return x[:-1], (y[1:] - 1).astype(np.int32)  # shift y by 1



def load_dataset(horizon):
    dec_data = np.loadtxt(f'{FI2010_DIR}/NoAuction/1.NoAuction_Zscore/NoAuction_Zscore_Training/Train_Dst_NoAuction_ZScore_CF_7.txt')
    dec_train = dec_data[:, :int(np.floor(dec_data.shape[1] * 0.8))]
    dec_val = dec_data[:, int(np.floor(dec_data.shape[1] * 0.8)):]

    dec_test1 = np.loadtxt(f'{FI2010_DIR}/NoAuction/1.NoAuction_Zscore/NoAuction_Zscore_Testing/Test_Dst_NoAuction_ZScore_CF_7.txt')
    dec_test2 = np.loadtxt(f'{FI2010_DIR}/NoAuction/1.NoAuction_Zscore/NoAuction_Zscore_Testing/Test_Dst_NoAuction_ZScore_CF_8.txt')
    dec_test3 = np.loadtxt(f'{FI2010_DIR}/NoAuction/1.NoAuction_Zscore/NoAuction_Zscore_Testing/Test_Dst_NoAuction_ZScore_CF_9.txt')
    dec_test = np.hstack((dec_test1, dec_test2, dec_test3))

    return gen_data(dec_train, horizon), gen_data(dec_val, horizon), gen_data(dec_test, horizon)


def eval(model, X_test, y_test, **kwargs):
    ts = TimeseriesGenerator(X_test, y_test, kwargs.get('sequence_length', 100), batch_size=32, shuffle=False)
    y_true = np.concatenate([y for x, y in ts])
    y_pred = np.argmax(model.predict(ts), -1)
    print(classification_report(y_true, y_pred))    
    return classification_report(y_true, y_pred, output_dict=True)['weighted avg']['f1-score']
    


(X_train, y_train), (X_val, y_val), (X_test, y_test) = load_dataset(horizon=4)

params = {
    # inputs
    'sequence_length': 100,
    # model
    'num_conv_filters': 14,
    'max_conv_dilation': 16,
    'num_attention_heads': 3,
    'num_transformer_blocks': 2,
    'transformer_blocks_share_weights': True, 
    'dropout_rate': 0.1,
    # training
    'lr': 0.0001,
    'adam_beta1': 0.9,
    'adam_beta2': 0.999,
    'batch_size': 32,
    'epochs': 150
}



model = train_translob(X_train, y_train, X_val, y_val, **params)
eval(model, X_val, y_val, **params)
eval(model, X_test, y_test, **params)


Train (203799, 40) (203799,) Val (50949, 40) (50949,)
Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            [(None, 100, 40)]    0                                            
__________________________________________________________________________________________________
conv1d_15 (Conv1D)              (None, 100, 14)      1134        input_4[0][0]                    
__________________________________________________________________________________________________
conv1d_16 (Conv1D)              (None, 100, 14)      406         conv1d_15[0][0]                  
__________________________________________________________________________________________________
conv1d_17 (Conv1D)              (None, 100, 14)      406         conv1d_16[0][0]                  
______________________________________

KeyboardInterrupt: 

# Hyperparameter optimization

In [32]:
import optuna

def test_model(params):
    model = train_translob(X_train, y_train, X_val, y_val, **params)
    eval(model, X_val, y_val, **params)
    return eval(model, X_test, y_test, **params)

def optuna_objective(trial):
    params = {
        # inputs
        'sequence_length': 100,
        # model
        'num_conv_filters': trial.suggest_categorical('num_conv_filters', [14, 29]),
        'max_conv_dilation': trial.suggest_categorical('max_conv_dilation', [16, 32]),
        'num_attention_heads': trial.suggest_categorical('num_attention_heads', [3, 5]),
        'num_transformer_blocks': trial.suggest_categorical('num_transformer_blocks', [2, 3, 4]),
        'transformer_blocks_share_weights': trial.suggest_categorical('transformer_blocks_share_weights', [True, False]),
        'dropout_rate': trial.suggest_float('dropout_rate', 0.0, 0.3),
        # training
        'lr': trial.suggest_float('learning_rate', 0.00001, 0.01, log=True),
        'adam_beta1': 0.9,
        'adam_beta2': 0.999,
        # 'batch_size': 32,
        #'batch_size': trial.suggest_categorical('batch_size', [16, 32, 64]),
        'batch_size': 256,
        'epochs': 150
    }
    return test_model(params)
   
study = optuna.create_study(direction='maximize')
study.optimize(optuna_objective, n_trials=100)

[32m[I 2021-07-25 17:36:05,346][0m A new study created in memory with name: no-name-1164c60e-3544-428a-8f8b-b6ba658cd6de[0m


Train (203799, 40) (203799,) Val (50949, 40) (50949,)
Model: "model_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_15 (InputLayer)        [(None, 100, 40)]         0         
_________________________________________________________________
conv1d_74 (Conv1D)           (None, 100, 14)           1134      
_________________________________________________________________
conv1d_75 (Conv1D)           (None, 100, 14)           406       
_________________________________________________________________
conv1d_76 (Conv1D)           (None, 100, 14)           406       
_________________________________________________________________
conv1d_77 (Conv1D)           (None, 100, 14)           406       
_________________________________________________________________
conv1d_78 (Conv1D)           (None, 100, 14)           406       
_________________________________________________________________
conv

              precision    recall  f1-score   support

           0       0.43      0.85      0.58     18931
           1       0.62      0.63      0.62     13201
           2       0.42      0.01      0.02     18717

    accuracy                           0.48     50849
   macro avg       0.49      0.50      0.41     50849
weighted avg       0.48      0.48      0.38     50849



[32m[I 2021-07-25 17:50:29,042][0m Trial 0 finished with value: 0.4340597140988169 and parameters: {'num_conv_filters': 14, 'max_conv_dilation': 32, 'num_attention_heads': 5, 'num_transformer_blocks': 3, 'transformer_blocks_share_weights': False, 'dropout_rate': 0.2338475471159294, 'learning_rate': 0.0006509155238774959}. Best is trial 0 with value: 0.4340597140988169.[0m


              precision    recall  f1-score   support

           0       0.43      0.85      0.57     47913
           1       0.72      0.61      0.66     48050
           2       0.28      0.02      0.03     43523

    accuracy                           0.51    139486
   macro avg       0.48      0.49      0.42    139486
weighted avg       0.48      0.51      0.43    139486

Train (203799, 40) (203799,) Val (50949, 40) (50949,)
Model: "model_13"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_16 (InputLayer)        [(None, 100, 40)]         0         
_________________________________________________________________
conv1d_80 (Conv1D)           (None, 100, 14)           1134      
_________________________________________________________________
conv1d_81 (Conv1D)           (None, 100, 14)           406       
_________________________________________________________________
conv1d_82 (Conv1D) 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.37      1.00      0.54     18931
           1       0.00      0.00      0.00     13201
           2       0.00      0.00      0.00     18717

    accuracy                           0.37     50849
   macro avg       0.12      0.33      0.18     50849
weighted avg       0.14      0.37      0.20     50849



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2021-07-25 17:54:58,440][0m Trial 1 finished with value: 0.2630675492734941 and parameters: {'num_conv_filters': 14, 'max_conv_dilation': 16, 'num_attention_heads': 3, 'num_transformer_blocks': 3, 'transformer_blocks_share_weights': False, 'dropout_rate': 0.2962835591242553, 'learning_rate': 0.001852804968017065}. Best is trial 0 with value: 0.4340597140988169.[0m


              precision    recall  f1-score   support

           0       0.31      0.70      0.43     47913
           1       0.00      0.00      0.00     48050
           2       0.46      0.31      0.37     43523

    accuracy                           0.34    139486
   macro avg       0.26      0.34      0.27    139486
weighted avg       0.25      0.34      0.26    139486

Train (203799, 40) (203799,) Val (50949, 40) (50949,)
Model: "model_14"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_17 (InputLayer)           [(None, 100, 40)]    0                                            
__________________________________________________________________________________________________
conv1d_85 (Conv1D)              (None, 100, 29)      2349        input_17[0][0]                   
____________________________________________________

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.37      1.00      0.54     18931
           1       0.00      0.00      0.00     13201
           2       0.00      0.00      0.00     18717

    accuracy                           0.37     50849
   macro avg       0.12      0.33      0.18     50849
weighted avg       0.14      0.37      0.20     50849



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2021-07-25 18:00:53,336][0m Trial 2 finished with value: 0.17564623096061632 and parameters: {'num_conv_filters': 29, 'max_conv_dilation': 32, 'num_attention_heads': 3, 'num_transformer_blocks': 4, 'transformer_blocks_share_weights': True, 'dropout_rate': 0.13613947801791706, 'learning_rate': 0.0037159445431409004}. Best is trial 0 with value: 0.4340597140988169.[0m


              precision    recall  f1-score   support

           0       0.34      1.00      0.51     47913
           1       0.00      0.00      0.00     48050
           2       0.00      0.00      0.00     43523

    accuracy                           0.34    139486
   macro avg       0.11      0.33      0.17    139486
weighted avg       0.12      0.34      0.18    139486

Train (203799, 40) (203799,) Val (50949, 40) (50949,)
Model: "model_15"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_18 (InputLayer)           [(None, 100, 40)]    0                                            
__________________________________________________________________________________________________
conv1d_91 (Conv1D)              (None, 100, 29)      2349        input_18[0][0]                   
____________________________________________________

Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150

KeyboardInterrupt: 