# Example code: using an LSTM on high-frequency price data

Inspired by the [Lasagne recurrent example code](https://github.com/Lasagne/Lasagne/blob/master/examples/recurrent.py).

In [1]:
import sys
import os
import time
import numpy as np
import theano
import theano.tensor as T
import lasagne
import pandas as pd


In [39]:
def loaddata(path='hf.txt', train_end=358000, val_end=360000):
    """Load the price file and split it into train/validation/test sets.

    Columns 4 and 5 of the CSV at ``path`` are read as float32.  The first
    selected column is treated as a price series: the label for row ``i`` is
    the sign of the log-price change from row ``i`` to row ``i + 1``, shifted
    to the classes 0 (down), 1 (flat) and 2 (up).  The second selected column
    is divided by 100.  Because the label needs the next row, the last row of
    the file has no label and is dropped from the test features.

    Parameters
    ----------
    path : str
        CSV file to read (the first line is consumed as the header).
    train_end : int
        Rows ``[0, train_end)`` form the training set.
    val_end : int
        Rows ``[train_end, val_end)`` form the validation set; the remaining
        labelled rows form the test set.

    Returns
    -------
    tuple
        ``(train_data, train_target, val_data, val_target, test_data,
        test_target)``.  Feature arrays have shape ``(n, 1, 2)`` and dtype
        float32; target arrays have shape ``(n,)`` and dtype int32.

    Raises
    ------
    ValueError
        If the split points are not ordered or exceed the number of labelled
        rows in the file.
    """
    data = np.array(pd.read_csv(path, usecols=[4, 5], dtype=np.float32))

    # Direction of the next log-price move, mapped from {-1, 0, 1} to {0, 1, 2}.
    log_returns = np.diff(np.log(data[:, 0]))
    target = (np.sign(log_returns) + 1).astype(np.int32)

    data[:, 1] = data[:, 1] / 100
    # One time step per sample; infer the row count instead of hard-coding it.
    data = data.reshape(-1, 1, 2)

    # np.diff yields one label fewer than there are rows.
    n_labelled = len(target)
    if not 0 <= train_end <= val_end <= n_labelled:
        raise ValueError(
            "split points must satisfy 0 <= train_end <= val_end <= {}; "
            "got train_end={}, val_end={}".format(n_labelled, train_end, val_end))

    train_data = data[0:train_end, :, :]
    train_target = target[0:train_end]
    val_data = data[train_end:val_end, :, :]
    val_target = target[train_end:val_end]
    test_data = data[val_end:n_labelled, :, :]
    test_target = target[val_end:]
    return train_data, train_target, val_data, val_target, test_data, test_target

In [40]:
N_HIDDEN = 100 # number of hidden units in the recurrent layer built in main()
GRAD_CLIP = 200 # passed as grad_clipping to the recurrent layer: gradients larger than this are clipped

In [41]:
def main(num_epochs=100,learn_rate=0.01):
    """Train a recurrent classifier on the price data and return test predictions.

    The network is an input layer of shape (batch, 1, 2), a tanh
    ``RecurrentLayer`` with ``N_HIDDEN`` units and a 3-way softmax output.
    Each epoch performs one Adagrad update on the whole training set, then
    measures validation accuracy.  Whenever validation accuracy improves, the
    test accuracy and test predictions of that model are recorded.

    Parameters
    ----------
    num_epochs : int
        Number of full-batch training steps to run.
    learn_rate : float
        Initial Adagrad learning rate; multiplied by 0.96 every 8th epoch.

    Returns
    -------
    numpy.ndarray or None
        Predicted class (0, 1 or 2) for every test sample, taken at the epoch
        with the best validation accuracy.  ``None`` if training stopped
        before any epoch improved on the initial validation accuracy of 0.
    """
    print("Building network ...")
    # First, we build the network, starting with an input layer
    # Recurrent layers expect input of shape
    # (batch size, max sequence length, number of features)

    l_in = lasagne.layers.InputLayer(shape=(None, 1, 2))

    l_forward = lasagne.layers.RecurrentLayer(
        l_in, N_HIDDEN, grad_clipping=GRAD_CLIP,
        W_in_to_hid=lasagne.init.HeUniform(),
        W_hid_to_hid=lasagne.init.HeUniform(),
        nonlinearity=lasagne.nonlinearities.tanh)

    l_out = lasagne.layers.DenseLayer(
        l_forward, num_units=3, nonlinearity=lasagne.nonlinearities.softmax)

    target_values = T.ivector('target_output')

    prediction = lasagne.layers.get_output(l_out)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_values)
    loss = loss.mean()
    acc = T.mean(T.eq(T.argmax(prediction, axis=1), target_values),dtype=theano.config.floatX)

    all_params = lasagne.layers.get_all_params(l_out)
    # Compute GD updates for training
    print("Computing updates ...")
    # The learning rate lives in a shared variable so that the decay applied
    # in the training loop reaches the compiled function; a plain Python
    # float would be frozen into the graph at compile time.
    lr_shared = theano.shared(lasagne.utils.floatX(learn_rate))
    updates = lasagne.updates.adagrad(loss, all_params, lr_shared)
    # Theano functions for training and computing cost
    print("Compiling functions ...")
    train = theano.function([l_in.input_var, target_values],
                            loss, updates=updates)
    accuracy = theano.function(
        [l_in.input_var, target_values],acc )

    pred=theano.function([l_in.input_var],T.argmax(prediction, axis=1))

    # The data set does not change between epochs, so read it only once.
    X, y, X_val, y_val, X_test, y_test = loaddata()

    # We'll use this "validation set" to periodically check progress
    bestacc=0
    # Pre-bind the results so an early interrupt cannot leave them undefined.
    test_acc=None
    p=None

    print("Training ...")
    try:
        for epoch in range(num_epochs):
            if epoch % 8 == 7:
                learn_rate*=0.96
                lr_shared.set_value(lasagne.utils.floatX(learn_rate))
            train(X, y)
            val_acc = accuracy(X_val, y_val)
            print("Epoch {} validation accuracy = {}".format(epoch, val_acc))
            if val_acc>bestacc:
                bestacc=val_acc
                test_acc=accuracy(X_test, y_test)
                p=pred(X_test)
                print("             best model's test accuracy = {}".format(test_acc))
    except KeyboardInterrupt:
        # Allow a manual stop; report whatever has been recorded so far.
        pass
    # Note: this is the test accuracy recorded at the best-validation epoch.
    print("final model's test accuracy = {}".format(test_acc))
    return p


In [42]:
# Train for 100 epochs; p receives the test-set class predictions returned by main().
p=main(100)

Building network ...
Computing updates ...
Compiling functions ...
Training ...
Epoch 0 validation accuracy = 0.7985
             best model's test accuracy = 0.6285855588526211
Epoch 1 validation accuracy = 0.805
             best model's test accuracy = 0.6345202769535113
Epoch 2 validation accuracy = 0.8085
             best model's test accuracy = 0.636003956478734
Epoch 3 validation accuracy = 0.8095
             best model's test accuracy = 0.636003956478734
Epoch 4 validation accuracy = 0.811
             best model's test accuracy = 0.6364985163204748
Epoch 5 validation accuracy = 0.813
             best model's test accuracy = 0.6364985163204748
Epoch 6 validation accuracy = 0.814
             best model's test accuracy = 0.6364985163204748
Epoch 7 validation accuracy = 0.8145
             best model's test accuracy = 0.636003956478734
Epoch 8 validation accuracy = 0.8155
             best model's test accuracy = 0.6374876360039565
Epoch 9 validation accuracy = 0.817
         



## A very simple strategy based on the prediction

In [45]:
# Re-read the same two columns of hf.txt that loaddata() uses.
data=pd.read_csv('hf.txt',usecols=(4,5),dtype=np.float32)
data=np.array(data)
# First selected column from row 360000 onward, the offset at which
# loaddata() starts its test split.
closeprice=data[360000:,0]
# Bare expression: only displays a value when it is the last line of an
# interactive cell, so it has no effect here.
closeprice
# Reference price used to normalise returns: the first price of the test period.
benchmark=closeprice[0]
# Price change between consecutive rows.
dif=np.diff(closeprice)
# Map predicted classes {0, 1, 2} to positions {-1, 0, +1}.
decision=p-1
# Per-step profit: position times the price change over that step.
strategy_profit=decision*dif
strategy_profit

array([ 0.,  0.,  0., ...,  0.,  0.,  0.])

In [48]:
# Return of simply holding the asset, relative to the first test-period price.
sp_rate=(closeprice[1:]-benchmark)/benchmark
# Cumulative strategy profit expressed as a fraction of the same reference price.
profit_rate=np.cumsum(strategy_profit)/benchmark
output = pd.DataFrame( data={"sp":sp_rate,"strategy":profit_rate} )
# quoting=1 is the value of csv.QUOTE_ALL.
output.to_csv( "strat1.csv", index=False, quoting=1 )

![plot](hf.png)