# Example: using a recurrent network (Lasagne `RecurrentLayer`) on S&P index data

Inspired by the [Lasagne recurrent-network example](https://github.com/Lasagne/Lasagne/blob/master/examples/recurrent.py).

In [1]:
import sys
import os
import time
import numpy as np
import theano
import theano.tensor as T
import lasagne
import pandas as pd

def loaddata(path='table.csv'):
    """Load the price table and split it into train / validation / test sets.

    Columns 1-5 of the CSV are read as float32. Only rows 0..2700 of the file
    are used, and they are taken in reversed row order (row 2700 first).

    * Features: rows 2700..1 (reversed), every column divided by 1000 and
      column 4 divided by a further 1,000,000, reshaped to ``(2700, 1, 5)``
      (samples, sequence length 1, 5 features).
    * Targets: for each feature row, 1 if the log of column 3 does not
      decrease when moving to the next row in the reversed order, else 0.

    Parameters
    ----------
    path : str, optional
        Location of the CSV file. Defaults to ``'table.csv'`` in the current
        working directory.

    Returns
    -------
    tuple
        ``(train_data, train_target, val_data, val_target, test_data,
        test_target)`` holding the first 2500, the next 100 and the last 100
        samples respectively. Data arrays are float32, targets are int32.

    Raises
    ------
    ValueError
        If the table has fewer than 2701 rows.
    """
    n_samples = 2700   # number of (features, target) pairs produced
    n_train = 2500     # samples [0, 2500) are used for training
    n_val = 100        # samples [2500, 2600) for validation, the rest for test

    table = pd.read_csv(path, usecols=[1, 2, 3, 4, 5], dtype=np.float32)
    table = np.array(table)
    if table.shape[0] < n_samples + 1:
        raise ValueError(
            "{} must contain at least {} rows, found {}".format(
                path, n_samples + 1, table.shape[0]))

    # Column 3 for rows 2700..0: 2701 values, hence 2700 log differences.
    log_price = np.log(table[n_samples::-1, 3])
    log_change = np.diff(log_price)
    # 1 where the log difference is >= 0, 0 where it is negative.
    target = (log_change >= 0).astype(np.int32)

    # Rows 2700..1, so features[i] is the row just before the change that
    # target[i] describes. The division creates a new array, so the raw table
    # is left untouched by the in-place rescaling of column 4 below.
    features = table[n_samples:0:-1, :] / 1000
    features[:, 4] = features[:, 4] / 1000000
    features = features.reshape(n_samples, 1, 5)

    val_end = n_train + n_val
    train_data = features[:n_train, :, :]
    train_target = target[:n_train]
    val_data = features[n_train:val_end, :, :]
    val_target = target[n_train:val_end]
    test_data = features[val_end:, :, :]
    test_target = target[val_end:]
    return train_data, train_target, val_data, val_target, test_data, test_target

In [2]:
# Sanity check: call loaddata() once; the returned 6-tuple of arrays is echoed as the cell output.
loaddata()

(array([[[ 1.21011996,  1.21243989,  1.20506001,  1.21034002,  1.49010003]],
 
        [[ 1.21034002,  1.21132994,  1.20073998,  1.20074999,  1.58012009]],
 
        [[ 1.20074999,  1.20292008,  1.19735003,  1.20158994,  1.55120003]],
 
        ..., 
        [[ 2.02076006,  2.02893996,  2.0044899 ,  2.02255011,  3.94433999]],
 
        [[ 2.02019   ,  2.03829002,  2.01204014,  2.03211999,  3.73007011]],
 
        [[ 2.03430009,  2.06462002,  2.02638006,  2.06314993,  4.17605019]]], dtype=float32),
 array([0, 1, 0, ..., 1, 1, 0], dtype=int32),
 array([[[ 2.06297994,  2.06297994,  2.05053997,  2.05182004,  3.57356   ]],
 
        [[ 2.05041981,  2.05762005,  2.04097009,  2.05709004,  3.46575999]],
 
        [[ 2.04785991,  2.04785991,  2.0199101 ,  2.02955008,  3.3298099 ]],
 
        [[ 2.03234005,  2.04249001,  2.00148988,  2.00216007,  4.06753016]],
 
        [[ 2.00244999,  2.02464008,  1.98918009,  2.02125001,  4.12714005]],
 
        [[ 2.01935005,  2.02331996,  1.99337995,  1.9949

In [3]:
# Number of hidden units given to the recurrent layer built in main().
N_HIDDEN = 100
# Value passed to the recurrent layer as its grad_clipping argument.
GRAD_CLIP = 200

In [4]:
def main(num_epochs=100,learn_rate=0.01):
    """Build, train and evaluate a small recurrent classifier on the loaddata() splits.

    The network is: input of shape (batch, 1, 5) -> RecurrentLayer with
    N_HIDDEN tanh units -> 2-way softmax DenseLayer. It is trained with
    Adagrad on the mean categorical cross-entropy, one full-batch update per
    epoch. After every epoch the validation accuracy is printed; whenever it
    exceeds the best value seen so far, the test accuracy and test predictions
    of that model are recorded. Training can be stopped early with
    Ctrl-C / kernel interrupt.

    Parameters
    ----------
    num_epochs : int
        Number of full-batch training updates to run.
    learn_rate : float
        Initial Adagrad learning rate; it is multiplied by 0.96 on every
        epoch whose index satisfies ``epoch % 15 == 14``.

    Returns
    -------
    numpy.ndarray or None
        Predicted class (0 or 1) for each test sample, taken at the epoch with
        the best validation accuracy, or None if no epoch produced a
        validation accuracy above 0 before training stopped.
    """
    print("Building network ...")
    # First, we build the network, starting with an input layer
    # Recurrent layers expect input of shape
    # (batch size, max sequence length, number of features)
    l_in = lasagne.layers.InputLayer(shape=(None, 1, 5))

    l_forward = lasagne.layers.RecurrentLayer(
        l_in, N_HIDDEN, grad_clipping=GRAD_CLIP,
        W_in_to_hid=lasagne.init.HeUniform(),
        W_hid_to_hid=lasagne.init.HeUniform(),
        nonlinearity=lasagne.nonlinearities.tanh)

    l_out = lasagne.layers.DenseLayer(
        l_forward, num_units=2, nonlinearity=lasagne.nonlinearities.softmax)

    target_values = T.ivector('target_output')

    prediction = lasagne.layers.get_output(l_out)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_values)
    loss = loss.mean()
    acc = T.mean(T.eq(T.argmax(prediction, axis=1), target_values),dtype=theano.config.floatX)

    # Only optimise parameters tagged as trainable; without the tag filter
    # non-trainable parameters would also receive gradient updates.
    all_params = lasagne.layers.get_all_params(l_out, trainable=True)

    # Compute GD updates for training
    print("Computing updates ...")
    # The learning rate lives in a shared variable so that the decay applied
    # in the training loop is seen by the already-compiled update function.
    # (Rescaling a plain Python float after compilation has no effect.)
    lr_shared = theano.shared(lasagne.utils.floatX(learn_rate), name='learn_rate')
    updates = lasagne.updates.adagrad(loss, all_params, lr_shared)

    # Theano functions for training and computing cost
    print("Compiling functions ...")
    train = theano.function([l_in.input_var, target_values],
                            loss, updates=updates)
    accuracy = theano.function(
        [l_in.input_var, target_values],acc )

    pred=theano.function([l_in.input_var],T.argmax(prediction, axis=1))

    # The data does not change between epochs, so load it once up front.
    X, y, X_val, y_val, X_test, y_test = loaddata()

    # The validation split is used after every epoch to pick the best model.
    bestacc=0
    # Defined up front so the final print/return work even when training is
    # interrupted before any model has been recorded.
    test_acc=None
    p=None

    print("Training ...")
    try:
        for epoch in range(num_epochs):
            if epoch % 15 == 14:
                lr_shared.set_value(
                    lasagne.utils.floatX(lr_shared.get_value() * 0.96))
            train(X, y)
            val_acc = accuracy(X_val, y_val)
            print("Epoch {} validation accuracy = {}".format(epoch, val_acc))
            if val_acc>bestacc:
                bestacc=val_acc
                test_acc=accuracy(X_test, y_test)
                p=pred(X_test)
                print("             best model's test accuracy = {}".format(test_acc))
    except KeyboardInterrupt:
        # Deliberate: an interrupt ends training and falls through to the summary.
        pass
    print("final model's test accuracy = {}".format(test_acc))
    return(p)


In [5]:
# Train for up to 5000 epochs (main() stops early on KeyboardInterrupt). p holds the predicted
# class for each test sample, taken at the epoch with the best validation accuracy.
p=main(5000)

Building network ...
Computing updates ...
Compiling functions ...
Training ...
Epoch 0 validation accuracy = 0.5
             best model's test accuracy = 0.49
Epoch 1 validation accuracy = 0.51
             best model's test accuracy = 0.53
Epoch 2 validation accuracy = 0.59
             best model's test accuracy = 0.54
Epoch 3 validation accuracy = 0.53
Epoch 4 validation accuracy = 0.59
Epoch 5 validation accuracy = 0.58
Epoch 6 validation accuracy = 0.52
Epoch 7 validation accuracy = 0.57
Epoch 8 validation accuracy = 0.54
Epoch 9 validation accuracy = 0.55
Epoch 10 validation accuracy = 0.52
Epoch 11 validation accuracy = 0.53
Epoch 12 validation accuracy = 0.53
Epoch 13 validation accuracy = 0.52
Epoch 14 validation accuracy = 0.52
Epoch 15 validation accuracy = 0.51
Epoch 16 validation accuracy = 0.51
Epoch 17 validation accuracy = 0.51
Epoch 18 validation accuracy = 0.51
Epoch 19 validation accuracy = 0.51
Epoch 20 validation accuracy = 0.51
Epoch 21 validation accuracy = 0.5



In [6]:
# Re-read the raw table (columns 1-5 as float32) as a plain NumPy array.
data = pd.read_csv(
    'table.csv',
    usecols=(1, 2, 3, 4, 5),
    dtype=np.float32,
).values
# Column 3 of the first 101 rows of the file, in reversed row order (row 100 first, row 0 last).
# NOTE(review): presumably the file is newest-first, making this chronological - confirm.
closeprice = data[:101, 3][::-1]

In [7]:
# First value of the price series; used later as the reference level for returns.
benchmark=closeprice[0]
# Step-to-step change of the price series (one element shorter than closeprice).
dif=np.diff(closeprice)
# Map the 0/1 predictions onto -1/+1 positions.
decision=p*2-1
# Guard against silent broadcasting if predictions and price changes do not line up.
assert np.shape(decision) == np.shape(dif), "predictions and price changes must have the same length"
# Profit per step: positive when the position sign matches the sign of the price change.
strategy_profit=decision*dif

In [8]:
# Display the per-step strategy profit (decision * price change) computed in the previous cell.
strategy_profit

array([ -2.08000488e+01,   1.12500000e+01,   1.28601074e+01,
        -1.34985352e+00,   1.56198730e+01,   6.27001953e+00,
         6.99951172e-01,  -4.39702148e+01,   5.47021484e+00,
         1.43098145e+01,  -6.39892578e-01,   8.02001953e+00,
         1.25800781e+01,  -3.46600342e+01,   4.63000488e+00,
         2.53100586e+01,  -2.29799805e+01,  -9.34985352e+00,
         1.55004883e+00,  -1.68901367e+01,  -2.34985352e+00,
        -1.64013672e+00,   9.07006836e+00,   5.06005859e+00,
        -1.20000000e+01,  -2.25000000e+01,  -1.20100098e+01,
         2.56101074e+01,   1.53200684e+01,   5.98144531e-02,
        -4.78979492e+00,  -5.80004883e+00,   4.71997070e+00,
         6.52001953e+00,  -1.62800293e+01,  -5.98999023e+00,
         2.66098633e+01,  -2.01098633e+01,   1.97998047e+00,
        -2.66015625e+00,  -8.15014648e+00,  -1.08999023e+01,
         5.52001953e+00,   1.73098145e+01,  -4.38801270e+01,
        -6.48399658e+01,  -7.76800537e+01,  -2.55999756e+01,
         7.29000244e+01,

In [17]:
# Relative change of the price series with respect to its first value.
sp_rate = (closeprice[1:] - benchmark) / benchmark
# Running total of the strategy profit, expressed relative to the same first value.
profit_rate = np.cumsum(strategy_profit) / benchmark

# Put both curves side by side and save them; quoting=1 is csv.QUOTE_ALL.
output = pd.DataFrame({"sp": sp_rate, "strategy": profit_rate})
output.to_csv("strat.csv", index=False, quoting=1)

![plot](https://github.com/volpato30/CNN/blob/master/lstm/plot.png?raw=true)