source: http://www.jakob-aungiers.com/articles/a/LSTM-Neural-Network-for-Time-Series-Prediction

In [None]:
# only works on keras==1.2.2 tf==0.12.1

In [None]:
When building the dataset, if you skip every k points, the predictability is lost very quickly.
The number of next points to predict is not the key factor.

In [45]:
import os
import time
import warnings
import numpy as np
from numpy import newaxis
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential

import tensorflow as tf

In [46]:
# Input series file; load_data reads it as one value per line.
filename = 'sp500.csv'
# Window size parameter: load_data cuts the series into windows of seq_len + 1 values.
seq_len = 50
# Intended as load_data's normalise_window flag (rescale each window relative
# to its first value, see normalise_windows).
normalise_window = True

In [97]:
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' #Hide messy TensorFlow warnings
warnings.filterwarnings("ignore") #Hide messy Numpy warnings

def load_data(filename, seq_len, next_n, normalise_window):
    """Build shuffled train/test windows from a one-value-per-line text file.

    The series is cut into windows of ``seq_len + 1`` consecutive values, with
    a new window starting every ``next_n`` values.  Within each window the
    last ``next_n`` values are the targets and the leading
    ``seq_len + 1 - next_n`` values are the inputs.

    Args:
        filename: path of a text file holding one numeric value per line.
        seq_len: window length minus one.
        next_n: stride between window starts, and also the number of trailing
            values of each window used as targets.
        normalise_window: when true, every window is rescaled with
            ``normalise_windows`` before splitting.

    Returns:
        ``[x_train, y_train, x_test, y_test]``.  The x arrays have shape
        ``(samples, seq_len + 1 - next_n, 1)`` and the y arrays
        ``(samples, next_n)``.  The first 90% (rounded) of the shuffled
        windows form the training set, the rest the test set.

    Raises:
        ValueError: if a line is not numeric, or if the file is too short to
            produce a single window.

    Side effects:
        Reseeds NumPy's global random generator with 42.
    """
    # The context manager closes the handle; it used to be left open.
    with open(filename, 'rb') as handle:
        text = handle.read().decode()

    # splitlines() keeps the last value even without a trailing newline, and
    # parsing to float here means the un-normalised path also returns numeric
    # arrays instead of arrays of strings.  Blank lines are skipped.
    data = [float(line) for line in text.splitlines() if line.strip()]

    sequence_length = seq_len + 1
    result = [data[index: index + sequence_length]
              for index in range(0, len(data) - sequence_length, next_n)]

    if not result:
        # Fail with a clear message instead of an IndexError further down.
        raise ValueError(
            "no windows could be built: need more than %d values and "
            "next_n >= 1 (got %d values, next_n=%r)"
            % (sequence_length, len(data), next_n))

    if normalise_window:
        result = normalise_windows(result)

    np.random.seed(42)

    result = np.array(result)
    # NOTE(review): windows are shuffled *before* the train/test split, so
    # whenever next_n < seq_len + 1 overlapping windows can end up on both
    # sides of the split.  Kept as-is to preserve existing results.
    np.random.shuffle(result)

    row = int(round(0.9 * result.shape[0]))
    train = result[:row, :]
    # `train` is a view, so this reorders the first `row` rows of `result`.
    np.random.shuffle(train)
    x_train = train[:, :-next_n]
    y_train = train[:, -next_n:]
    x_test = result[row:, :-next_n]
    y_test = result[row:, -next_n:]

    # Add the trailing feature axis: (samples, timesteps, 1).
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

    return [x_train, y_train, x_test, y_test]

def normalise_windows(window_data):
    """Rescale every window to relative change against its first value.

    Each value ``p`` of a window becomes ``p / window[0] - 1``, so the first
    entry of every non-empty window is 0.  Values are converted with
    ``float``, so numeric strings are accepted.

    Args:
        window_data: iterable of indexable windows of numbers/numeric strings.

    Returns:
        A new list of lists of floats, one per input window.
    """
    def _relative_to_first(prices):
        # The base is looked up per element, so an empty window yields []
        # without ever touching prices[0].
        return [float(price) / float(prices[0]) - 1 for price in prices]

    return [_relative_to_first(prices) for prices in window_data]

In [98]:
def MSE_2(y_true, y_pred):
    """Loss: square root of the mean squared difference of the two tensors.

    Despite the name, this is the root-mean-squared error, reduced over all
    elements to a single scalar.
    """
    squared_error = tf.squared_difference(y_true, y_pred)
    mean_squared_error = tf.reduce_mean(squared_error)
    return tf.sqrt(mean_squared_error)

In [99]:
def build_model(output_n=1, hidden_units=50, dropout_rate=0.4):
    """Build and compile a single-layer LSTM regressor.

    Architecture: LSTM (one input feature per timestep, last state only)
    -> Dropout -> Dense(output_n) -> linear activation.  The model is
    compiled with the ``MSE_2`` loss and the "rmsprop" optimizer, and the
    compilation time is printed.

    Args:
        output_n: number of values the network predicts per sample.
        hidden_units: size of the LSTM layer (previously hard-coded to 50).
        dropout_rate: dropout fraction applied after the LSTM (previously
            hard-coded to 0.4).

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()

    # input_dim/output_dim are the keyword names used by the Keras version
    # this notebook is pinned to (see the version note at the top).
    model.add(LSTM(
        input_dim=1,
        output_dim=hidden_units,
        return_sequences=False))
    model.add(Dropout(dropout_rate))

    model.add(Dense(
        output_dim=output_n))
    model.add(Activation("linear"))

    start = time.time()
    model.compile(loss=MSE_2, optimizer="rmsprop")
    print("> Compilation Time : ", time.time() - start)
    return model

In [109]:
# Number of values predicted per window; load_data also uses it as the
# stride between consecutive windows.
output_num = 8
# Pass the configured normalise_window setting instead of a hard-coded True,
# so the flag in the configuration cell actually controls the run.
X_train, y_train, X_test, y_test = load_data(
    filename, seq_len, output_num, normalise_window)

print('> Data Loaded. Compiling...')

model = build_model(output_num)
print("X_train shape: ", X_train.shape)

> Data Loaded. Compiling...
('> Compilation Time : ', 0.030575990676879883)
('X_train shape: ', (464, 43, 1))


In [110]:
# Training options gathered in one place; validation_split reserves a
# fraction of the training samples for validation.
fit_options = dict(batch_size=512, nb_epoch=2, validation_split=0.05)
model.fit(X_train, y_train, **fit_options)


Train on 440 samples, validate on 24 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f7b60ee9550>

In [111]:
# NOTE(review): newaxis is already imported in the first import cell and is
# not used in the visible cells.
from numpy import newaxis
# Predict the target values for every held-out test window.
y_te_pred = model.predict(X_test)

In [112]:
# Sanity check: one row of targets per training window, one column per predicted value.
y_train.shape

(464, 8)

In [113]:
import matplotlib.pyplot as plt

# Compare predictions with the true targets for one forecast position
# (column 1 of the target block, i.e. the second predicted value).
step = 1
sample_index = range(len(y_test[:, step]))

fig, ax = plt.subplots()
ax.plot(sample_index, y_te_pred[:, step], label="predicted")
ax.plot(sample_index, y_test[:, step], label="actual")
# load_data shuffles the windows, so the x axis is a sample index, not time.
ax.set_xlabel("test sample index")
ax.set_ylabel("target value")
ax.set_title("Predicted vs. actual, forecast column %d" % step)
ax.legend()
plt.show()

In [114]:
# Sanity check: (samples, timesteps, features) as fed to the LSTM.
X_train.shape

(464, 43, 1)