source: http://www.jakob-aungiers.com/articles/a/LSTM-Neural-Network-for-Time-Series-Prediction

In [None]:
# only works on keras==1.2.2 tf==0.12.1

In [1]:
import os
import time
import warnings
import numpy as np
from numpy import newaxis
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential

Using TensorFlow backend.


In [61]:
def load_data(filename, seq_len, normalise_window):
    f = open(filename, 'rb').read()
    data = f.decode().split('\n')

    sequence_length = seq_len + 1
    result = []
    for index in range(len(data) - sequence_length):
        result.append(data[index: index + sequence_length])

    
    row = round(0.9 * result.shape[0])
    train = result[:int(row), :]

    np.random.shuffle(train)
    x_train = train[:, :-1]
    y_train = train[:, -1]
    x_test = result[int(row):, :-1]
    y_test = result[int(row):, -1]

    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))  

    return [x_train, y_train, x_test, y_test]


In [62]:
filename = 'sp500.csv'
seq_len = 50
normalise_window = True

In [69]:
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' #Hide messy TensorFlow warnings
warnings.filterwarnings("ignore") #Hide messy Numpy warnings

def load_data(filename, seq_len, normalise_window):
    f = open(filename, 'rb').read()
    data = f.decode().split('\n')[:-1]

    sequence_length = seq_len + 1
    result = []
    for index in range(len(data) - sequence_length):
        result.append(data[index: index + sequence_length])
    
    np.random.seed(42)
    
    result = np.array(result)
    np.random.shuffle(result)
    
    
    if normalise_window:
        result = normalise_windows(result)

    result = np.array(result)

    row = round(0.9 * result.shape[0])
    train = result[:int(row), :]
    np.random.shuffle(train)
    x_train = train[:, :-1]
    y_train = train[:, -1]
    x_test = result[int(row):, :-1]
    y_test = result[int(row):, -1]

    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))  

    return [x_train, y_train, x_test, y_test]

def normalise_windows(window_data):
    normalised_data = []
    for window in window_data:
        normalised_window = [((float(p) / float(window[0])) - 1) for p in window]
        normalised_data.append(normalised_window)
    return normalised_data

In [70]:
def build_model():
    model = Sequential()

    model.add(LSTM(
        input_dim= 1,
        output_dim= 50,
        return_sequences=False))
    model.add(Dropout(0.4))

    model.add(Dense(
        output_dim= 1))
    model.add(Activation("linear"))

    start = time.time()
    model.compile(loss="mse", optimizer="rmsprop")
    print("> Compilation Time : ", time.time() - start)
    return model

In [71]:
X_train, y_train, X_test, y_test = load_data(filename, seq_len, True)

num = 50
X_train = X_train[:num,]
y_train = y_train[:num]
print('> Data Loaded. Compiling...')

model = build_model()
print("one datapoint from X_train: ", X_train[0][1])

> Data Loaded. Compiling...
('> Compilation Time : ', 0.02526092529296875)
('one datapoint from X_train: ', array([-0.00478023]))


In [67]:
model.fit(
    X_train,
    y_train,
    batch_size=512,
    nb_epoch=2,
    validation_split=0.05)


Train on 47 samples, validate on 3 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f0353102690>

In [68]:
from numpy import newaxis
y_te_pred = model.predict(X_test)

In [27]:
import matplotlib.pyplot as plt
plt.plot(range(len(y_test)),y_te_pred)
plt.plot(range(len(y_test)),y_test)
plt.show()

In [18]:
X_train.shape

(50, 50, 1)

In [41]:
error = np.abs(y_test.reshape((-1,1)) - y_te_pred)

In [40]:
y_te_pred.shape

(412, 1)

In [42]:
plt.scatter(y_test,error)
plt.show()