In [None]:
from numpy import array
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense, Dropout,TimeDistributed, Conv1D, MaxPooling1D, Flatten, Reshape, GRU
from keras.regularizers import l2
import keras
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Train Test Split

In [None]:
# Load the full feature table, discard the columns listed below, then drop
# every row that still contains a missing value.
raw = (
    pd.read_csv("datasets/all_features.csv")
    .drop(columns=[
        "SOFR", "SOFRVOL", "EFFRVOL", "OBFR", "OBFRVOL", "EFFRVOL_DIFF",
        "OBFR_DIFF", "OBFRVOL_DIFF", "SP500_UNNORM", "SOFR_DIFF", "SOFRVOL_DIFF",
    ])
    .dropna()
)

In [None]:
# Training split: every row dated strictly before 1 January 2018.
# The cutoff is a valid calendar date, so the comparison is correct both for
# ISO-formatted date strings and for a column parsed to datetimes.
# NOTE(review): assumes "date" holds YYYY-MM-DD style values - confirm.
train = raw[raw["date"] < "2018-01-01"]

In [None]:
# Test split: every row dated on or after 1 January 2018.
# The cutoff is a valid calendar date, so the comparison is correct both for
# ISO-formatted date strings and for a column parsed to datetimes.
# NOTE(review): assumes "date" holds YYYY-MM-DD style values - confirm.
test = raw[raw["date"] >= "2018-01-01"]

In [None]:
# Sanity check: number of rows in the training and test splits.
train.shape[0], test.shape[0]

In [None]:
# Persist both splits to disk. No `index=` argument is passed, so pandas'
# default applies and the row index is written out as well.
for split_frame, csv_path in ((train, "datasets/train.csv"), (test, "datasets/test.csv")):
    split_frame.to_csv(csv_path)

In [None]:
# Target vector: the SP500_NORM column.
y_train = train["SP500_NORM"].values
# Feature matrix: every remaining column except the date.
X_train = train.drop(columns=["date", "SP500_NORM"]).values

In [None]:
# Target vector: the SP500_NORM column.
y_test = test["SP500_NORM"].values
# Feature matrix: every remaining column except the date.
X_test = test.drop(columns=["date", "SP500_NORM"]).values

In [None]:
def split_sequence(X, y, n_steps):
    """Slice aligned feature/target series into overlapping sliding windows.

    The window starting at row ``i`` covers ``X[i:i + n_steps]``. Its target is
    the change of ``y`` over the window's final step, i.e.
    ``y[end] - y[end - 1]`` with ``end = i + n_steps - 1``.

    Parameters
    ----------
    X : sequence or ndarray
        Feature rows; must support ``len`` and slicing along the first axis.
    y : sequence or ndarray
        Target values aligned row-for-row with ``X``.
    n_steps : int
        Number of consecutive rows per window. Must be at least 1.

    Returns
    -------
    tuple of (ndarray, ndarray)
        The stacked windows and the matching targets. Both are empty
        (``array([])``) when ``X`` has fewer than ``n_steps`` rows.

    Raises
    ------
    ValueError
        If ``n_steps`` is smaller than 1.
    """
    if n_steps < 1:
        raise ValueError("n_steps must be >= 1, got {!r}".format(n_steps))

    # A window that ends on row 0 has no previous target: y[end - 1] would be
    # y[-1], which silently wraps around to the LAST element. That can only
    # happen for n_steps == 1, so in that case start from the second row.
    first_start = 1 if n_steps == 1 else 0
    # Last start index whose window still fits completely inside X.
    last_start = len(X) - n_steps
    starts = range(first_start, last_start + 1)

    new_X = [X[i:i + n_steps] for i in starts]
    new_y = [y[i + n_steps - 1] - y[i + n_steps - 2] for i in starts]
    return array(new_X), array(new_y)

In [None]:
def plot_train_stats(history):
    """Draw the per-epoch training and validation loss curves of a fit.

    ``history`` is expected to expose a ``history`` mapping that contains the
    ``'loss'`` and ``'val_loss'`` series (as the object returned by
    ``model.fit`` with validation data does).
    """
    curves = history.history
    # Training curve first, then validation, so the legend order matches.
    for series_name in ('loss', 'val_loss'):
        plt.plot(curves[series_name])
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.title('model loss')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

## LSTM Baseline

In [None]:
# Hyperparameters for the baseline LSTM section.
n_steps = 365        # rows per sliding window passed to split_sequence
lstm_size = 200      # number of units in the LSTM layer
weight_decay = 1e-3  # L2 regularisation factor handed to l2(...)
lr = 1e-4            # learning rate for the Adam optimizer
epochs = 200         # number of training epochs passed to model.fit

In [None]:
# Number of feature columns per time step (last axis of the 2-D feature matrix).
n_features = X_train.shape[-1]

In [None]:
# Turn both splits into sliding windows (plus per-window targets) of the
# currently configured length.
X_train_splited, y_train_splited = split_sequence(X_train, y_train, n_steps=n_steps)
X_test_splited, y_test_splited = split_sequence(X_test, y_test, n_steps=n_steps)

In [None]:
# Baseline: per-timestep Dense projection -> LSTM over the whole window ->
# flattened dense head producing a single value.
model = Sequential()
# The input shape belongs on the FIRST layer: a Sequential model only uses
# `input_shape` from its first layer, so declaring it on the LSTM below
# (the second layer) had no effect and left the model unbuilt until fit().
model.add(Dense(1024, input_shape=(n_steps, n_features)))
model.add(LSTM(lstm_size, activation='relu', return_sequences=True,
               kernel_regularizer=l2(weight_decay), recurrent_regularizer=l2(weight_decay),
               bias_regularizer=l2(weight_decay)))
# return_sequences=True yields one output vector per time step; flatten them
# into a single vector for the dense head.
model.add(Flatten())
# NOTE(review): the two hidden Dense layers have no activation argument -
# confirm that purely linear layers are intended here.
model.add(Dense(256, kernel_regularizer=l2(weight_decay), bias_regularizer=l2(weight_decay)))
model.add(Dense(16, kernel_regularizer=l2(weight_decay), bias_regularizer=l2(weight_decay)))
model.add(Dense(1, kernel_regularizer=l2(weight_decay), bias_regularizer=l2(weight_decay), activation = "tanh"))
opt = keras.optimizers.Adam(learning_rate=lr)
model.compile(opt, loss='mse')
# fit model; the windowed test split doubles as the validation set
history = model.fit(X_train_splited, y_train_splited,
          epochs=epochs,  batch_size=64, shuffle=True,
          validation_data=(X_test_splited, y_test_splited))

In [None]:
# Show the training and validation loss curves of the baseline LSTM fit.
plot_train_stats(history)

## Stacked LSTM

In [None]:
# Hyperparameters for the remaining model sections.
n_steps = 30
lstm_size = 200
weight_decay = 1e-2
lr = 1e-4
epochs = 200

# n_steps has changed, so the windows built earlier with the previous window
# length no longer match the (n_steps, n_features) input shape declared by
# the models below. Rebuild them with the new length.
X_train_splited, y_train_splited = split_sequence(X_train, y_train, n_steps)
X_test_splited, y_test_splited = split_sequence(X_test, y_test, n_steps)

In [None]:
# Three stacked 50-unit LSTM layers with dropout after each, followed by a
# single linear output unit.
model = Sequential()
model.add(LSTM(50, activation='relu', return_sequences=True, input_shape=(n_steps, n_features)))
model.add(Dropout(0.2))
# Only the first layer of a Sequential model needs (or uses) an input shape;
# later layers infer theirs from the previous layer's output.
model.add(LSTM(50, activation='relu', return_sequences=True))
model.add(Dropout(0.2))
# Last recurrent layer returns only its final state, feeding the Dense head.
model.add(LSTM(50, activation='relu', return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(1))
# NOTE(review): the learning rate (0.01) and layer width (50) are literals;
# the `lr` and `lstm_size` hyperparameters are not used here - confirm intended.
opt = keras.optimizers.Adam(learning_rate=0.01)
model.compile(optimizer=opt, loss='mse')

In [None]:
# Train on the windowed training split; the windowed test split is passed as
# validation data.
history = model.fit(
    X_train_splited,
    y_train_splited,
    validation_data=(X_test_splited, y_test_splited),
    epochs=epochs,
    batch_size=64,
    shuffle=True,
)

In [None]:
# Show the training and validation loss curves of the stacked LSTM fit.
plot_train_stats(history)

## Conv LSTM

In [None]:
# Shape of one window exactly as it is fed to fit(): (timesteps, features).
# Taken from the data itself so the model's input can never disagree with it.
window_shape = tuple(X_train_splited.shape[1:])

model = Sequential()
# The TimeDistributed Conv1D stack needs 4-D input
# (batch, subsequences, timesteps, features), but the windows are 3-D
# (batch, timesteps, features). Treat every window as a single subsequence so
# the arrays can be passed to fit() unchanged.
model.add(Reshape((1,) + window_shape, input_shape=window_shape))
model.add(TimeDistributed(Conv1D(filters=64, kernel_size=1, activation='relu')))
model.add(TimeDistributed(MaxPooling1D(pool_size=2)))
# One flat feature vector per subsequence -> sequence input for the LSTM.
model.add(TimeDistributed(Flatten()))
model.add(LSTM(50, activation='relu'))

model.add(Dropout(0.2))
# Re-interpret the 50 LSTM outputs as a 50-step sequence of scalars so that
# a second LSTM can be applied.
model.add(Reshape((-1,1)))
model.add(LSTM(50, activation='relu'))

model.add(Dropout(0.2))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

In [None]:
# Train on the windowed training split; the windowed test split is passed as
# validation data.
history = model.fit(
    X_train_splited,
    y_train_splited,
    validation_data=(X_test_splited, y_test_splited),
    epochs=epochs,
    batch_size=64,
    shuffle=True,
)

In [None]:
# Show the training and validation loss curves of the Conv LSTM fit.
plot_train_stats(history)

## GRU

In [None]:
# Four stacked 50-unit GRU layers (L2-regularised kernels and recurrent
# kernels) with dropout after each, followed by a single linear output unit.
model = Sequential()
model.add(GRU(units=50, return_sequences=True, input_shape=(n_steps,n_features), activation='tanh',
              kernel_regularizer=l2(weight_decay), recurrent_regularizer=l2(weight_decay)))
model.add(Dropout(0.2))
# Only the first layer of a Sequential model uses `input_shape`; the layers
# below infer their input from the previous layer's output.
model.add(GRU(units=50, return_sequences=True, activation='tanh',
              kernel_regularizer=l2(weight_decay), recurrent_regularizer=l2(weight_decay)))
model.add(Dropout(0.2))
model.add(GRU(units=50, return_sequences=True, activation='tanh',
              kernel_regularizer=l2(weight_decay), recurrent_regularizer=l2(weight_decay)))
model.add(Dropout(0.2))
# Last recurrent layer returns only its final state, feeding the Dense head.
model.add(GRU(units=50, activation='tanh', return_sequences=False,
             kernel_regularizer=l2(weight_decay), recurrent_regularizer=l2(weight_decay)))
model.add(Dropout(0.2))
model.add(Dense(units=1,kernel_regularizer=l2(weight_decay) ))
opt = keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=opt, loss='mse')

In [None]:
# Train on the windowed training split for a fixed 5 epochs; the windowed
# test split is passed as validation data.
history = model.fit(
    X_train_splited,
    y_train_splited,
    validation_data=(X_test_splited, y_test_splited),
    epochs=5,
    batch_size=64,
    shuffle=True,
)