In [None]:
import os

import quandl

# Credentials must not live in the notebook: read the Quandl API key from the
# environment. NOTE(review): a key was previously committed in this cell and
# should be treated as leaked and rotated.
_quandl_api_key = os.environ.get('QUANDL_API_KEY')
if not _quandl_api_key:
    raise RuntimeError(
        "Set the QUANDL_API_KEY environment variable before running this notebook."
    )
quandl.ApiConfig.api_key = _quandl_api_key

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import math
from keras.models import Sequential
from keras.layers import Dense, LSTM, Flatten, Dropout
from keras.preprocessing.sequence import TimeseriesGenerator
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

In [None]:
# Tickers requested from the SHARADAR/SEP table, grouped as in the original
# notebook (pairs / small groups of related names).
ticker_basket = [
    'AMD', 'NVDA',
    'BAC', 'C',
    'FB', 'AMZN', 'NFLX',
    'F', 'GM',
    'T', 'CMCSA', 'DIS',
    'AAPL', 'MSFT',
    'FCX', 'SIRI',
    'NUS', 'EL',
    'MU', 'INTC',
    'UAL', 'AAL',
    'TRIP', 'BKNG',
    'KO', 'PEP',
    'XOM', 'CVX',
]

# One request for the whole basket; paginate=True is passed through to quandl
# (presumably so results beyond the first page are fetched — confirm in docs).
price_table = quandl.get_table('SHARADAR/SEP', ticker=ticker_basket, paginate=True)

In [None]:
# Full table ordered by ticker, then date (copied so later edits do not touch price_table).
sorted_table = price_table.sort_values(['ticker', 'date']).copy()

# Alphabetically sorted list of the distinct tickers present in the data.
tickers = sorted(set(sorted_table['ticker']))

# Date/close history of the first ticker, oldest first. reset_index() keeps the
# previous row labels as an 'index' column, exactly as the in-place call did.
price_stock = (
    price_table.loc[price_table['ticker'] == tickers[0], ['date', 'close']]
    .sort_values('date')
    .reset_index()
)

In [None]:
# Closing price over time for the ticker selected into price_stock.
fig, ax = plt.subplots()
sns.lineplot(data=price_stock, x='date', y='close', ax=ax)

In [None]:
# Build wide (one column per ticker) close and volume tables.
#
# The rows are aligned on 'date' with a pivot instead of assigning raw lists
# column by column: list assignment pairs values purely by position, so it
# raises when tickers have different numbers of rows and silently misaligns
# dates when the lengths happen to match. With the pivot, a ticker that has no
# row for a given date gets NaN there. (pivot raises ValueError if the same
# (date, ticker) pair appears more than once.)
close_wide = sorted_table.pivot(index='date', columns='ticker', values='close')
volume_wide = sorted_table.pivot(index='date', columns='ticker', values='volume')

# close_df: 'date' column followed by one close column per ticker (in `tickers` order).
close_df = close_wide[tickers].rename_axis(columns=None).reset_index()

# volume_df: one 'v_<ticker>' column per ticker, same row order as close_df.
# It carries no 'date' column (the original never added one either; the second
# `close_df['date'] = ...` assignment was a no-op duplicate and is dropped).
volume_df = (
    volume_wide[tickers]
    .add_prefix('v_')
    .rename_axis(columns=None)
    .reset_index(drop=True)
)

# Simple period-over-period returns of the close prices: p_t / p_{t-1} - 1.
# The first row is NaN because it has no previous price.
no_date_df = close_df[list(close_df.columns)[1:]]
return_df = no_date_df/no_date_df.shift(1) - 1

In [None]:
# Pairwise correlation of the return columns, drawn as a square heatmap.
stock_corr = return_df.corr()
fig, ax = plt.subplots(figsize=(15, 15))
sns.heatmap(data=stock_corr, cmap="RdBu_r", square=True, ax=ax);

In [None]:
# Split the return/volume table into train, test and a final hold-out block for
# one pair of stocks, and add the return spread between them as 'diff'.

train_perc = 0.40
holdout_len = 400    # number of trailing rows reserved for `true_test`

return_df_2 = pd.concat([return_df, volume_df], axis=1)

train_len = int(return_df.shape[0] * (train_perc))

pair_to_trade = ['BAC', 'C']
volume_to_trade = ['v_' + ticker for ticker in pair_to_trade]
# Columns carried into every split: the two return columns, then their volumes.
stocks_to_trade = pair_to_trade + volume_to_trade


def _make_split(frame, rows):
    """Return the positional row slice `rows` of `frame` with a 'diff' column.

    'diff' is the first traded column minus the second one. The result is a
    copy with a fresh 0..n-1 index.
    """
    split = frame[stocks_to_trade].iloc[rows].copy()
    split['diff'] = split[stocks_to_trade[0]] - split[stocks_to_trade[1]]
    return split.reset_index(drop=True)


# Row 0 is skipped because the first return is NaN (no previous price).
train = _make_split(return_df_2, slice(1, train_len))
test = _make_split(return_df_2, slice(train_len, -holdout_len))
true_test = _make_split(return_df_2, slice(-holdout_len, None))

# Build a new list rather than aliasing `volume_to_trade`, so appending 'diff'
# no longer modifies the volume column list as a side effect.
feature_names = volume_to_trade + ['diff']

In [None]:
def make_variables(dataset, lags, look_back, predict_window):
    """Build sliding-window LSTM inputs and targets from the last column of `dataset`.

    Only the last column of `dataset` is used; call it ``series``. For sample
    ``s`` and time step ``k`` (``0 <= k <= look_back``)::

        X[s, k, :] = series[s + k : s + k + lags + 1]
        Y[s, k]    = series[s + k + lags + 1]

    i.e. each time step holds ``lags + 1`` consecutive values and its target is
    the value that immediately follows them.

    Parameters
    ----------
    dataset : 2-D array-like (np.matrix, ndarray, ...)
        Rows are observations in time order; the last column is the series.
    lags : int
        Each window holds ``lags + 1`` values.
    look_back : int
        Each sample holds ``look_back + 1`` consecutive windows (time steps).
    predict_window : int
        Number of rows trimmed from the end when counting samples. With 1 the
        final target is the last row of `dataset`. It does not change how far
        ahead the target lies, which is always one row past the window.

    Returns
    -------
    lstm_in_X : ndarray, shape (n_samples, look_back + 1, lags + 1)
    lstm_in_Y : ndarray, shape (n_samples, look_back + 1)
    features : int
        ``lags + 1``, the size of the last axis of `lstm_in_X`.

    Raises
    ------
    ValueError
        If `dataset` has too few rows to form a non-negative number of samples.
    """
    features = lags + 1
    start = look_back
    stop = len(dataset) - lags - predict_window
    n_samples = stop - start
    if n_samples < 0:
        raise ValueError(
            "dataset has %d rows; need at least %d for lags=%d, look_back=%d, "
            "predict_window=%d"
            % (len(dataset), lags + look_back + predict_window, lags, look_back,
               predict_window)
        )

    # Plain 1-D float view of the last column (also unwraps np.matrix input).
    series = np.asarray(dataset, dtype=np.float64)[:, -1]

    # window_start[s, k] = s + k: index in `series` where window (s, k) begins.
    window_start = np.arange(n_samples)[:, None] + np.arange(look_back + 1)[None, :]

    # Integer-array indexing builds every window at once instead of looping
    # over samples and time steps in Python.
    lstm_in_X = series[window_start[:, :, None] + np.arange(features)]
    # The target sits directly after the window: offset lags + 1 == features.
    lstm_in_Y = series[window_start + features]
    return lstm_in_X, lstm_in_Y, features

In [None]:
# Create and fit the LSTM network

# Windowing parameters passed to make_variables.
lags = 30
look_back = 3        # 0 is a look_back of 1, 1 is a look_back of 2, etc.
predict_window = 1

# Training features as a matrix; make_variables turns them into LSTM windows.
dataset = np.matrix(train[feature_names])
lstm_in_X, lstm_in_Y, features = make_variables(dataset, lags, look_back, predict_window)
train_X, train_Y = lstm_in_X, lstm_in_Y

# LSTM(32) -> Dropout(0.2) -> Dense with one output per look-back step.
model = Sequential([
    LSTM(32, input_shape=(look_back + 1, features)),
    Dropout(0.2),
    Dense(look_back + 1),
])
model.compile(optimizer='adam', loss='mean_absolute_error')
model.fit(train_X, train_Y, batch_size=25, epochs=100, verbose=1)

In [None]:
# In-sample check: take the last output column of the model and the matching
# last target column, then overlay them on one axis.
pred_Y_train = model.predict(train_X)
predictions, actuals = pred_Y_train[:, -1], train_Y[:, -1]

fig, ax1 = plt.subplots(figsize=(15, 7))
ax1.plot(predictions)
ax1.plot(actuals)

In [None]:
# Predicted value (x) against realised value (y) for the training windows.
plt.scatter(predictions, actuals)

In [None]:
init = 100

# Position is +1 when the prediction is non-negative, otherwise -1.
position = [1 if val >= 0 else -1 for val in predictions]

# Cumulative product of (1 + position * realised value) over the training windows.
signed_returns = np.array(position) * actuals
plt.plot(np.cumprod(signed_returns + 1));

In [None]:
# Expanding-window test: step through the test set one row at a time, refit the
# model on everything seen so far, then predict the following row.

expand_set = np.matrix(test[feature_names])

lags = 30
look_back = 3        # 0 is a look_back of 1, 1 is a look_back of 2, etc.
predict_window = 1
refit_every = 1      # refit the model every `refit_every` rows (1 = every row)

# Rebuild the history from the training frame. The loop below grows `dataset`,
# so without this reset, re-running the cell would append the test rows a
# second time onto an already-extended history.
dataset = np.matrix(train[feature_names])

predictions_test = []
actuals_test = []

print(expand_set.shape[0] - 1)

for i in range(0, expand_set.shape[0] - 1):
    print(i)
    curr_row = expand_set[i]
    test_row = expand_set[i+1]
    dataset = np.append(dataset, curr_row, axis=0)
    lstm_in_X, lstm_in_Y, features = make_variables(dataset, lags, look_back, predict_window)
    train_X = lstm_in_X
    train_Y = lstm_in_Y

    # Fit on the data up to and including curr_row ("today").
    if i % refit_every == 0:
        model.fit(train_X, train_Y, epochs=10, batch_size=25, verbose=1)

    # Predict the next row (out of sample). test_row is appended only so that
    # make_variables emits the window whose target is test_row; that window's
    # inputs end at curr_row.
    dataset_test = np.append(dataset, test_row, axis=0)
    lstm_in_X_test, lstm_in_Y_test, features = make_variables(dataset_test, lags, look_back, predict_window)
    test_X = lstm_in_X_test
    # Only the final window is used, so predict just that one instead of
    # re-scoring the whole history on every iteration.
    pred_Y_test = model.predict(test_X[-1:])
    predict_test = pred_Y_test[-1,-1]
    actual_test = test_row[-1,-1]
    # Store predictions and actuals for calculating money made and plotting.
    predictions_test.append(predict_test)
    actuals_test.append(actual_test)

In [None]:
init = 100

# Position is +1 when the prediction is non-negative, otherwise -1.
position_test = [1 if val >= 0 else -1 for val in predictions_test]

# Cumulative product of (1 + position * realised value) over the test predictions.
signed_returns_test = np.array(position_test) * actuals_test
plt.plot(np.cumprod(signed_returns_test + 1));

In [None]:
# Quick (non-expanding) backtest for sanity checking

# Window the whole test frame in one go and score it with the current model,
# reusing lags / look_back / predict_window from the earlier cells.
dataset_test = np.matrix(test[feature_names])
lstm_in_X, lstm_in_Y, features = make_variables(dataset_test, lags, look_back, predict_window)

test_X, test_Y = lstm_in_X, lstm_in_Y

pred_Y_test = model.predict(test_X)
predictions_test = pred_Y_test[:, -1]
actuals_test = test_Y[:, -1]

init = 100

# Position is +1 when the prediction is non-negative, otherwise -1.
position_test = [1 if val >= 0 else -1 for val in predictions_test]

# Cumulative product of (1 + position * realised value).
signed_returns_test = np.array(position_test) * actuals_test
plt.plot(np.cumprod(signed_returns_test + 1));