In [1]:
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, LeakyReLU, BatchNormalization, LSTM, Input, Concatenate
from keras import backend as K
from keras.callbacks import TensorBoard
from keras.optimizers import Adam
from keras.utils import plot_model
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

Using TensorFlow backend.


In [96]:
# Model / training hyper-parameters shared by the cells below.
layers = 4         # make_model() stacks (layers - 1) Dense blocks before the final Dense(1)
n_timesteps = 20   # length of the closing-price sequence fed to the LSTM input
features = 4       # width of the second (non-sequence) model input
n_batch = 4096     # passed to model.fit as batch_size
n_epochs = 100     # passed to model.fit as epochs

In [7]:
def make_model(n_layers=None, timesteps=None, n_features=None,
               lstm_units=4, lstm_depth=4, dense_units=100):
    """Build the two-input regression network.

    One branch feeds a ``(timesteps, 1)`` sequence through a stack of LSTM
    layers; only the last LSTM collapses the sequence to a single vector.
    That vector is concatenated with a flat ``(n_features,)`` input and passed
    through ``n_layers - 1`` Dense -> BatchNormalization -> LeakyReLU blocks,
    followed by a single ReLU output unit.

    Called with no arguments, the sizes fall back to the notebook-level
    ``layers``, ``n_timesteps`` and ``features`` and the layer widths keep
    their previous hard-coded values, so ``make_model()`` is unchanged.

    Args:
        n_layers: total dense depth including the output layer; defaults to
            the notebook-level ``layers``.
        timesteps: sequence length; defaults to the notebook-level
            ``n_timesteps``.
        n_features: width of the flat input; defaults to the notebook-level
            ``features``.
        lstm_units: units in every LSTM layer.
        lstm_depth: number of stacked LSTM layers (at least 1).
        dense_units: units in every hidden Dense layer.

    Returns:
        An uncompiled ``Model`` whose inputs are ``[sequence, flat_features]``
        and whose output has shape ``(batch, 1)``.

    Raises:
        ValueError: if ``lstm_depth`` is smaller than 1.
    """
    # Resolve defaults at call time so later edits to the config cell are honoured.
    n_layers = layers if n_layers is None else n_layers
    timesteps = n_timesteps if timesteps is None else timesteps
    n_features = features if n_features is None else n_features
    if lstm_depth < 1:
        raise ValueError('lstm_depth must be at least 1')

    close_history = Input((timesteps, 1))
    input2 = Input((n_features,))

    lstm = Sequential()
    for depth in range(lstm_depth):
        # Only the first layer of a Sequential needs the input shape.
        first_layer_kwargs = {'input_shape': (timesteps, 1)} if depth == 0 else {}
        lstm.add(LSTM(units=lstm_units,
                      # Intermediate layers must emit the full sequence for the next LSTM.
                      return_sequences=depth < lstm_depth - 1,
                      **first_layer_kwargs))
    input1 = lstm(close_history)

    connect = Concatenate()([input1, input2])

    for _ in range(n_layers - 1):
        connect = Dense(dense_units)(connect)
        connect = BatchNormalization()(connect)
        connect = LeakyReLU()(connect)

    # ReLU on the output clamps predictions at zero.
    predict = Dense(1, activation='relu')(connect)

    return Model(inputs=[close_history, input2], outputs=predict)

In [4]:
# Build the network; with no arguments make_model() sizes it from the
# notebook-level `layers`, `n_timesteps` and `features`.
model = make_model()

Instructions for updating:
Colocations handled automatically by placer.


In [6]:
# Print the layer-by-layer summary to check output shapes and parameter counts.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 20, 1)        0                                            
__________________________________________________________________________________________________
sequential_1 (Sequential)       (None, 4)            528         input_1[0][0]                    
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 4)            0                                            
__________________________________________________________________________________________________
concatenate_1 (Concatenate)     (None, 8)            0           sequential_1[1][0]               
                                                                 input_2[0][0]                    
__________

In [8]:
# Mean-squared-error regression, optimised with Adam at its default settings.
model.compile(loss='mse', optimizer=Adam())

In [14]:
# Load the option quotes, discard rows with any missing value, and remove
# the columns this notebook does not use.
dropped_columns = ['exdate', 'impl_volatility', 'volume', 'open_interest', 'sigma_20']
df = (
    pd.read_csv('options-df-sigma.csv')
    .dropna(axis=0)
    .drop(columns=dropped_columns)
)
# Rescale strikes by 1/1000.
df['strike_price'] = df['strike_price'] / 1000

# Split by option type; cp_flag is constant within each subset, so drop it.
# NOTE(review): call_df / put_df are reassigned from `joined` in a later cell,
# so these two frames appear to be superseded before they are used.
is_call = df.cp_flag == 'C'
is_put = df.cp_flag == 'P'
call_df = df[is_call].drop(['cp_flag'], axis=1)
put_df = df[is_put].drop(['cp_flag'], axis=1)

In [15]:
# Daily closing prices of the underlying; the `date` and `close` columns are used below.
underlying = pd.read_csv('daily-closing-prices.csv')

In [16]:
# Peek at the cleaned option quotes.
df.head()

Unnamed: 0,date,cp_flag,strike_price,best_bid,best_offer,date_ndiff,treasury_rate,closing_price
0,19960129,C,600.0,28.0,29.0,47,5.17,624.22
1,19960129,P,615.0,5.375,5.75,47,5.17,624.22
2,19960129,P,550.0,10.125,11.75,509,5.05,624.22
3,19960129,P,610.0,9.5,10.0,145,5.12,624.22
4,19960129,P,600.0,2.625,2.875,47,5.17,624.22


In [18]:
# Peek at the first rows of the closing-price series.
underlying.head(20)

Unnamed: 0,date,close
0,19960102,620.73
1,19960103,621.32
2,19960104,617.7
3,19960105,616.71
4,19960108,618.46
5,19960109,609.45
6,19960110,598.48
7,19960111,602.69
8,19960112,601.81
9,19960115,599.82


In [68]:
# Prepend n_timesteps NaNs to the close series. np.roll (next cell) is circular,
# so without this padding the earliest windows would silently wrap around and
# pick up prices from the end of the series; with it, every such row contains
# a NaN and is removed by the NaN filter further down.
padded = np.insert(underlying.close.values, 0, np.full(n_timesteps, np.nan))

In [69]:
# Column `lag` is the padded series delayed by `lag` rows, so each row is a
# window ordered newest -> oldest: [close[t], close[t-1], ..., close[t-n_timesteps+1]].
rolled = np.column_stack([np.roll(padded, lag) for lag in range(n_timesteps)])

In [70]:
# Keep only complete windows: discard every row that contains at least one NaN.
row_has_nan = np.isnan(rolled).any(axis=1)
rolled = rolled[~row_has_nan]

In [76]:
# Sanity check: (number of complete windows, window length).
rolled.shape

(5520, 20)

In [78]:
# The first complete window ends on row n_timesteps - 1 of `underlying`, so the
# dates from that row onward line up one-to-one with the rows of `rolled`.
# The date becomes column 0; because `rolled` is a float array the dates are
# stored as floats here.
rolled = np.column_stack((underlying.date.values[n_timesteps - 1:], rolled))

In [82]:
# Wrap the array in a DataFrame; pandas assigns integer column labels, so
# column 0 is the date and the remaining columns are the price window.
price_history = pd.DataFrame(data=rolled)

In [86]:
# Attach each quote's price window by looking up its `date` in the history table.
# NOTE(review): DataFrame.join is a left join by default, so a quote whose date
# is missing from price_history would get NaN history columns - confirm none exist.
history_by_date = price_history.set_index(0)
joined = df.join(history_by_date, on='date')

In [88]:
# Re-derive the call / put subsets from the joined frame so they carry the
# price-history columns; cp_flag is constant within each subset, so drop it.
call_df, put_df = (
    joined[joined.cp_flag == flag].drop(columns=['cp_flag'])
    for flag in ('C', 'P')
)

In [90]:
# Remove the date column from both subsets before the feature matrices are built.
call_df, put_df = (frame.drop('date', axis=1) for frame in (call_df, put_df))

In [99]:
def _features_and_midprice(frame):
    """Return ``(X, y)`` for one option frame.

    ``X`` is every column except the two quote columns; ``y`` is the
    mid-point of ``best_bid`` and ``best_offer``.
    """
    X = frame.drop(['best_bid', 'best_offer'], axis=1).values
    y = ((frame.best_bid + frame.best_offer) / 2).values
    return X, y


# Hold out 1% of each option type as a test set, with a fixed seed.
split_options = dict(test_size=0.01, random_state=42)

call_X_train, call_X_test, call_y_train, call_y_test = train_test_split(
    *_features_and_midprice(call_df), **split_options)
put_X_train, put_X_test, put_y_train, put_y_test = train_test_split(
    *_features_and_midprice(put_df), **split_options)

In [100]:
def _to_model_inputs(X):
    """Split a flat feature matrix into the ``[sequence, flat]`` pair the model takes.

    The last ``n_timesteps`` columns become a ``(rows, n_timesteps, 1)``
    sequence; the first ``features`` columns become the flat input.

    If ``X`` is already a list it is returned unchanged, so re-running this
    cell no longer fails on inputs that were converted by a previous run.

    NOTE(review): the window columns are built newest -> oldest (np.roll with
    increasing lag), so the LSTM sees the most recent price first - confirm
    this ordering is intended.
    NOTE(review): assumes the first ``features`` columns are exactly the
    static features; the df.head() output shows an "Unnamed: 0" header, so
    verify no stray index column from the CSV is being picked up here.
    """
    if isinstance(X, list):
        return X
    sequence = X[:, -n_timesteps:].reshape(X.shape[0], n_timesteps, 1)
    flat = X[:, :features]
    return [sequence, flat]


call_X_train = _to_model_inputs(call_X_train)
call_X_test = _to_model_inputs(call_X_test)
put_X_train = _to_model_inputs(put_X_train)
put_X_test = _to_model_inputs(put_X_test)

In [102]:
# Train on the call options only. 1% of the training rows is held out for
# validation, and TensorBoard logs are written by the callback.
fit_options = dict(
    batch_size=n_batch,
    epochs=n_epochs,
    validation_split=0.01,
    callbacks=[TensorBoard()],
    verbose=1,
)
history = model.fit(call_X_train, call_y_train, **fit_options)

Instructions for updating:
Use tf.cast instead.
Train on 5001241 samples, validate on 50518 samples
Epoch 1/100
 610304/5001241 [==>...........................] - ETA: 3:25 - loss: 138703.5878

KeyboardInterrupt: 