In [1]:
import os

import numpy as np
import pandas as pd
from keras import backend as K
from keras.callbacks import TensorBoard
from keras.layers import Dense, Activation, LeakyReLU, BatchNormalization, LSTM, Bidirectional, Input, Concatenate
from keras.models import Sequential, Model, load_model
from keras.optimizers import Adam
from keras.utils import plot_model
from sklearn.model_selection import train_test_split

Using TensorFlow backend.


In [2]:
# Load the option quotes, discard rows with any missing value, remove the
# columns this notebook does not use, and divide strike_price by 1000
# (presumably the raw file stores strikes multiplied by 1000 -- TODO confirm).
df = (
    pd.read_csv('../options-df-sigma.csv')
    .dropna(axis=0)
    .drop(columns=['exdate', 'impl_volatility', 'volume', 'open_interest', 'sigma_20'])
    .assign(strike_price=lambda quotes: quotes.strike_price / 1000)
)

# One frame per option type; cp_flag is dropped once it has served as the filter.
call_df = df.loc[df['cp_flag'] == 'C'].drop(columns=['cp_flag'])
put_df = df.loc[df['cp_flag'] == 'P'].drop(columns=['cp_flag'])

# Daily closing prices, used further down to build the price-history windows.
underlying = pd.read_csv('../daily-closing-prices.csv')

In [3]:
# Preview the first rows of the daily closing-price table.
underlying.head()

Unnamed: 0,date,close
0,19960102,620.73
1,19960103,621.32
2,19960104,617.7
3,19960105,616.71
4,19960108,618.46


In [4]:
# Number of consecutive closing prices in each history window; it sizes both
# the rolled price matrix and the sequence input of the model.
N_TIMESTEPS = 20

In [5]:
# Inactive draft: these statements also appear, uncommented, at the start of
# the data-preparation cell further down. Nothing in this cell is executed.
# padded = np.insert(underlying.close.values, 0, np.array([np.nan] * N_TIMESTEPS))
# rolled = np.column_stack([np.roll(padded, i) for i in range(N_TIMESTEPS)])
# rolled = rolled[~np.isnan(rolled).any(axis=1)]
# rolled = np.column_stack((underlying.date.values[N_TIMESTEPS - 1:], rolled))
# price_history = pd.DataFrame(data=rolled)
# joined = df.join(price_history.set_index(0), on='date')
# call_df = joined[joined.cp_flag == 'C'].drop(['cp_flag'], axis=1)

In [6]:
# Show the last rows of the call-option frame.
call_df.tail()

Unnamed: 0,date,strike_price,best_bid,best_offer,date_ndiff,treasury_rate,closing_price
10751018,20171229,2950.0,25.1,26.5,367,1.76,2673.61
10751019,20171229,3000.0,15.9,17.1,367,1.76,2673.61
10751020,20171229,3050.0,10.0,10.9,367,1.76,2673.61
10751021,20171229,3100.0,6.4,7.1,367,1.76,2673.61
10751022,20171229,3200.0,2.85,3.6,367,1.76,2673.61


In [7]:
def _split_option_frame(option_df):
    """Return (X_train, X_test, y_train, y_test) for one option frame.

    X is every column except the two quote columns; y is the bid/offer
    mid-price. 1% of the rows are held out, with a fixed seed.
    """
    inputs = option_df.drop(columns=['best_bid', 'best_offer']).values
    mid_price = ((option_df.best_bid + option_df.best_offer) / 2).values
    return train_test_split(inputs, mid_price, test_size=0.01, random_state=42)


def _as_model_inputs(matrix):
    """Split a 2-D feature matrix into the [sequence, vector] pair the model takes.

    The last N_TIMESTEPS columns become a (rows, N_TIMESTEPS, 1) array and the
    first four columns are passed through unchanged.
    """
    history = matrix[:, -N_TIMESTEPS:].reshape(matrix.shape[0], N_TIMESTEPS, 1)
    return [history, matrix[:, :4]]


# Column `lag` of the stacked matrix is the close series shifted down by `lag`
# rows, so each row reads: that row's close, the close one row earlier, and so
# on. The NaN padding marks rows without a full window; np.roll's wrap-around
# only touches rows that also contain padding, so dropping NaN rows removes both.
# NOTE(review): the window is therefore ordered newest-first and starts with the
# close of `date` itself -- confirm this is the order the LSTM is meant to see.
padded = np.insert(underlying.close.values, 0, np.full(N_TIMESTEPS, np.nan))
lagged_columns = [np.roll(padded, lag) for lag in range(N_TIMESTEPS)]
rolled = np.column_stack(lagged_columns)
has_gap = np.isnan(rolled).any(axis=1)
rolled = rolled[~has_gap]

# The first complete window ends at row N_TIMESTEPS - 1 of `underlying`, hence
# the date slice. Column 0 of the result is the date, the rest is the window.
rolled = np.column_stack((underlying.date.values[N_TIMESTEPS - 1:], rolled))
price_history = pd.DataFrame(data=rolled)

# Attach each quote's price window by date, then split by option type.
joined = df.join(price_history.set_index(0), on='date')
call_df = joined.loc[joined['cp_flag'] == 'C'].drop(columns=['cp_flag', 'date'])
put_df = joined.loc[joined['cp_flag'] == 'P'].drop(columns=['cp_flag', 'date'])

call_X_train, call_X_test, call_y_train, call_y_test = _split_option_frame(call_df)
put_X_train, put_X_test, put_y_train, put_y_test = _split_option_frame(put_df)

# Repackage every feature matrix as the two-element input list.
call_X_train = _as_model_inputs(call_X_train)
call_X_test = _as_model_inputs(call_X_test)
put_X_train = _as_model_inputs(put_X_train)
put_X_test = _as_model_inputs(put_X_test)

In [8]:
# --- Architecture ---
layers = 4        # make_model() stacks `layers - 1` Dense/BatchNorm/LeakyReLU blocks
features = 4      # width of the non-sequence input vector
# NOTE(review): n_timesteps is not referenced in the visible cells; the model
# and the windowing use N_TIMESTEPS (20) instead -- confirm whether it is stale.
n_timesteps = 60

# --- Training ---
n_batch = 4096    # batch_size passed to the fit() calls
# NOTE(review): the visible fit() calls hard-code epochs=10 / epochs=5 rather
# than using n_epochs -- confirm whether it is still needed.
n_epochs = 100

In [9]:
def make_model():
    """Build the uncompiled two-input price regression network.

    The model takes a list of two inputs:
      * a (N_TIMESTEPS, 1) sequence, encoded by four stacked 8-unit LSTM
        layers of which only the last returns its final output alone;
      * a (features,) vector, concatenated with the LSTM encoding.

    The concatenation goes through `layers - 1` blocks of
    Dense(100) -> BatchNormalization -> LeakyReLU, followed by a single
    ReLU-activated output unit.

    Reads the module-level N_TIMESTEPS, features and layers.

    Returns:
        A keras Model; the caller is responsible for compiling it.
    """
    price_window = Input((N_TIMESTEPS, 1))
    contract_features = Input((features,))

    # Recurrent encoder: every layer but the last hands a full sequence onward.
    recurrent_stack = Sequential()
    recurrent_stack.add(LSTM(units=8, input_shape=(N_TIMESTEPS, 1), return_sequences=True))
    for emit_full_sequence in (True, True, False):
        recurrent_stack.add(LSTM(units=8, return_sequences=emit_full_sequence))
    encoded_history = recurrent_stack(price_window)

    hidden = Concatenate()([encoded_history, contract_features])

    # Fully connected head.
    for _ in range(layers - 1):
        hidden = Dense(100)(hidden)
        hidden = BatchNormalization()(hidden)
        hidden = LeakyReLU()(hidden)

    price = Dense(1, activation='relu')(hidden)

    return Model(inputs=[price_window, contract_features], outputs=price)

In [10]:
# Restore the call model from its final checkpoint when one is on disk. That
# file is written by the last call-training cell further down, so on a fresh
# checkout it does not exist yet; fall back to an untrained network instead of
# failing, which keeps Restart & Run All working.
CALL_CHECKPOINT = 'saved-models/20191207-call-lstm-v3.h5'
if os.path.isfile(CALL_CHECKPOINT):
    call_model = load_model(CALL_CHECKPOINT)
else:
    print('Checkpoint %s not found; building an untrained call model.' % CALL_CHECKPOINT)
    call_model = make_model()

# The put model always starts from freshly initialised weights.
put_model = make_model()







Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [11]:
# Print the layer-by-layer architecture of the call model.
call_model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 20, 1)        0                                            
__________________________________________________________________________________________________
sequential_1 (Sequential)       (None, 8)            1952        input_1[0][0]                    
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 4)            0                                            
__________________________________________________________________________________________________
concatenate_1 (Concatenate)     (None, 12)           0           sequential_1[1][0]               
                                                                 input_2[0][0]                    
__________

In [15]:
# Call model, stage 1 of 3: compile with Adam at lr=1e-2 and an MSE loss,
# train for 10 epochs, then checkpoint the result as v1.
call_model.compile(loss='mse', optimizer=Adam(lr=1e-2))
history = call_model.fit(
    x=call_X_train,
    y=call_y_train,
    epochs=10,
    batch_size=n_batch,
    validation_split=0.01,
    verbose=1,
    callbacks=[TensorBoard()],
)
call_model.save('saved-models/20191207-call-lstm-v1.h5')

Train on 5001241 samples, validate on 50518 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [16]:
# Call model, stage 2 of 3: recompile with the learning rate lowered to 1e-3,
# train for 5 more epochs, then checkpoint the result as v2.
call_model.compile(loss='mse', optimizer=Adam(lr=1e-3))
history = call_model.fit(
    x=call_X_train,
    y=call_y_train,
    epochs=5,
    batch_size=n_batch,
    validation_split=0.01,
    verbose=1,
    callbacks=[TensorBoard()],
)
call_model.save('saved-models/20191207-call-lstm-v2.h5')

Train on 5001241 samples, validate on 50518 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [17]:
# Call model, stage 3 of 3: recompile with the learning rate lowered to 1e-4,
# train for 5 more epochs, then checkpoint the result as v3.
call_model.compile(loss='mse', optimizer=Adam(lr=1e-4))
history = call_model.fit(
    x=call_X_train,
    y=call_y_train,
    epochs=5,
    batch_size=n_batch,
    validation_split=0.01,
    verbose=1,
    callbacks=[TensorBoard()],
)
call_model.save('saved-models/20191207-call-lstm-v3.h5')

Train on 5001241 samples, validate on 50518 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [11]:
# Put model, stage 1 of 3: compile with Adam at lr=1e-2 and an MSE loss,
# train for 10 epochs, then checkpoint the result as v1.
put_model.compile(loss='mse', optimizer=Adam(lr=1e-2))
history = put_model.fit(
    x=put_X_train,
    y=put_y_train,
    epochs=10,
    batch_size=n_batch,
    validation_split=0.01,
    verbose=1,
    callbacks=[TensorBoard()],
)
put_model.save('saved-models/20191207-put-lstm-v1.h5')

Train on 5535888 samples, validate on 55919 samples


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [12]:
# Put model, stage 2 of 3: recompile with the learning rate lowered to 1e-3,
# train for 5 more epochs, then checkpoint the result as v2.
put_model.compile(loss='mse', optimizer=Adam(lr=1e-3))
history = put_model.fit(
    x=put_X_train,
    y=put_y_train,
    epochs=5,
    batch_size=n_batch,
    validation_split=0.01,
    verbose=1,
    callbacks=[TensorBoard()],
)
put_model.save('saved-models/20191207-put-lstm-v2.h5')

Train on 5535888 samples, validate on 55919 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [15]:
# Put model, stage 3 of 3: recompile with the learning rate lowered to 1e-4,
# train for 5 more epochs, then checkpoint the result as v3.
put_model.compile(loss='mse', optimizer=Adam(lr=1e-4))
history = put_model.fit(
    x=put_X_train,
    y=put_y_train,
    epochs=5,
    batch_size=n_batch,
    validation_split=0.01,
    verbose=1,
    callbacks=[TensorBoard()],
)
put_model.save('saved-models/20191207-put-lstm-v3.h5')

Train on 5535888 samples, validate on 55919 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
