In [4]:
from keras.models import Sequential
from keras.layers import Dense, Activation, LeakyReLU, BatchNormalization
from keras import backend
from keras.callbacks import TensorBoard
from keras.optimizers import Adam
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [12]:
# Hyperparameters consumed by the model-building and training cells.
# n_units: width of every hidden Dense layer; layers: number of hidden Dense layers.
n_units, layers = 400, 4
# n_batch: mini-batch size passed to model.fit; n_epochs: epoch count passed to model.fit.
n_batch, n_epochs = 4096, 50

In [6]:
# Load the option quotes, discard rows with any missing value, remove the
# columns this notebook does not use, and rescale the strike price.
# NOTE(review): the division by 1000 presumably undoes a x1000 scaling of
# strikes in the raw file — confirm against the data source.
df = (
    pd.read_csv('options-df-sigma.csv')
    .dropna(axis=0)
    .drop(columns=['date', 'exdate', 'impl_volatility', 'volume', 'open_interest'])
    .assign(strike_price=lambda frame: frame.strike_price / 1000)
)

# One frame per option type; the flag column is constant within each frame,
# so it is removed.
call_df = df.loc[df['cp_flag'] == 'C'].drop(columns='cp_flag')
put_df = df.loc[df['cp_flag'] == 'P'].drop(columns='cp_flag')

In [4]:
# Preview the first rows of the call-option frame.
# NOTE(review): the table stored below this cell still lists `volume` and
# `open_interest`, which the loading cell drops, and this cell's execution
# count is lower than the loading cell's — the output looks stale; re-run
# the notebook top to bottom to refresh it.
call_df.head()

Unnamed: 0,strike_price,best_bid,best_offer,volume,open_interest,date_ndiff,treasury_rate,closing_price,sigma_20
0,600.0,28.0,29.0,4,8555,47,5.17,624.22,0.007761
10,475.0,152.875,153.875,0,2700,145,5.12,624.22,0.007761
13,600.0,52.375,53.375,0,2155,327,5.05,624.22,0.007761
17,610.0,20.0,20.75,923,8860,47,5.17,624.22,0.007761
18,675.0,34.0,35.0,0,350,691,5.1,624.22,0.007761


In [8]:
# Targets are the quoted bid and offer; every other column is a feature.
# 1% of each option type is held out as a test set, with a fixed seed so the
# partition is the same on every run.
quote_cols = ['best_bid', 'best_offer']
split_opts = dict(test_size=0.01, random_state=42)

call_X_train, call_X_test, call_y_train, call_y_test = train_test_split(
    call_df.drop(columns=quote_cols),
    call_df[quote_cols],
    **split_opts,
)
put_X_train, put_X_test, put_y_train, put_y_test = train_test_split(
    put_df.drop(columns=quote_cols),
    put_df[quote_cols],
    **split_opts,
)

In [9]:
# Fully connected regressor for the call quotes.
# The first hidden block is Dense -> LeakyReLU; each of the remaining
# (layers - 1) hidden blocks is Dense -> BatchNormalization -> LeakyReLU.
stack = [Dense(n_units, input_dim=call_X_train.shape[1]), LeakyReLU()]
for _ in range(1, layers):
    stack += [Dense(n_units), BatchNormalization(), LeakyReLU()]

# Two output units, one per target column (best_bid, best_offer); the ReLU
# keeps predicted prices non-negative.
stack.append(Dense(2, activation='relu'))

model = Sequential(stack)
model.compile(optimizer=Adam(), loss='mse')







In [10]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 400)               2400      
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 400)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 400)               160400    
_________________________________________________________________
batch_normalization_1 (Batch (None, 400)               1600      
_________________________________________________________________
leaky_re_lu_2 (LeakyReLU)    (None, 400)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 400)               160400    
_________________________________________________________________
batch_normalization_2 (Batch (None, 400)               1600      
__________

In [13]:
# Stage 1: train the call-quote model with the optimizer it was compiled with.
# 1% of the training rows is held out by Keras for validation.
# TensorBoard events are written to a stage-specific sub-directory: the later
# fine-tuning fits in this notebook also attach a TensorBoard callback, and
# with the default log directory all stages would be merged into a single,
# unreadable run.
history = model.fit(
    call_X_train, call_y_train,
    batch_size=n_batch, epochs=n_epochs,
    validation_split=0.01,
    callbacks=[TensorBoard(log_dir='./logs/call-lr-default')],
    verbose=1,
)


Train on 5001241 samples, validate on 50518 samples


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
 258048/5001241 [>.............................] - ETA: 31s - loss: 96.8326

KeyboardInterrupt: 

In [14]:
# Checkpoint the model as it stands after the first training run
# (which was stopped manually, see the KeyboardInterrupt above).
model.save('mlp2-call30.h5')

In [15]:
# Predict the two target columns (best_bid, best_offer) for the held-out calls
# using the weights from the first training run.
call_y_pred30 = model.predict(call_X_test)

In [16]:
# Test-set errors for the first checkpoint's predictions.
# "equilibrium": error of the bid/offer midpoint (row mean of the two columns).
# "spread": error of offer minus bid (difference along the column axis).
call_true = call_y_test.values
mid_err = call_true.mean(axis=1) - call_y_pred30.mean(axis=1)
spread_err = np.diff(call_true, axis=1) - np.diff(call_y_pred30, axis=1)
print('equilibrium mse', np.square(mid_err).mean())
print('spread mse', np.square(spread_err).mean())

equilibrium mse 289.3989112870401
spread mse 9.904612136909982


In [17]:
# Stage 2: continue training with a smaller Adam step size (1e-4).
# Re-compiling installs a fresh optimizer; the model object, and therefore
# its trained layers, is reused.
model.compile(loss='mse', optimizer=Adam(1e-4))

# TensorBoard events go to a stage-specific sub-directory: every fit in this
# notebook attaches a TensorBoard callback, and with the default log
# directory all stages would be merged into a single, unreadable run.
history = model.fit(
    call_X_train, call_y_train,
    batch_size=n_batch, epochs=n_epochs,
    validation_split=0.01,
    callbacks=[TensorBoard(log_dir='./logs/call-lr-1e-4')],
    verbose=1,
)

Train on 5001241 samples, validate on 50518 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
 163840/5001241 [..............................] - ETA: 34s - loss: 71.5201

KeyboardInterrupt: 

In [18]:
# Checkpoint the model after the second training run (Adam at 1e-4,
# stopped manually, see the KeyboardInterrupt above).
model.save('mlp2-call40.h5')

In [19]:
# Predict on the held-out calls with the second checkpoint and report errors.
# "equilibrium": error of the bid/offer midpoint (row mean of the two columns).
# "spread": error of offer minus bid (difference along the column axis).
call_y_pred40 = model.predict(call_X_test)

call_true = call_y_test.values
mid_err = call_true.mean(axis=1) - call_y_pred40.mean(axis=1)
spread_err = np.diff(call_true, axis=1) - np.diff(call_y_pred40, axis=1)
print('equilibrium mse', np.square(mid_err).mean())
print('spread mse', np.square(spread_err).mean())

equilibrium mse 287.78500592993214
spread mse 18.71940528005765


In [20]:
# Stage 3: continue training with a smaller Adam step size (1e-5).
# Re-compiling installs a fresh optimizer; the model object, and therefore
# its trained layers, is reused.
model.compile(loss='mse', optimizer=Adam(1e-5))

# TensorBoard events go to a stage-specific sub-directory: every fit in this
# notebook attaches a TensorBoard callback, and with the default log
# directory all stages would be merged into a single, unreadable run.
history = model.fit(
    call_X_train, call_y_train,
    batch_size=n_batch, epochs=n_epochs,
    validation_split=0.01,
    callbacks=[TensorBoard(log_dir='./logs/call-lr-1e-5')],
    verbose=1,
)

Train on 5001241 samples, validate on 50518 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
 286720/5001241 [>.............................] - ETA: 31s - loss: 68.1738

KeyboardInterrupt: 

In [21]:
# Checkpoint the model after the third training run, then score it on the
# held-out calls.
model.save('mlp2-call50.h5')
call_y_pred50 = model.predict(call_X_test)

# "equilibrium": error of the bid/offer midpoint (row mean of the two columns).
# "spread": error of offer minus bid (difference along the column axis).
call_true = call_y_test.values
mid_err = call_true.mean(axis=1) - call_y_pred50.mean(axis=1)
spread_err = np.diff(call_true, axis=1) - np.diff(call_y_pred50, axis=1)
print('equilibrium mse', np.square(mid_err).mean())
print('spread mse', np.square(spread_err).mean())

equilibrium mse 17.790098248083083
spread mse 7.623696717293795


In [22]:
# Stage 4: a final, short fine-tuning pass with Adam at 1e-6.
# Re-compiling installs a fresh optimizer; the model object, and therefore
# its trained layers, is reused.
model.compile(loss='mse', optimizer=Adam(1e-6))

# This stage runs a fixed 10 epochs rather than n_epochs.
# TensorBoard events go to a stage-specific sub-directory: every fit in this
# notebook attaches a TensorBoard callback, and with the default log
# directory all stages would be merged into a single, unreadable run.
history = model.fit(
    call_X_train, call_y_train,
    batch_size=n_batch, epochs=10,
    validation_split=0.01,
    callbacks=[TensorBoard(log_dir='./logs/call-lr-1e-6')],
    verbose=1,
)

Train on 5001241 samples, validate on 50518 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [24]:
# Checkpoint the model after the final fine-tuning run, then score it on the
# held-out calls.
model.save('mlp2-call60.h5')
call_y_pred60 = model.predict(call_X_test)

# "equilibrium": error of the bid/offer midpoint (row mean of the two columns).
# "spread": error of offer minus bid (difference along the column axis).
call_true = call_y_test.values
mid_err = call_true.mean(axis=1) - call_y_pred60.mean(axis=1)
spread_err = np.diff(call_true, axis=1) - np.diff(call_y_pred60, axis=1)
print('equilibrium mse', np.square(mid_err).mean())
print('spread mse', np.square(spread_err).mean())

equilibrium mse 15.2128482637227
spread mse 7.5714239975514355
