In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
# Read the options table, drop rows with any missing value, remove the listed
# columns, and divide strike_price by 1000.
df = (
    pd.read_csv('../options-df-sigma.csv')
    .dropna(axis=0)
    .drop(columns=['date', 'exdate', 'impl_volatility', 'volume', 'open_interest'])
    .assign(strike_price=lambda frame: frame.strike_price / 1000)
)
# One frame per option type; cp_flag is constant inside each subset, so it is removed.
call_df = df.loc[df.cp_flag == 'C'].drop(columns=['cp_flag'])
put_df = df.loc[df.cp_flag == 'P'].drop(columns=['cp_flag'])

In [3]:
# Features: every column except the two quote columns.
# Target: the midpoint of best_bid and best_offer.
call_features = call_df.drop(columns=['best_bid', 'best_offer'])
call_mid_price = (call_df.best_bid + call_df.best_offer) / 2
call_X_train, call_X_test, call_y_train, call_y_test = train_test_split(
    call_features, call_mid_price, test_size=0.01, random_state=42)

put_features = put_df.drop(columns=['best_bid', 'best_offer'])
put_mid_price = (put_df.best_bid + put_df.best_offer) / 2
put_X_train, put_X_test, put_y_train, put_y_test = train_test_split(
    put_features, put_mid_price, test_size=0.01, random_state=42)

In [4]:
from keras.models import load_model
# Load the saved Keras models for calls and puts (same order as the file names).
call, put = (load_model(model_path) for model_path in ('mlp1-call30.h5', 'mlp1-put30.h5'))

Using TensorFlow backend.


Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.


In [35]:
from scipy.stats import norm
def black_scholes(row):
    """Black-Scholes price of a call option computed from the fields of `row`.

    `row` may be any object exposing the attributes below (a single record or
    a whole DataFrame, in which case the arithmetic is element-wise):

    - closing_price: underlying price S
    - strike_price:  strike X
    - date_ndiff:    divided by 365 to obtain the maturity T
                     (presumably days to expiry -- confirm against the data)
    - treasury_rate: divided by 100 to obtain the rate r
                     (presumably quoted in percent -- confirm)
    - sigma_20:      volatility used directly as sigma

    Returns S * N(d1) - X * exp(-r T) * N(d2).
    """
    spot, strike = row.closing_price, row.strike_price
    years = row.date_ndiff / 365
    rate = row.treasury_rate / 100
    vol = row.sigma_20
    # sigma * sqrt(T) appears in both d1 and d2, so compute it once.
    vol_sqrt_t = vol * (years ** .5)
    d1 = (np.log(spot / strike) + (rate + (vol ** 2) / 2) * years) / vol_sqrt_t
    d2 = d1 - vol_sqrt_t
    return spot * norm.cdf(d1) - strike * np.exp(-rate * years) * norm.cdf(d2)
def black_scholes_put(row):
    """Black-Scholes price of a put option computed from the fields of `row`.

    `row` may be any object exposing the attributes below (a single record or
    a whole DataFrame, in which case the arithmetic is element-wise):

    - closing_price: underlying price S
    - strike_price:  strike X
    - date_ndiff:    divided by 365 to obtain the maturity T
                     (presumably days to expiry -- confirm against the data)
    - treasury_rate: divided by 100 to obtain the rate r
                     (presumably quoted in percent -- confirm)
    - sigma_20:      volatility used directly as sigma

    Returns N(-d2) * X * exp(-r T) - S * N(-d1).
    """
    spot, strike = row.closing_price, row.strike_price
    years = row.date_ndiff / 365
    rate = row.treasury_rate / 100
    vol = row.sigma_20
    # sigma * sqrt(T) appears in both d1 and d2, so compute it once.
    vol_sqrt_t = vol * (years ** .5)
    d1 = (np.log(spot / strike) + (rate + (vol ** 2) / 2) * years) / vol_sqrt_t
    d2 = d1 - vol_sqrt_t
    return norm.cdf(-d2) * strike * np.exp(-rate * years) - spot * norm.cdf(-d1)

In [36]:
def error_metrics(actual, predicted):
    """Summarise the pricing error of `predicted` against `actual`.

    Both arguments must support element-wise arithmetic with each other
    (numpy arrays or pandas Series) and be one-dimensional; `actual` is the
    denominator of the relative error, so zero entries in it yield inf/nan.

    Returns a list ``[mse, bias, aape, mape, pe5, pe10, pe20]`` where

    - mse:  mean squared error of ``actual - predicted``
    - bias: 100 * median of the signed relative error
    - aape: 100 * mean of the absolute relative error
    - mape: 100 * median of the absolute relative error
    - peK:  percentage of samples whose absolute relative error is below K%
    """
    diff = actual - predicted
    mse = np.mean(np.square(diff))
    rel = diff / actual
    # Computed once and reused by every metric below.
    abs_rel = np.abs(rel)
    n = rel.shape[0]
    bias = 100 * np.median(rel)
    aape = 100 * np.mean(abs_rel)
    mape = 100 * np.median(abs_rel)
    # np.count_nonzero counts the True entries in C; the builtin sum() would
    # iterate the array element by element in Python.
    pe5, pe10, pe20 = (
        100 * np.count_nonzero(abs_rel < tol) / n for tol in (0.05, 0.10, 0.20)
    )
    return [mse, bias, aape, mape, pe5, pe10, pe20]

In [37]:
# Network predictions flattened to one value per test sample.
call_mlp_pred = call.predict(call_X_test).reshape(call_y_test.shape[0])
put_mlp_pred = put.predict(put_X_test).reshape(put_y_test.shape[0])

# line1/line2: network (call, put); line3/line4: Black-Scholes (call, put).
line1 = error_metrics(call_y_test, call_mlp_pred)
line2 = error_metrics(put_y_test, put_mlp_pred)
line3 = error_metrics(call_y_test, black_scholes(call_X_test))
line4 = error_metrics(put_y_test, black_scholes_put(put_X_test))

In [38]:
# Put the training-set MSE in front of the 7 test metrics returned by error_metrics.
# Slice assignment replaces whatever precedes the last 7 entries, so the first
# run inserts the value and any re-run of this cell overwrites it instead of
# prepending a second copy (which would shift every column in the printed table).
line3[:-7] = [np.mean(np.square(call_y_train - black_scholes(call_X_train)))]
line4[:-7] = [np.mean(np.square(put_y_train - black_scholes_put(put_X_train)))]

In [48]:
# Training-set MSE of the call network, placed in front of the 7 test metrics
# returned by error_metrics. Slice assignment overwrites anything already
# stored ahead of those metrics, so re-running this cell (e.g. after an
# interrupted run) never prepends a duplicate value.
call_train_pred = call.predict(call_X_train).reshape(call_y_train.shape[0])
line1[:-7] = [np.mean(np.square(call_y_train - call_train_pred))]

KeyboardInterrupt: 

In [51]:
# Training-set MSE of the put network, placed in front of the 7 test metrics
# returned by error_metrics. Slice assignment overwrites anything already
# stored ahead of those metrics, so re-running this cell never prepends a
# duplicate value.
put_train_pred = put.predict(put_X_train).reshape(put_y_train.shape[0])
line2[:-7] = [np.mean(np.square(put_y_train - put_train_pred))]

In [53]:
# One LaTeX table row per result list: two plain numbers followed by six
# percentages. A raw string keeps the literal `\%` and the trailing `\\`
# without relying on the invalid escape sequence '\%' in a normal string.
row_template = r'& {:.2f} & {:.2f} & {:.2f}\% & {:.2f}\% & {:.2f}\% & {:.2f}\% & {:.2f}\% & {:.2f}\% \\'
for line in (line1, line2, line3, line4):
    print(row_template.format(*line))

& 23.71 & 24.00 & 0.01\% & 24.49\% & 2.12\% & 61.04\% & 68.39\% & 74.33\% \\
& 15.65 & 15.66 & 5.03\% & 43.73\% & 18.48\% & 30.46\% & 40.51\% & 51.13\% \\
& 716.30 & 725.61 & 9.48\% & 41.34\% & 9.56\% & 44.53\% & 50.24\% & 54.75\% \\
& 1326.50 & 1293.78 & 100.00\% & 80.50\% & 100.00\% & 8.21\% & 12.17\% & 15.85\% \\
