In [1]:
import sys
import os
# Append the library path to PYTHONPATH, so library can be imported.
sys.path.append(os.path.dirname(os.getcwd()))

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from library import bs
from library import plot
from library import bs
from library import common as cm

In [2]:
from sklearn.model_selection import train_test_split

In [3]:
%run setup.py
%load_ext autoreload
%autoreload 2
# Loading the data must come after setup, because some of the parameters are going to be overridden.
%run Load_Clean_aux.ipynb

Load and clean the training and validation data.
Original data size is 28846
We remove in-the-money samples. 15540 samples are removed. We have  46.13% of original data left.
We shrink moneyness range. 49 samples are removed. We have  45.96% of original data left.


Clean and load all Monte Carlo test data.

We remove in-the-money samples. 3329 samples are removed. We have  47.78% of original data left.
We shrink moneyness range. 0 samples are removed. We have  47.78% of original data left.


We remove in-the-money samples. 3569 samples are removed. We have  47.39% of original data left.
We shrink moneyness range. 22 samples are removed. We have  47.07% of original data left.


We remove in-the-money samples. 3411 samples are removed. We have  48.22% of original data left.
We shrink moneyness range. 0 samples are removed. We have  48.22% of original data left.


We remove in-the-money samples. 3173 samples are removed. We have  48.52% of original data left.
We shrink moneyness range. 9

We remove in-the-money samples. 3643 samples are removed. We have  47.60% of original data left.
We shrink moneyness range. 59 samples are removed. We have  46.75% of original data left.


We remove in-the-money samples. 3104 samples are removed. We have  48.89% of original data left.
We shrink moneyness range. 0 samples are removed. We have  48.89% of original data left.


We remove in-the-money samples. 3124 samples are removed. We have  48.56% of original data left.
We shrink moneyness range. 0 samples are removed. We have  48.56% of original data left.


We remove in-the-money samples. 3247 samples are removed. We have  48.61% of original data left.
We shrink moneyness range. 0 samples are removed. We have  48.61% of original data left.


We remove in-the-money samples. 3491 samples are removed. We have  48.27% of original data left.
We shrink moneyness range. 0 samples are removed. We have  48.27% of original data left.


We remove in-the-money samples. 3467 samples are removed. W

In [4]:
def fit_reg(df_train, features, hull_white=False):
    """Fit one regression on the call rows and one on the put rows of ``df_train``.

    Rows are split on the ``cp_int`` column: rows with value 0 feed the model
    stored under ``'call'`` and rows with value 1 feed the model stored under
    ``'put'``. Each subset is handed to ``cm.fit_reg`` with
    ``intercept_bl=False``, and the fitted ``coef_`` values are printed with
    four decimals.

    Parameters
    ----------
    df_train : DataFrame containing a ``cp_int`` column; passed on in two
        subsets to ``cm.fit_reg``.
    features : passed through unchanged to ``cm.fit_reg``.
    hull_white : passed through unchanged to ``cm.fit_reg``.

    Returns
    -------
    dict with keys ``'call'`` and ``'put'`` holding the two fitted models.
    """
    # (result key, cp_int value selecting the rows, text printed before the coefficients)
    groups = (
        ('call', 0, 'Coefficient of regression for calls :'),
        ('put', 1, 'Coefficient of regression for puts :'),
    )

    fitted = {}
    for key, cp_flag, message in groups:
        subset = df_train.loc[df_train['cp_int'] == cp_flag]
        model = cm.fit_reg(subset, features, intercept_bl=False, hull_white=hull_white)
        print(message, [f'{co:.4f}' for co in model.coef_])
        fitted[key] = model

    return fitted

### Normal: no permute, no VIX

In [5]:
# Root folder for this section's regression results; the cells below build their output paths from it.
res_dir = DATA_DIR + 'Result_Normal/Regression/'

#### BS

In [6]:
# Black-Scholes benchmark: use the `delta_bs` column of each MC test set as the delta.
os.makedirs(f'{res_dir}BS_Benchmark/pnl/', exist_ok=True)

for i in range(NUM_TEST):
    df_test = mc_sets[i]
    # One CSV per test set, indexed by the position of the set.
    bs.store_pnl(df_test, df_test['delta_bs'], pnl_path=f'{res_dir}BS_Benchmark/pnl/pnl{i}.csv', dt=DT)

#### Delta regression

We fit the regression on calls and puts separately. Then we test the regression on all MC sets, again separately for calls and puts.

In [8]:
# Delta-only regression: `delta_bs` is the single regressor.
features = ['delta_bs']

# Make sure the output folder for this model exists before anything is written to it.
sub_res = f'{res_dir}Delta_Regression/pnl/'
os.makedirs(sub_res, exist_ok=True)

# Fit on the training data; `fit_reg` prints the call and put coefficients.
lins = fit_reg(df_train, features)

Coefficient of regression for calls : ['1.0017']
Coefficient of regression for puts : ['1.0028']


In [9]:
# Apply the fitted models to every MC test set and pass the resulting delta to `bs.store_pnl`.
for i in range(NUM_TEST):
    df_test = mc_sets[i]
    delta = cm.test_mc_core(lins, features, df_test, DT)
    bs.store_pnl(df_test, delta, pnl_path=f'{sub_res}pnl{i}.csv', dt=DT)

#### Delta vega regression

In [10]:
# Two regressors this time: `delta_bs` and `vega`.
features = ['delta_bs', 'vega']

# Output folder for the delta-vega model.
sub_res = f'{res_dir}Delta_Vega/pnl/'
os.makedirs(sub_res, exist_ok=True)

# Fit on the training data; `fit_reg` prints the call and put coefficients.
lins = fit_reg(df_train, features)

Coefficient of regression for calls : ['1.0036', '-0.0001']
Coefficient of regression for puts : ['1.0134', '0.0003']


In [11]:
# Same evaluation loop as for the delta-only model, now with the delta-vega fit held in `lins`.
for i in range(NUM_TEST):
    df_test = mc_sets[i]
    delta = cm.test_mc_core(lins, features, df_test, DT)
    bs.store_pnl(df_test, delta, pnl_path=f'{sub_res}pnl{i}.csv', dt=DT)

#### Hull White model

In [12]:
# Regressors used for the Hull-White fit.
features = ['vega_s', 'delta_vega_s', 'delta2_vega_s']

# `hull_white=True` is forwarded by `fit_reg` to `cm.fit_reg`.
lins = fit_reg(df_train, features, hull_white=True)

Coefficient of regression for calls : ['0.0252', '-0.0417', '-0.0611']
Coefficient of regression for puts : ['-0.0307', '-0.2230', '-0.4063']


In [13]:
# Output folder for the Hull-White results; created if it does not exist yet.
sub_res = f'{res_dir}Hull_White/pnl/'
os.makedirs(sub_res, exist_ok=True)

In [14]:
# Evaluate the Hull-White fit on every MC test set.
for i in range(NUM_TEST):
    df_test = mc_sets[i]
    delta = cm.test_mc_core(lins, features, df_test, DT)
    # Unlike the other models, the regression output is added to `delta_bs`
    # before it is handed to `bs.store_pnl`.
    bs.store_pnl(df_test, delta + df_test['delta_bs'], pnl_path=f'{sub_res}pnl{i}.csv', dt=DT)

### VIX: with VIX, but no permute

In [15]:
# Root folder for the VIX results; the cells below build their output paths from it.
res_dir = DATA_DIR + 'Result_VIX/Regression/'

The BS benchmark is the same as before. We run it again so that its files sit in the same folder as the other results, for the convenience of later diagnostics.

In [16]:
# Black-Scholes benchmark again, this time written under the current `res_dir`.
os.makedirs(f'{res_dir}BS_Benchmark/pnl/', exist_ok=True)

for i in range(NUM_TEST):
    df_test = mc_sets[i]
    # The `delta_bs` column of the test set is used directly as the delta.
    bs.store_pnl(df_test, df_test['delta_bs'], pnl_path=f'{res_dir}BS_Benchmark/pnl/pnl{i}.csv', dt=DT)

#### Delta, vix

In [17]:
# Regress on `delta_bs` together with the `fake_vix` column.
features = ['delta_bs', 'fake_vix']
# Fit on the training data; `fit_reg` prints the call and put coefficients.
lins = fit_reg(df_train, features)

Coefficient of regression for calls : ['1.0042', '-0.0001']
Coefficient of regression for puts : ['1.0014', '-0.0001']


In [18]:
# Output folder for the delta + VIX regression results; created if it does not exist yet.
sub_res = f'{res_dir}Delta_Regression/pnl/'
os.makedirs(sub_res, exist_ok=True)

In [19]:
# Apply the delta + VIX fit to every MC test set and pass the resulting delta to `bs.store_pnl`.
for i in range(NUM_TEST):
    df_test = mc_sets[i]
    delta = cm.test_mc_core(lins, features, df_test, DT)
    bs.store_pnl(df_test, delta, pnl_path=f'{sub_res}pnl{i}.csv', dt=DT)

#### Delta, Vega, VIX

In [20]:
# Three regressors: `delta_bs`, `vega` and `fake_vix`.
features = ['delta_bs', 'vega', 'fake_vix']

# Output folder for the delta-vega model under the current `res_dir`.
sub_res = f'{res_dir}Delta_Vega/pnl/'
os.makedirs(sub_res, exist_ok=True)

# Fit on the training data; `fit_reg` prints the call and put coefficients.
lins = fit_reg(df_train, features)

Coefficient of regression for calls : ['1.0057', '-0.0001', '-0.0001']
Coefficient of regression for puts : ['1.0123', '0.0003', '-0.0001']


In [21]:
# Evaluate the delta-vega-VIX fit on every MC test set.
for i in range(NUM_TEST):
    df_test = mc_sets[i]
    delta = cm.test_mc_core(lins, features, df_test, DT)
    bs.store_pnl(df_test, delta, pnl_path=f'{sub_res}pnl{i}.csv', dt=DT)

Hull-White is not going to be used with VIX.

### Permute: but no VIX

#### Prepare all permuted data sets

In [22]:
# Build one permuted train/test pair per Monte Carlo test set.
train_permutes, test_permutes = [], []
for i in range(NUM_TEST):
    # Pool the training data with the i-th MC test set.
    # `DataFrame.append` was deprecated in pandas 1.4 and removed in 2.0;
    # `pd.concat` with the same `ignore_index`/`sort` arguments is the supported equivalent.
    df_permute = pd.concat([df_train, mc_sets[i]], ignore_index=True, sort=False)
    # The set index doubles as the random seed passed to `cm.permute_core`.
    df_permute = cm.permute_core(df_permute, 0, random_seed=i)

    # Split on `period0`: rows with 0 or 1 form the training part, rows with 2 the test part.
    # NOTE(review): presumably `cm.permute_core` (re)assigns `period0` — confirm in library/common.
    is_train = df_permute['period0'].isin([0, 1])
    is_test = df_permute['period0'] == 2

    train_permutes.append(df_permute.loc[is_train])
    test_permutes.append(df_permute.loc[is_test])

In [23]:
# Root folder for the permuted (no VIX) results; the cells below build their output paths from it.
res_dir = DATA_DIR + 'Result_Permute/Regression/'

#### BS
Permuting affects the Black-Scholes benchmark, because the test set is different.

In [24]:
# Black-Scholes benchmark on the permuted test sets.
os.makedirs(f'{res_dir}BS_Benchmark/pnl/', exist_ok=True)

for i in range(NUM_TEST):
    # The `delta_bs` column of the permuted test set is used directly as the delta.
    bs.store_pnl(test_permutes[i], test_permutes[i]['delta_bs'], pnl_path=f'{res_dir}BS_Benchmark/pnl/pnl{i}.csv', dt=DT)

#### Delta regression

In [25]:
# Output folder for the delta-only regression on permuted data.
sub_res = f'{res_dir}Delta_Regression/pnl/'
os.makedirs(sub_res, exist_ok=True)

# `delta_bs` is the single regressor.
features = ['delta_bs']

In [26]:
# For each permuted split: refit on its training part, then evaluate on its test part.
for i in range(NUM_TEST):
    # `fit_reg` prints the call and put coefficients on every iteration.
    lins = fit_reg(train_permutes[i], features)
    delta = cm.test_mc_core(lins, features, test_permutes[i], DT)
    bs.store_pnl(test_permutes[i], delta, pnl_path=f'{sub_res}pnl{i}.csv', dt=DT)

Coefficient of regression for calls : ['0.9983']
Coefficient of regression for puts : ['1.0004']
Coefficient of regression for calls : ['1.0016']
Coefficient of regression for puts : ['0.9984']
Coefficient of regression for calls : ['1.0040']
Coefficient of regression for puts : ['1.0016']
Coefficient of regression for calls : ['0.9993']
Coefficient of regression for puts : ['1.0023']
Coefficient of regression for calls : ['1.0073']
Coefficient of regression for puts : ['0.9956']
Coefficient of regression for calls : ['0.9991']
Coefficient of regression for puts : ['1.0035']
Coefficient of regression for calls : ['1.0085']
Coefficient of regression for puts : ['0.9958']
Coefficient of regression for calls : ['1.0031']
Coefficient of regression for puts : ['1.0033']
Coefficient of regression for calls : ['1.0014']
Coefficient of regression for puts : ['1.0056']
Coefficient of regression for calls : ['0.9975']
Coefficient of regression for puts : ['1.0047']
Coefficient of regression for 

#### Delta, Vega

In [27]:
# Output folder for the delta-vega regression on permuted data.
sub_res = f'{res_dir}Delta_Vega/pnl/'
os.makedirs(sub_res, exist_ok=True)

# Regressors: `delta_bs` and `vega`.
features = ['delta_bs', 'vega']

In [28]:
# For each permuted split: refit the delta-vega model, then evaluate it on the matching test part.
for i in range(NUM_TEST):
    # `fit_reg` prints the call and put coefficients on every iteration.
    lins = fit_reg(train_permutes[i], features)
    delta = cm.test_mc_core(lins, features, test_permutes[i], DT)
    bs.store_pnl(test_permutes[i], delta, pnl_path=f'{sub_res}pnl{i}.csv', dt=DT)

Coefficient of regression for calls : ['0.9934', '0.0001']
Coefficient of regression for puts : ['1.0010', '0.0000']
Coefficient of regression for calls : ['0.9938', '0.0002']
Coefficient of regression for puts : ['1.0082', '0.0003']
Coefficient of regression for calls : ['1.0073', '-0.0001']
Coefficient of regression for puts : ['1.0114', '0.0003']
Coefficient of regression for calls : ['1.0021', '-0.0001']
Coefficient of regression for puts : ['1.0071', '0.0001']
Coefficient of regression for calls : ['0.9977', '0.0003']
Coefficient of regression for puts : ['0.9995', '0.0001']
Coefficient of regression for calls : ['1.0122', '-0.0004']
Coefficient of regression for puts : ['1.0037', '0.0000']
Coefficient of regression for calls : ['1.0021', '0.0002']
Coefficient of regression for puts : ['1.0134', '0.0005']
Coefficient of regression for calls : ['1.0020', '0.0000']
Coefficient of regression for puts : ['1.0091', '0.0002']
Coefficient of regression for calls : ['1.0081', '-0.0002']
C

Because there is no way to use VIX with Hull-White model, we don't permute the Hull-White either.

### Permute and VIX

In [29]:
# Root folder for the permuted + VIX results; the cells below build their output paths from it.
res_dir = DATA_DIR + 'Result_Permute_VIX/Regression/'

#### BS
For completeness


In [30]:
# Output folder for the Black-Scholes benchmark under the current `res_dir`.
sub_res = f'{res_dir}BS_Benchmark/pnl/'
os.makedirs(sub_res, exist_ok=True)

# The benchmark loop in the next cell reads `delta_bs` directly and does not use `features`.
features = ['delta_bs']

In [31]:
# Black-Scholes benchmark on the permuted test sets; `delta_bs` is used directly as the delta.
for i in range(NUM_TEST):
    bs.store_pnl(test_permutes[i], test_permutes[i]['delta_bs'], pnl_path=f'{sub_res}pnl{i}.csv', dt=DT)

#### Delta, VIX

In [32]:
# Output folder for the delta + VIX regression on permuted data.
sub_res = f'{res_dir}Delta_Regression/pnl/'
os.makedirs(sub_res, exist_ok=True)

# Regressors: `delta_bs` and `fake_vix`.
features = ['delta_bs', 'fake_vix']

In [33]:
# For each permuted split: refit the delta + VIX model, then evaluate it on the matching test part.
for i in range(NUM_TEST):
    # `fit_reg` prints the call and put coefficients on every iteration.
    lins = fit_reg(train_permutes[i], features)
    delta = cm.test_mc_core(lins, features, test_permutes[i], DT)
    bs.store_pnl(test_permutes[i], delta, pnl_path=f'{sub_res}pnl{i}.csv', dt=DT)

Coefficient of regression for calls : ['0.9983', '-0.0000']
Coefficient of regression for puts : ['1.0004', '0.0000']
Coefficient of regression for calls : ['1.0035', '-0.0001']
Coefficient of regression for puts : ['0.9972', '-0.0001']
Coefficient of regression for calls : ['1.0045', '-0.0001']
Coefficient of regression for puts : ['1.0013', '-0.0001']
Coefficient of regression for calls : ['0.9997', '-0.0000']
Coefficient of regression for puts : ['1.0025', '0.0000']
Coefficient of regression for calls : ['1.0097', '-0.0003']
Coefficient of regression for puts : ['0.9943', '-0.0002']
Coefficient of regression for calls : ['0.9988', '0.0000']
Coefficient of regression for puts : ['1.0035', '0.0000']
Coefficient of regression for calls : ['1.0087', '-0.0002']
Coefficient of regression for puts : ['0.9964', '-0.0002']
Coefficient of regression for calls : ['1.0041', '-0.0001']
Coefficient of regression for puts : ['1.0028', '-0.0001']
Coefficient of regression for calls : ['1.0021', '-0

#### Delta, Vega, VIX

In [34]:
# Output folder for the delta-vega-VIX regression on permuted data.
sub_res = f'{res_dir}Delta_Vega/pnl/'
os.makedirs(sub_res, exist_ok=True)

# Regressors: `delta_bs`, `vega` and `fake_vix`.
features = ['delta_bs', 'vega', 'fake_vix']

In [35]:
# For each permuted split: refit the delta-vega-VIX model, then evaluate it on the matching test part.
for i in range(NUM_TEST):
    # `fit_reg` prints the call and put coefficients on every iteration.
    lins = fit_reg(train_permutes[i], features)
    delta = cm.test_mc_core(lins, features, test_permutes[i], DT)
    bs.store_pnl(test_permutes[i], delta, pnl_path=f'{sub_res}pnl{i}.csv', dt=DT)

Coefficient of regression for calls : ['0.9932', '0.0001', '0.0000']
Coefficient of regression for puts : ['1.0015', '0.0000', '0.0000']
Coefficient of regression for calls : ['0.9975', '0.0002', '-0.0001']
Coefficient of regression for puts : ['1.0061', '0.0002', '-0.0001']
Coefficient of regression for calls : ['1.0112', '-0.0002', '-0.0001']
Coefficient of regression for puts : ['1.0088', '0.0002', '-0.0001']
Coefficient of regression for calls : ['1.0037', '-0.0001', '-0.0000']
Coefficient of regression for puts : ['1.0079', '0.0002', '0.0000']
Coefficient of regression for calls : ['1.0071', '0.0001', '-0.0003']
Coefficient of regression for puts : ['0.9941', '-0.0000', '-0.0002']
Coefficient of regression for calls : ['1.0116', '-0.0004', '0.0000']
Coefficient of regression for puts : ['1.0042', '0.0000', '0.0000']
Coefficient of regression for calls : ['1.0113', '-0.0001', '-0.0002']
Coefficient of regression for puts : ['1.0059', '0.0003', '-0.0002']
Coefficient of regression f