In [1]:
import sys
import os
# Append the library path to PYTHONPATH, so library can be imported.
sys.path.append(os.path.dirname(os.getcwd()))

import shutil
import datetime
import numpy as np
import pandas as pd

from library import common as cm
from library import regression_aux as raux
from library import in_sample

Using TensorFlow backend.


In [4]:
# Run the project setup script inside this notebook's namespace. Later cells use
# names that are never assigned in the notebook itself (e.g. FREQ, PERMUTE, VIX,
# res_dir, DATA_DIR); presumably they are created here -- confirm in setup.py.
%run setup.py
# Render matplotlib figures inline in the cell output.
%matplotlib inline
# Load the autoreload extension and reload all modules before each cell runs,
# so edits to imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
# Echo the run configuration so the saved output records which setup was used.
for label, value in [('Loading data for:', FREQ),
                     ('Permutation flag:', PERMUTE),
                     ('VIX flag:', VIX)]:
    print(label, value)

Loading data for: 1D
Permutation flag: False
VIX flag: False


In [6]:
# Data loading must run after setup.py, because some of the parameters are going
# to be overwritten. The `normal` argument is passed to the script; its output
# below starts with "Loading Normal data sets!".
# NOTE(review): df_train, used by later cells, is presumably created by this
# script -- confirm in Load_Clean_aux.py.
%run Load_Clean_aux.py normal

Loading Normal data sets!

Load and clean the training and validation data.
Original data size is 526994
We remove in-the-money samples. 264998 samples (50.28%) are removed. We have 49.72% of original data left, yielding a size of 261996.
We shrink moneyness range. 0 samples (0.00%) are removed. We have 49.72% of original data left, yielding a size of 261996.
We remove samples when S1 is not available. 998 samples (0.38%) are removed. We have 49.53% of original data left, yielding a size of 260998.


Clean and load all Monte Carlo test data.

Load Monte Carlo set 1
We remove in-the-money samples. 72272 samples (50.83%) are removed. We have 49.17% of original data left, yielding a size of 69922.
We shrink moneyness range. 0 samples (0.00%) are removed. We have 49.17% of original data left, yielding a size of 69922.
We remove samples when S1 is not available. 1271 samples (1.82%) are removed. We have 48.28% of original data left, yielding a size of 68651.


Load Monte Carlo set 2
We remo

We remove in-the-money samples. 60041 samples (50.27%) are removed. We have 49.73% of original data left, yielding a size of 59388.
We shrink moneyness range. 0 samples (0.00%) are removed. We have 49.73% of original data left, yielding a size of 59388.
We remove samples when S1 is not available. 815 samples (1.37%) are removed. We have 49.04% of original data left, yielding a size of 58573.




In [7]:
# Label of the single result column. It has to match the result directory chosen
# in setup, so fail early if the two disagree.
col = f'FREQ={FREQ}_HALFMONEY=otm_MINM=0.8_MAXM=1.5_Permute=False_VIX=False'
assert col in res_dir

# Row labels of the result tables: one row per hedging method.
rows = [
    'Regression/No_Hedge',
    'Regression/BS_Benchmark',
    'Regression/Fixed_Constants',
    'Regression/Delta_only',
    'Regression/Vega_only',
    'Regression/Gamma_only',
    'Regression/Vanna_only',
    'Regression/Bias',
    'Regression/Delta_Gamma',
    'Regression/Delta_Vega',
    'Regression/Delta_Vanna',
    'Regression/Delta_Vega_Gamma',
    'Regression/Delta_Vega_Vanna',
    'Regression/Delta_Gamma_Vanna',
    'Regression/Delta_Vega_Gamma_Va',
    'Regression/Hull_White',
    'Regression/Hull_White_relaxed',
    'Network/Normal_Feature',
    'Network/Delta_Vega',
    'Network/Delta_Vega_Vanna',
]

# Two-level column index: (setup label, 'Absolute' or '%Change').
sub_cols = ['Absolute', '%Change']
cols_indices = pd.MultiIndex.from_product([[col], sub_cols], names=['setup', 'value'])

# Three independent, initially empty tables: calls, puts, and both combined.
df_call = pd.DataFrame(index=rows, columns=cols_indices)
df_put = pd.DataFrame(index=rows, columns=cols_indices)
df_both = pd.DataFrame(index=rows, columns=cols_indices)

In [9]:
# Show which result directory this run reads from (res_dir is not assigned in
# this notebook; presumably it comes from setup.py -- confirm).
print(f'Working on {res_dir}')

Working on C:\Users\Weiguan\Dropbox\Research\DeepHedging\Data\Heston/Result/CONFIG=4/FREQ=1D_HALFMONEY=otm_MINM=0.8_MAXM=1.5_Permute=False_VIX=False/


In [10]:
def aggregate_windows(res):
    """
    Collapse per-window MSEs into sample-weighted averages.

    `res` is indexed with the keys (side, 'MSE') and (side, 'num_samples'),
    where side is '0' (reported as the call figure) or '1' (reported as the
    put figure). Presumably it is a DataFrame with one row per window and
    two-level columns -- confirm against the producer in `in_sample`.

    Returns a tuple (call_mse, put_mse, both_mse):
      * call_mse / put_mse: the MSE of each side, averaged over the windows
        with the number of samples as weights;
      * both_mse: the two side-level MSEs averaged with the total number of
        samples per side as weights.
    """
    sides = ['0', '1']

    # Total sample count per side, and the sample-weighted sum of the MSEs.
    totals = [res[(side, 'num_samples')].sum() for side in sides]
    weighted_sums = [(res[(side, 'MSE')] * res[(side, 'num_samples')]).sum() for side in sides]

    num_c, num_p = totals
    call_mse = weighted_sums[0] / num_c
    put_mse = weighted_sums[1] / num_p

    both_mse = (call_mse * num_c + put_mse * num_p) / (num_c + num_p)
    return call_mse, put_mse, both_mse

In [11]:
# Keyword arguments shared by the benchmark cells below, which pass an explicit
# `known_delta` and therefore use no regression features.
kwargs = dict(
    vix=VIX,
    features=None,
    max_period=0,
    sub_res=None,
    pnl_path=None,
    df=df_train,
    delta_coeff_1=False,
    agg_side=False,
    leverage=False,
)

##### Zero hedge

In [12]:
if not PERMUTE:
    r_short = 'Regression/No_Hedge'

    # A delta of zero for every sample, aligned with the training index.
    zero = pd.Series(np.zeros(len(df_train)), index=df_train.index)

    res = aggregate_windows(
        in_sample.calc_in_sample_error_regression(known_delta=zero, **kwargs)
    )
    # res is (call, put, both); store each value in the matching table.
    for frame, mse in zip((df_call, df_put, df_both), res):
        frame.loc[r_short, (col, 'Absolute')] = mse

##### BS

In [13]:
if not PERMUTE:
    r_short = 'Regression/BS_Benchmark'

    # Use the 'delta_bs' column as the known hedge ratio.
    res = aggregate_windows(
        in_sample.calc_in_sample_error_regression(known_delta=df_train['delta_bs'], **kwargs)
    )
    # res is (call, put, both); store each value in the matching table.
    for frame, mse in zip((df_call, df_put, df_both), res):
        frame.loc[r_short, (col, 'Absolute')] = mse

##### Fixed constants: 0.9, 1.1

In [14]:
if not PERMUTE:
    r_short = 'Regression/Fixed_Constants'

    bl_c = df_train['cp_int'] == 0
    # Scale 'delta_bs' by 0.9 where cp_int == 0 and by 1.1 everywhere else.
    delta = df_train['delta_bs'] * np.where(bl_c, 0.9, 1.1)

    res = aggregate_windows(
        in_sample.calc_in_sample_error_regression(known_delta=delta, **kwargs)
    )
    # res is (call, put, both); store each value in the matching table.
    for frame, mse in zip((df_call, df_put, df_both), res):
        frame.loc[r_short, (col, 'Absolute')] = mse

##### All other regressions

In [15]:
# Regression specifications as (result row, feature columns, flag) triples.
# The flag is forwarded as `delta_coeff_1` by the loop that consumes this list.
all_regressions = [
    # Specifications that include 'delta_bs' among the features (flag False).
    ('Regression/Delta_only', ['delta_bs'], False),
    ('Regression/Delta_Vega', ['delta_bs', 'vega_n'], False),
    ('Regression/Delta_Gamma', ['delta_bs', 'gamma_n'], False),
    ('Regression/Delta_Vanna', ['delta_bs', 'vanna_n'], False),
    ('Regression/Delta_Gamma_Vanna', ['delta_bs', 'gamma_n', 'vanna_n'], False),
    ('Regression/Delta_Vega_Gamma', ['delta_bs', 'vega_n', 'gamma_n'], False),
    ('Regression/Delta_Vega_Vanna', ['delta_bs', 'vega_n', 'vanna_n'], False),
    ('Regression/Delta_Vega_Gamma_Va', ['delta_bs', 'vega_n', 'gamma_n', 'vanna_n'], False),
    # Specifications without 'delta_bs' among the features (flag True).
    ('Regression/Vega_only', ['vega_n'], True),
    ('Regression/Gamma_only', ['gamma_n'], True),
    ('Regression/Vanna_only', ['vanna_n'], True),
    ('Regression/Bias', ['bias'], True),
    ('Regression/Hull_White', ['vega_s', 'delta_vega_s', 'delta2_vega_s'], True),
    # Hull-White features plus 'delta_bs' as an additional feature (flag False).
    ('Regression/Hull_White_relaxed', ['delta_bs', 'vega_s', 'delta_vega_s', 'delta2_vega_s'], False),
]

In [16]:
# Constant column used as the only feature of the 'Regression/Bias' specification.
df_train['bias'] = 1.
for row, features, delta_1 in all_regressions:
    # Nothing is evaluated when the permutation flag is set.
    if PERMUTE:
        continue

    # Same settings as the benchmark cells, except for the feature list and
    # the delta_coeff_1 flag, which come from the specification.
    kwargs = dict(
        vix=VIX,
        features=features,
        max_period=0,
        sub_res=None,
        pnl_path=None,
        df=df_train,
        delta_coeff_1=delta_1,
        agg_side=False,
        leverage=False,
    )

    res = aggregate_windows(
        in_sample.calc_in_sample_error_regression(known_delta=None, **kwargs)
    )
    # res is (call, put, both); store each value in the matching table.
    for frame, mse in zip((df_call, df_put, df_both), res):
        frame.loc[row, (col, 'Absolute')] = mse

##### ANNs

In [18]:
# Configuration of the 'Network/Normal_Feature' model.
row = 'Network/Normal_Feature'
ori_fea = ['M0', 'tau0_implvol0']

# Columns passed as use_fea: each original feature with a '_t' suffix, plus 'cp_int'.
use_fea = [f'{fea}_t' for fea in ori_fea]
use_fea.append('cp_int')

# Result sub-directory of this model and its checkpoint directory.
sub_res = res_dir + 'Network/Normal_Feature/'
ckp_dir = sub_res + 'ckp/'

In [20]:
# Evaluate the network configured in the previous cell and aggregate over windows.
res = aggregate_windows(
    in_sample.calc_in_sample_error_ann(df_train, ckp_dir, 0, ori_fea, use_fea, sim_data=True)
)
# res is (call, put, both); store each value in the matching table.
for frame, mse in zip((df_call, df_put, df_both), res):
    frame.loc[row, (col, 'Absolute')] = mse

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.


In [21]:
# Configuration of the 'Network/Delta_Vega' model.
row = 'Network/Delta_Vega'
ori_fea = ['delta_bs', '1_over_sqrt_tau', 'vega_n']

# Columns passed as use_fea: each original feature with a '_t' suffix, plus 'cp_int'.
use_fea = [f'{fea}_t' for fea in ori_fea]
use_fea.append('cp_int')

# Result sub-directory of this model and its checkpoint directory.
sub_res = res_dir + 'Network/Delta_Vega/'
ckp_dir = sub_res + 'ckp/'

In [23]:
# Evaluate the network configured in the previous cell and aggregate over windows.
res = aggregate_windows(
    in_sample.calc_in_sample_error_ann(df_train, ckp_dir, 0, ori_fea, use_fea, sim_data=True)
)
# res is (call, put, both); store each value in the matching table.
for frame, mse in zip((df_call, df_put, df_both), res):
    frame.loc[row, (col, 'Absolute')] = mse

In [17]:
# Configuration of the 'Network/Delta_Vega_Vanna' model.
row = 'Network/Delta_Vega_Vanna'
ori_fea = ['delta_bs', '1_over_sqrt_tau', 'vega_n', 'vanna_n']

# Columns passed as use_fea: each original feature with a '_t' suffix, plus 'cp_int'.
use_fea = [f'{fea}_t' for fea in ori_fea]
use_fea.append('cp_int')

# Result sub-directory of this model and its checkpoint directory.
sub_res = res_dir + 'Network/Delta_Vega_Vanna/'
ckp_dir = sub_res + 'ckp/'

In [18]:
# Evaluate the network configured in the previous cell and aggregate over windows.
res = aggregate_windows(
    in_sample.calc_in_sample_error_ann(df_train, ckp_dir, 0, ori_fea, use_fea, sim_data=True)
)
# res is (call, put, both); store each value in the matching table.
for frame, mse in zip((df_call, df_put, df_both), res):
    frame.loc[row, (col, 'Absolute')] = mse

In [19]:
# Finalise each table (calls, puts, both) and write it to a CSV file.
# NOTE: this cell rescales the 'Absolute' column in place, so running it a second
# time without rebuilding the tables multiplies the values by 100 again.
for frame, side in zip([df_call, df_put, df_both], ['call', 'put', 'both']):
    absolute = frame[(col, 'Absolute')]
    benchmark = frame.loc['Regression/BS_Benchmark', (col, 'Absolute')]

    # Change relative to the BS benchmark row, expressed in percent.
    # The builtin `float` replaces the `np.float` alias, which NumPy deprecated
    # in 1.20 and removed in 1.24 (it was only ever an alias of the builtin).
    frame[(col, '%Change')] = ((absolute - benchmark) / benchmark).astype(float).round(4) * 100

    # Absolute values are reported multiplied by 100 and rounded to 4 decimals.
    frame[(col, 'Absolute')] = (absolute * 100.).astype(float).round(4)

    frame.to_csv(f'{DATA_DIR}Result/in_sample_{side}_mse_{FREQ}.csv')