In [None]:
import sys
import os
# Append the library path to PYTHONPATH, so library can be imported.
sys.path.append(os.path.dirname(os.getcwd()))
import datetime

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from scipy.stats import norm

from library import common as cm
from library import regression_aux as raux

In [None]:
# Execute setup.py inside the notebook namespace.
# NOTE(review): later cells use DATA_DIR, res_dir and FREQ without defining them,
# so they are presumably created by this script -- confirm.
%run setup.py
# Render matplotlib figures inline in the cell output.
%matplotlib inline
# Enable the autoreload extension; mode 2 reloads all imported modules before
# each cell is executed, so edits to `library` are picked up without a restart.
%load_ext autoreload
%autoreload 2

# Use seaborn's 'darkgrid' theme for every figure drawn below.
sns.set(style='darkgrid')

In [None]:
# Report which directory the result PNL files come from.
# `res_dir` is not defined in this notebook; presumably it comes from setup.py.
result_dir_message = f'Result PNL are loaded from: {res_dir}'
print(result_dir_message)

In [None]:
# S&P 500 index series: only the 'date' and 'close' columns are read, and the
# first of them ('date') becomes the index. Dates stay as strings here.
df_spx = pd.read_csv(
    DATA_DIR + 'RawData/spx500.csv',
    index_col=0,
    usecols=['date', 'close'],
)

# Processed option data: parse the three date columns, read cp_flag as a
# categorical, drop the stored index in favour of a fresh 0..n-1 index and
# keep only the rows whose cp_flag equals 'OP'.
file_path = DATA_DIR + 'CleanData/processed.csv'
df = (
    pd.read_csv(
        file_path,
        parse_dates=['date', 'exdate', 'last_date'],
        dtype={'cp_flag': 'category'},
        index_col=0,
    )
    .reset_index(drop=True)
    .loc[lambda frame: frame['cp_flag'] == 'OP']
)

In [None]:
# Select the options that were actively traded in one of two reference windows
# and keep the *entire* history of each of those options in `df_real`.
#
# Window 1: every quote dated before 2010-04-01.
# Window 2: quotes dated strictly between 2015-01-01 and 2015-04-01.
# A quote counts as "active" when its volume exceeds 1000.
liquid = df['volume'] > 1000
in_window_1 = df['date'] < pd.Timestamp(2010, 4, 1)
in_window_2 = (df['date'] > pd.Timestamp(2015, 1, 1)) & (df['date'] < pd.Timestamp(2015, 4, 1))

df_real_1 = df.loc[in_window_1 & liquid]
df_real_2 = df.loc[in_window_2 & liquid]
ops_1 = df_real_1['optionid'].unique()
ops_2 = df_real_2['optionid'].unique()

# Vectorised membership test. The previous per-row `i in ops_1` scanned the
# whole NumPy array for every row of df, which is quadratic in practice.
bl = df['optionid'].isin(ops_1) | df['optionid'].isin(ops_2)
df_real = df.loc[bl]

In [None]:
# Timeline figure: one horizontal segment per selected option, drawn from its
# first quote date to its latest expiry date at (roughly) its strike, on top of
# the S&P 500 close series. Options in `ops_1` are red, all others black.
groups = df_real.groupby('optionid')

# Set lookup instead of scanning the ops_1 array once per group.
ops_1_lookup = set(ops_1)
# Seeded generator so the vertical jitter (and hence the figure) is identical
# on every Restart & Run All; the global NumPy random state is left untouched.
jitter_rng = np.random.default_rng(0)

fig_, ax_ = plt.subplots(figsize=(12, 6))
# The index of df_spx holds date strings formatted as YYYY/MM/DD.
datelist = [datetime.datetime.strptime(d, '%Y/%m/%d') for d in df_spx.index]
for key, g in groups:
    color = 'r' if key in ops_1_lookup else 'black'
    # Jitter the strike level (std 100) so segments with equal strikes do not
    # overlap exactly.
    ax_.hlines(y=g['K'].min() + 100 * jitter_rng.standard_normal(),
               xmin=g['date'].min(),
               xmax=g['exdate'].max(),
               alpha=1., lw=0.5, color=color)
ax_.plot(datelist, df_spx['close'].values)
fig_.autofmt_xdate()
#fig_.savefig('realoptions.jpg', dpi=200)

Load the data that is used for the linear regression and ANNs.

In [None]:
"""  Remember to run with 1D!!! setup , everything default."""
# Execute Load_Clean_aux.py inside the notebook namespace.
# NOTE(review): the cells below read columns such as cp_int, delta_bs, M0,
# tau0, vega_n and period{i} from `df`, so this script presumably (re)builds
# `df` for the regression / ANN experiments -- confirm against the script.
%run Load_Clean_aux.py

In [None]:
# Histogram (50 bins) of the `delta_bs` column for rows with cp_int == 0.
delta_cp0 = df.loc[df['cp_int'] == 0, 'delta_bs']
delta_cp0.hist(bins=50)

In [None]:
# Push the cp_int == 0 deltas through the inverse standard-normal CDF and plot
# the histogram of the transformed values.
delta_cp0_probit = norm.ppf(df.loc[df['cp_int'] == 0, 'delta_bs'])
plt.hist(delta_cp0_probit, bins=50);

In [None]:
# Histogram (50 bins) of the `delta_bs` column for rows with cp_int == 1.
delta_cp1 = df.loc[df['cp_int'] == 1, 'delta_bs']
delta_cp1.hist(bins=50)

In [None]:
# Shift the cp_int == 1 deltas by +1 before applying the inverse standard-normal
# CDF, then plot the histogram of the transformed values.
delta_cp1_shifted = df.loc[df['cp_int'] == 1, 'delta_bs'] + 1
plt.hist(norm.ppf(delta_cp1_shifted), bins=50);

In [None]:
# Distribution of the `M0` column (axis-labelled as moneyness), with counts on
# a log axis fixed to the 1e3 .. 3e5 range.
plt.hist(df['M0'], bins=50)
plt.title('S&P 500')
plt.xlabel('Moneyness')
plt.yscale('log')
plt.ylim(1e3, 3e5)
# Plain integers as tick labels instead of powers of ten.
plt.yticks([1e3, 1e4, 1e5], [1000, 10000, 100000])

In [None]:
# Distribution of the `tau0` column (axis-labelled as time-to-maturity), with
# counts on a log axis fixed to the 1e2 .. 1e6 range.
plt.hist(df['tau0'], bins=50)
plt.title('S&P 500')
plt.xlabel('Time-to-maturity')
plt.yscale('log')
plt.ylim(1e2, 1e6)
# Plain integers as tick labels instead of powers of ten.
plt.yticks([1e2, 1e3, 1e4, 1e5, 1e6], [100, 1000, 10000, 100000, 1000000])
plt.xlim(0., 3.2)

In [None]:
# Largest window index k among the `period{k}` columns of df.
# Only names of the exact form 'period<digits>' are parsed; the previous
# substring test ('period' in name) also picked up unrelated columns that merely
# contain the word and then failed (or mis-parsed) on int(name[6:]).
_PERIOD_PREFIX = 'period'
max_period = max(
    int(name[len(_PERIOD_PREFIX):])
    for name in df.columns
    if isinstance(name, str)
    and name.startswith(_PERIOD_PREFIX)
    and name[len(_PERIOD_PREFIX):].isdecimal()
)

In [None]:
# Per time window, count rows by option type (cp_int 0 -> "call" lists,
# cp_int 1 -> "put" lists) and by split: period flag 0 or 1 goes to the
# "train" lists, period flag 2 to the "test" lists.
nums_call_train, nums_put_train, nums_call_test, nums_put_test = [], [], [], []

is_call = df['cp_int'] == 0
is_put = df['cp_int'] == 1

for i in range(max_period + 1):
    period_flag = df[f'period{i}']
    in_sample = period_flag.isin([0, 1])
    out_of_sample = period_flag == 2

    nums_call_train.append((in_sample & is_call).sum())
    nums_put_train.append((in_sample & is_put).sum())
    nums_call_test.append((out_of_sample & is_call).sum())
    nums_put_test.append((out_of_sample & is_put).sum())

In [None]:
# Stacked bars per time window (numbered from 1): in-sample counts at the
# window position, out-of-sample counts shifted right by one bar width; within
# each bar the put count is stacked on top of the call count.
width = 0.3
ind = np.arange(1, max_period + 2)
ind_test = ind + width

p1 = plt.bar(ind, nums_call_train, width=width)
p2 = plt.bar(ind, nums_put_train, bottom=nums_call_train, width=width)
p3 = plt.bar(ind_test, nums_call_test, width=width)
p4 = plt.bar(ind_test, nums_put_test, bottom=nums_call_test, width=width)

legend_handles = (p1[0], p2[0], p3[0], p4[0])
legend_labels = ('In-sample call', 'In-sample put', 'Out-of-sample call', 'Out-of-sample put')
plt.legend(legend_handles, legend_labels, frameon=False)

plt.xlabel('Time window')
plt.ylabel('# Samples')
plt.title('S&P 500')

In [None]:
# Total (call + put) sample count per time window for each split.
num_train = pd.Series(np.add(nums_call_train, nums_put_train))
num_test = pd.Series(np.add(nums_call_test, nums_put_test))

In [None]:
# Index of the last time window used from here on (windows 0..13).
# NOTE(review): this overrides the value derived from the period columns above.
max_period = 13

# Number of days covered by one resampling step, per supported FREQ value.
_DAYS_PER_STEP = {'1D': 1, '2D': 2}
if FREQ not in _DAYS_PER_STEP:
    # Previously an unsupported FREQ silently left num_d / annual_cof undefined
    # (or stale from an earlier run), which only failed later and less clearly.
    raise ValueError(
        f'Unsupported FREQ {FREQ!r}; expected one of {sorted(_DAYS_PER_STEP)}'
    )
num_d = _DAYS_PER_STEP[FREQ]
# Number of FREQ-sized steps per year, used to annualise per-step statistics
# (252 is presumably the number of trading days per year).
annual_cof = 252 / num_d

In [None]:
# Annualised mean return and volatility of the underlying per time window,
# computed separately for the in-sample rows (period flag 0 or 1) and the
# out-of-sample rows (period flag 2).
#
# The result Series are created with an explicit float dtype: without it,
# pandas warns (1.x) or builds object-dtype Series (2.x) for data-less input.
df_vol_train, df_vol_test, df_return_train, df_return_test = (
    pd.Series(index=range(max_period + 1), dtype=float) for _ in range(4)
)

for i in range(max_period + 1):
    period_flag = df[f'period{i}']
    df_train = df.loc[period_flag.isin([0, 1]), ['S0', 'S1', 'date']].set_index('date')
    df_test = df.loc[period_flag == 2, ['S0', 'S1', 'date']].set_index('date')

    for df_tmp, df_vol_tmp, df_return_tmp in (
        (df_train, df_vol_train, df_return_train),
        (df_test, df_vol_test, df_return_test),
    ):
        # Average the log returns of all rows falling in the same FREQ bucket,
        # then drop buckets that contain no rows.
        window_return = np.log(df_tmp['S1'] / df_tmp['S0']).resample(FREQ).mean().dropna()
        df_vol_tmp.loc[i] = window_return.std() * np.sqrt(annual_cof)
        df_return_tmp.loc[i] = window_return.mean() * annual_cof

# Windows are numbered from 1 on the x axis; the two series are offset by
# +/-0.1 so their error bars (one volatility each way) do not overlap.
plt.errorbar(df_vol_train.index + 1 - 0.1, df_return_train, yerr=df_vol_train, fmt='o', label='In-sample')
plt.errorbar(df_vol_test.index + 1 + 0.1, df_return_test, yerr=df_vol_test, fmt='o', label='Out-of-sample')
plt.legend(frameon=False)
plt.ylabel('Annualized return of S&P 500')
plt.xlabel('Time window')
plt.title('1 day')
plt.ylim((-0.3, 0.5))
plt.xticks(df_vol_train.index+1)

In [None]:
# Mean of the `vega_n` column per time window and time-to-maturity bucket,
# over all rows of the window (period flag 0, 1 or 2).
max_period = 13
# (lower bound inclusive, upper bound exclusive, bucket label) on `tau0`.
mat = [
    (0., 1/12.,  'Small'), (1/12., 1/2., 'Middle'), (1/2., 2.,  'Large'),
     ]

# Collect one record per window and build the frame once. The previous version
# created the frame with index range(max_period) (one row short of the
# max_period + 1 windows looped over) and grew it cell by cell through .loc.
vega_rows = []
for i in range(max_period + 1):
    df_train = df.loc[df[f'period{i}'].isin([0, 1, 2])]
    row = {}
    for mat_low, mat_high, n1 in mat:
        bl = (df_train['tau0'] >= mat_low) & (df_train['tau0'] < mat_high)
        row[f'{n1}'] = df_train.loc[bl, 'vega_n'].mean()
    vega_rows.append(row)

vega_mean = pd.DataFrame(
    vega_rows,
    index=range(max_period + 1),
    columns=[label for _, _, label in mat],
)

In [None]:
# One marker-only series per column of vega_mean, with windows numbered from 1.
window_number = vega_mean.index + 1
for bucket in vega_mean.columns:
    plt.plot(window_number, vega_mean[bucket], label=bucket, marker='o', linewidth=0)
plt.legend()
plt.xlabel('Time window')
plt.ylabel('Average Vega')

In [None]:
# Leverage coefficient per time window, fitted with raux.fit_leverage on the
# in-sample rows (period flag 0 or 1) of each option type / maturity bucket.
max_period = 13
# (cp_int value, tau0 lower bound inclusive, tau0 upper bound exclusive,
#  option-type label, maturity label)
mat = [(0, 0., 1/12., 'Call', 'short'), (0, 1/12., 1/2., 'Call', 'middle'), (0, 1/2., 2., 'Call', 'long'),
      (1, 0., 1/12., 'Put', 'short'), (1, 1/12., 1/2., 'Put', 'middle'), (1, 1/2., 2., 'Put', 'long')]

# Collect one record per window and build the frame once. The previous version
# created the frame with index range(max_period) (one row short of the
# max_period + 1 windows looped over) and grew it cell by cell through .loc.
# NOTE(review): assumes raux.fit_leverage returns a scalar -- confirm.
leve_rows = []
for i in range(max_period + 1):
    df_train = df.loc[df[f'period{i}'].isin([0, 1])]
    row = {}
    for c, mat_low, mat_high, n1, n2 in mat:
        bl = (
            (df_train['cp_int'] == c)
            & (df_train['tau0'] >= mat_low)
            & (df_train['tau0'] < mat_high)
        )
        row[f'{n1} ({n2})'] = raux.fit_leverage(df_train.loc[bl])
    leve_rows.append(row)

leve = pd.DataFrame(
    leve_rows,
    index=range(max_period + 1),
    columns=[f'{kind} ({bucket})' for _, _, _, kind, bucket in mat],
)

In [None]:
# Column order used when plotting the leverage coefficients:
# longest maturities first, put before call within each maturity.
cat = [
    'Put (long)',
    'Call (long)',
    'Put (middle)',
    'Call (middle)',
    'Put (short)',
    'Call (short)',
]

In [None]:
# Leverage coefficient per time window (numbered from 1), one marker-only
# series per entry of `cat`.
fig = plt.figure()
for x in cat:
    plt.plot(leve.index+1, leve[x], marker='o', label=x, linewidth=0)
plt.ylim((-0.5, 0.5))
# Single legend, laid out in three columns below the axes. The earlier bare
# plt.legend() call was immediately replaced by this one and has been dropped.
plt.legend(bbox_to_anchor=(0,-.35,1,0.2), loc="lower left",
                mode="expand", borderaxespad=0, ncol=3)
plt.xlabel('Time window')
plt.ylabel('Leverage coefficient')
plt.title('S&P 500')

In [None]:
# Leverage coefficient per time window, fitted with raux.fit_leverage on the
# in-sample rows (period flag 0 or 1) of each option type, using a single
# maturity bucket 0 <= tau0 < 2.
max_period = 13
# (cp_int value, tau0 lower bound inclusive, tau0 upper bound exclusive,
#  option-type label, maturity label)
# NOTE(review): the bucket spans 0..2 yet is labelled 'short'; the label is
# kept because it determines the column names of `leve` -- confirm intent.
mat = [(0, 0., 2., 'Call', 'short'), 
      (1, 0., 2., 'Put', 'short')]

# Collect one record per window and build the frame once. The previous version
# created the frame with index range(max_period) (one row short of the
# max_period + 1 windows looped over) and grew it cell by cell through .loc.
# NOTE(review): assumes raux.fit_leverage returns a scalar -- confirm.
leve_rows = []
for i in range(max_period + 1):
    df_train = df.loc[df[f'period{i}'].isin([0, 1])]
    row = {}
    for c, mat_low, mat_high, n1, n2 in mat:
        bl = (
            (df_train['cp_int'] == c)
            & (df_train['tau0'] >= mat_low)
            & (df_train['tau0'] < mat_high)
        )
        row[f'{n1} ({n2})'] = raux.fit_leverage(df_train.loc[bl])
    leve_rows.append(row)

leve = pd.DataFrame(
    leve_rows,
    index=range(max_period + 1),
    columns=[f'{kind} ({bucket})' for _, _, _, kind, bucket in mat],
)

cat = ['Call (short)',  'Put (short)']

fig = plt.figure()
for x in cat:
    plt.plot(leve.index+1, leve[x], marker='o', label=x, linewidth=0)
plt.ylim((-0.5, 0.5))
# Single legend below the axes. The earlier bare plt.legend() call was
# immediately replaced by this one and has been dropped.
plt.legend(bbox_to_anchor=(0,-.35,1,0.2), loc="lower left",
                mode="expand", borderaxespad=0, ncol=3)
plt.xlabel('Time window')
plt.ylabel('Leverage coefficient')
plt.title('S&P 500')