In [1]:
import pandas as pd
import numpy as np
import sys
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 1000)
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from pandas.tseries.offsets import BDay
from sklearn.linear_model import LinearRegression
import os
import getpass

In [2]:
# Resolve the project root directory used as the prefix of every data/output path.
# Resolution order:
#   1. the HEDGE_VOL_WORKDIR environment variable (lets other users run the notebook),
#   2. the known per-platform Dropbox locations of user 'ygnmax'.
# An explicit error is raised when neither applies, instead of the NameError that
# `os.chdir(workdir)` would otherwise produce on an unbound name.
workdir = os.environ.get('HEDGE_VOL_WORKDIR')
if workdir is None and getpass.getuser() in ['ygnmax']:
    if sys.platform == 'linux':
        workdir = '/home/ygnmax/Dropbox/research_nyu/hedge_vol/'
    elif sys.platform == 'win32':
        workdir = 'C:/Users/ygnmax/Dropbox/research_nyu/hedge_vol/'
if workdir is None:
    raise RuntimeError(
        'Project directory is unknown for user ' + repr(getpass.getuser())
        + ' on platform ' + repr(sys.platform)
        + '; set the HEDGE_VOL_WORKDIR environment variable.'
    )
# Paths below are built by plain string concatenation, so a trailing separator is required.
if not workdir.endswith(('/', '\\')):
    workdir = workdir + '/'
os.chdir(workdir)

# Hedging strategies

In [3]:
def zero_hedge(df_input):
    """Mean squared hedging error when no position in the underlying is held.

    The per-row error is ``V0_n * on_ret - V1_n``.

    Parameters
    ----------
    df_input : pandas.DataFrame
        Must contain the columns 'V0_n', 'V1_n', 'on_ret' and 'CallPut'.
        The frame is copied, so the caller's data is not modified.

    Returns
    -------
    tuple
        (mean squared error over rows with CallPut == 'C',
         mean squared error over rows with CallPut == 'P').
    """
    frame = df_input.copy()
    frame['Zero_Hedge_PL'] = frame['V0_n'] * frame['on_ret'] - frame['V1_n']

    # Square once, then average separately for calls and puts.
    squared_error = frame['Zero_Hedge_PL'] ** 2
    is_call = frame['CallPut'] == 'C'
    is_put = frame['CallPut'] == 'P'
    return np.mean(squared_error[is_call]), np.mean(squared_error[is_put])

def BS_hedge(df_input, delta_var):
    """Mean squared hedging error when hedging with the delta stored in `delta_var`.

    The per-row error is
    ``delta * S1_n + on_ret * (V0_n - delta * S0_n) - V1_n``.

    Parameters
    ----------
    df_input : pandas.DataFrame
        Must contain 'S0_n', 'S1_n', 'V0_n', 'V1_n', 'on_ret', 'CallPut'
        and the column named by `delta_var`. The frame is copied.
    delta_var : str
        Name of the column holding the hedge ratio.

    Returns
    -------
    tuple
        (mean squared error for calls, mean squared error for puts).
    """
    frame = df_input.copy()
    hedge_ratio = frame[delta_var]
    financed = frame['V0_n'] - hedge_ratio * frame['S0_n']
    frame['BS_PL'] = hedge_ratio * frame['S1_n'] + frame['on_ret'] * financed - frame['V1_n']

    squared_error = frame['BS_PL'] ** 2
    call_mse = np.mean(squared_error[frame['CallPut'] == 'C'])
    put_mse = np.mean(squared_error[frame['CallPut'] == 'P'])
    return call_mse, put_mse

def Fixed_BS_Hedge(df_input, delta_var, call_coef = 0.9, put_coef = 1.1, whole_period = False):
    """Mean squared hedging error when the delta is scaled by a fixed coefficient.

    Calls are hedged with ``call_coef * delta`` and puts with ``put_coef * delta``;
    the per-row error is
    ``DeltaFixed * S1_n + on_ret * (V0_n - DeltaFixed * S0_n) - V1_n``.

    Parameters
    ----------
    df_input : pandas.DataFrame
        Must contain 'S0_n', 'S1_n', 'V0_n', 'V1_n', 'on_ret', 'CallPut'
        and the column named by `delta_var`. The frame is copied.
    delta_var : str
        Name of the column holding the unscaled hedge ratio.
    call_coef, put_coef : float
        Multipliers applied to the delta of calls and puts respectively.
    whole_period : bool
        Accepted for interface compatibility; not used by the function body.

    Returns
    -------
    tuple
        (mean squared error for calls, mean squared error for puts).
    """
    frame = df_input.copy()

    # Rows that are neither 'C' nor 'P' keep a missing DeltaFixed.
    for flag, scale in (('C', call_coef), ('P', put_coef)):
        frame.loc[frame['CallPut'] == flag, 'DeltaFixed'] = scale * frame[delta_var]

    scaled_delta = frame['DeltaFixed']
    financed = frame['V0_n'] - scaled_delta * frame['S0_n']
    frame['Fixed_PL'] = scaled_delta * frame['S1_n'] + frame['on_ret'] * financed - frame['V1_n']

    squared_error = frame['Fixed_PL'] ** 2
    call_mse = np.mean(squared_error[frame['CallPut'] == 'C'])
    put_mse = np.mean(squared_error[frame['CallPut'] == 'P'])
    return call_mse, put_mse


In [4]:
def _atm_vol_and_price(df_atm, tenor):
    """Return (implied vol, option price) read from the IV_ATM<tenor> / V_ATM<tenor> columns.

    The smallest distinct value of each column is used (np.unique sorts ascending).
    Both values are NaN when `df_atm` has no rows; the vol alone is replaced by NaN
    when it falls outside [0.01, 5].
    """
    if len(df_atm) == 0:
        return np.nan, np.nan
    vol = np.unique(df_atm.loc[:, 'IV_ATM' + str(tenor)])[0]
    price = np.unique(df_atm.loc[:, 'V_ATM' + str(tenor)])[0]
    if (vol < 0.01) | (vol > 5):
        vol = np.nan
    return vol, price


def calc_pnl(train, test, weight, delta_var = 'Delta'): 
    """Fit a per-option-type delta scaling on `train` and evaluate the hedging PNL on `test`.

    For each value of 'CallPut' in the training set a weighted, intercept-free linear
    regression is fitted of
        y = V1_n - V0_n * on_ret
    on
        x = delta * (S1_n - S0_n * on_ret),
    and the fitted coefficient times the test-set delta is used as the hedge ratio.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Must contain 'Date', 'CallPut', 'S0_n', 'S1_n', 'V0_n', 'V1_n', 'on_ret',
        the column named by `delta_var`; `train` additionally needs 'Maturity' and
        `test` needs 'M0', 'tau0', 'OptionID' and the IV_ATM30/60/90 and
        V_ATM30/60/90 columns. Both frames are copied.
    weight : pandas.DataFrame
        Columns 'Date' and 'weight'; left-joined onto the training rows by 'Date'
        and passed to the regression as sample weights.
    delta_var : str
        Name of the delta column used as regressor and as base hedge ratio.

    Returns
    -------
    df_PNL : pandas.DataFrame
        One row per test row with 'Date', 'CallPut', 'delta' (fitted hedge ratio),
        'PNL', 'M0', 'tau0', 'OptionID'. 'delta'/'PNL' are NaN where no model was fitted.
    dict_output : dict
        Per option type: coefficient, its standard error, sample sizes, ATM
        vol/price snapshots of the test set and the training maturities.
    dict_plot : dict
        Arrays keyed '<type>_x_train', '<type>_y_train', ... for plotting; keys of a
        type are absent when that type could not be fitted or has no test rows.
    group : pandas.DataFrame or None
        The training rows of the last option type processed (with weights merged
        when a regression was fitted); None when the training set is empty.
    """
    df_train = train.copy()
    df_test = test.copy()
    df_weight = weight.copy()
    
    delta_hat = pd.Series(index=df_test.index, dtype = 'float64')
    dict_output = {}
    dict_plot = {}
    # Bound up front so the final `return` cannot hit an unbound name when
    # the training frame is empty and the loop below never runs.
    group = None
    
    # Guarantee that both option types show up in dict_output: when only one type is
    # present, append a placeholder row of the other type (it then takes the
    # "insufficient observations" path below).
    if len(np.unique(df_train['CallPut'])) == 1:
        if np.unique(df_train['CallPut'])[0] == 'C':
            df_train.loc[-1, 'CallPut'] = 'P'
        else:
            df_train.loc[-1, 'CallPut'] = 'C'
    
    # Group by the column label (not a one-element list): with a list, pandas >= 2.0
    # yields 1-tuples as keys, which breaks the string handling of `optype` below.
    for optype, group in df_train.groupby(by='CallPut'):
        test_index = df_test.loc[df_test['CallPut'] == optype].index
        dict_coef = {}        
        if len(group) < 2:
            print('training set of ' + optype + ' has insufficient observations')
            coef = [np.nan]
            std = np.nan
        else:
            ## add weights
            group = pd.merge(group, df_weight, how = 'left', on = 'Date')
            # Rows of the oldest training date, kept separately for plotting.
            earliest = group.loc[group['Date'] == np.min(group['Date'].values), :].copy()
             
            ## y_train is the change of option price in the training set
            y_train = group['V1_n'] - group['V0_n'] * group['on_ret']
            ## x_train is Delta times the change of stock price in the training set
            w_train = group.loc[:, 'weight'].copy()
            x_train = group.loc[:, delta_var].copy()
            x_train = x_train.multiply(group['S1_n'] - group['S0_n'] * group['on_ret'], axis=0).values.reshape(-1,1)
            lin = LinearRegression(fit_intercept=False).fit(x_train, y_train, sample_weight = group['weight'])
            coef = lin.coef_
            
            y_earliest = earliest['V1_n'] - earliest['V0_n'] * earliest['on_ret']
            x_earliest = earliest.loc[:, delta_var].copy()
            x_earliest = x_earliest.multiply(earliest['S1_n'] - earliest['S0_n'] * earliest['on_ret'], axis=0).values.reshape(-1,1)
            w_earliest = earliest.loc[:, 'weight'].copy()
            
            dict_plot[optype + '_x_train'] = x_train
            dict_plot[optype + '_y_train'] = y_train
            dict_plot[optype + '_w_train'] = w_train
            dict_plot[optype + '_coef'] = coef
            dict_plot[optype + '_x_earliest'] = x_earliest
            dict_plot[optype + '_y_earliest'] = y_earliest
            dict_plot[optype + '_w_earliest'] = w_earliest
            
            ## calculate the standard error of coefficient
            # NOTE(review): this is the unweighted OLS formula although the fit above
            # uses sample weights -- confirm that this is intended.
            y_hat_train = lin.predict(x_train)
            residual_sum_of_square = ((y_train - y_hat_train) ** 2).sum()
            sigma_square_hat = residual_sum_of_square / (x_train.shape[0] - x_train.shape[1])
            var_beta = (np.linalg.inv(x_train.T @ x_train) * sigma_square_hat)
            std = [np.sqrt(var_beta[k, k]) for k in range(len(var_beta))]
            
            if len(test_index) < 1:
                print('test set of ' + optype + ' has insufficient observations')
            else:
                # No intercept, so predicting on the raw delta gives coef * delta,
                # i.e. the rescaled hedge ratio for the test rows.
                delta_hat.loc[test_index] = lin.predict(df_test.loc[test_index, delta_var].values.reshape(-1,1))
                
                y_test = df_test.loc[test_index, 'V1_n'] - df_test.loc[test_index, 'V0_n'] * df_test.loc[test_index, 'on_ret']
                x_test = df_test.loc[test_index, delta_var].copy()
                x_test = x_test.multiply(df_test.loc[test_index, 'S1_n'] - df_test.loc[test_index, 'S0_n'] * df_test.loc[test_index, 'on_ret'], axis=0).values.reshape(-1,1)  

                dict_plot[optype + '_x_test'] = x_test
                dict_plot[optype + '_y_test'] = y_test
                dict_plot[optype + '_predict'] = lin.predict(x_test)   
            
        dict_coef['type'] = optype
        dict_coef['coef'] = coef
        dict_coef['std'] = std
        dict_coef['N_train'] = len(group)
        dict_coef['days_train'] = len(np.unique(group.Date))
        dict_coef['N_test'] = len(df_test.loc[test_index])
        dict_coef['days_test'] = len(np.unique(df_test.loc[test_index].Date))
                
        # ATM snapshots of the test set; each tenor's vol is validated against its
        # own value (the 90-day check previously tested the 60-day vol).
        df_test_atm = df_test.loc[(df_test['CallPut'] == optype), :]
        atm30_vol, atm30_v = _atm_vol_and_price(df_test_atm, 30)
        atm60_vol, atm60_v = _atm_vol_and_price(df_test_atm, 60)
        atm90_vol, atm90_v = _atm_vol_and_price(df_test_atm, 90)
       
        dict_coef['atm30_vol_test'] = atm30_vol
        dict_coef['atm30_oprice_test'] = atm30_v
        dict_coef['atm60_vol_test'] = atm60_vol
        dict_coef['atm60_oprice_test'] = atm60_v        
        dict_coef['atm90_vol_test'] = atm90_vol
        dict_coef['atm90_oprice_test'] = atm90_v 
               
        dict_coef['list_maturity_train'] = np.unique(group.Maturity)
        dict_output[optype] = dict_coef

    #------------------------------------------
    ## calculate PNL using the estimated delta
    #------------------------------------------
    pnl = delta_hat * df_test['S1_n'] + (df_test['V0_n'] - delta_hat * df_test['S0_n']) * df_test['on_ret']  - df_test['V1_n']  

    df_PNL = df_test[['Date', 'CallPut']].copy()
    df_PNL['delta'] = delta_hat
    df_PNL['PNL'] = pnl
    df_PNL['M0'] = df_test['M0'].copy()
    df_PNL['tau0'] = df_test['tau0'].copy()
    df_PNL['OptionID'] = df_test['OptionID'].copy()
    return df_PNL, dict_output, dict_plot, group

In [5]:
def merge_syn(df_input, stock_id):
    """Attach the 30/60/90-day rows of a stock's synthetic-option file to `df_input`.

    The file ``data/processed/synthetic/syn_WRDS_2021/df_<stock_id>.csv`` (located
    under the notebook-level `workdir`) is read; its rows with Maturity 30, 60 and 90
    are renamed to IV_ATM<m> / V_ATM<m> and combined on ('Date', 'CallPut'). The
    30-day slice also contributes the 'StockPrice' column.

    Parameters
    ----------
    df_input : pandas.DataFrame
        Frame with 'Date' and 'CallPut' columns; it is copied, not modified.
    stock_id
        Identifier used to build the file name (converted with ``str``).

    Returns
    -------
    pandas.DataFrame
        Left join of `df_input` with the combined 30/60/90-day columns.
    """
    join_keys = ['Date', 'CallPut']
    synthetic = pd.read_csv(
        workdir + 'data/processed/synthetic/syn_WRDS_2021/df_' + str(stock_id) + '.csv',
        parse_dates = ['Date', 'Expiration'],
    )

    def _maturity_slice(days, extra_columns = ()):
        # Rows of one maturity, with price/vol columns tagged by that maturity.
        wanted = join_keys + list(extra_columns) + ['OptionPrice', 'IV']
        renamed = {'IV': 'IV_ATM' + str(days), 'OptionPrice': 'V_ATM' + str(days)}
        return synthetic.loc[synthetic['Maturity'] == days, wanted].rename(columns = renamed)

    # The 30-day slice is the left side, so dates without a 30-day row are dropped.
    combined = _maturity_slice(30, extra_columns = ['StockPrice'])
    for days in (60, 90):
        combined = combined.merge(_maturity_slice(days), on = join_keys, how = 'left')

    return df_input.copy().merge(combined, how = 'left', on = join_keys)

# Main

In [6]:
##############################
# Get stock list / dictionary
##############################
# Columns shared by the three firm lists.
stock_columns = ['Name','Ticker','SecurityID', 'Internet']

# Each workbook is read, rows with any missing value are dropped, and the
# security identifier is cast to integer.
df_bubble = (
    pd.read_excel(workdir + "data/dot_com_firms.xlsx", engine = 'openpyxl')
    .dropna()
    .astype({"SecurityID": int})
)
df_big = (
    pd.read_excel(workdir + "data/big_firms.xlsx", engine = 'openpyxl')
    .dropna()
    .astype({"SecurityID": int})
)
# read other companies
df_other = (
    pd.read_excel(workdir + "data/other_firms.xlsx")
    .dropna()
    .astype({"SecurityID": int})
)

# Stack the three lists into one table with a fresh 0..n-1 index.
df_stock_list = (
    pd.concat([frame[stock_columns] for frame in (df_bubble, df_big, df_other)])
    .astype({"Internet": int})
    .dropna()
    .reset_index(drop = True)
)

# Lookup tables keyed by SecurityID: firm name and the 'Internet' flag.
# On duplicate SecurityIDs the last row wins.
ids = dict(zip(df_stock_list['SecurityID'].values, df_stock_list['Name'].values))
dict_tech_label = dict(zip(df_stock_list['SecurityID'].values, df_stock_list['Internet'].values))

## Calculate the PNL

In [15]:
# ---------------------------------------------------------------------------
# Run configuration: rolling step, delta variant, moneyness range, training
# window and the derived input/output locations.
# ---------------------------------------------------------------------------
step = 1  # number of test dates handled per rolling iteration
step_path = '/step_' + str(step) + 'd_'

# 1 -> use the 'Delta_c' column from the 'delta_<id>.csv' files,
# 0 -> use the 'Delta' column from the 'df_<id>.csv' files.
adjusted_delta = 1
if adjusted_delta == 1:
    adjusted_delta_path = 'adjusted_delta_'
    input_path = workdir + 'data/processed/intermediate/WRDS_2021/' + 'delta_'
    delta_var = 'Delta_c'
elif adjusted_delta == 0:
    adjusted_delta_path = 'raw_delta_'
    input_path = workdir + 'data/processed/intermediate/WRDS_2021/' + 'df_'
    delta_var = 'Delta'
else:
    # Fail fast: merely printing would leave the path variables unbound (fresh
    # kernel) or silently reuse stale values from a previous run.
    raise ValueError('Set the correct delta: adjusted_delta must be 0 or 1, got ' + repr(adjusted_delta))


rolling_weights = True

M_min = 0
M_max = 100
train_length = 20 # ([train_length, parameter]: [240, 0.99], [360, 0.995])

# Per-date regression weights, oldest date first: decay**(train_length-1), ..., decay**0.
# Without rolling weights the decay is 1, i.e. all dates are weighted equally.
if rolling_weights:
    train_length_path = 'train_length_' + str(train_length) + 'd_wt/'
    weight_decay = 0.99
else:
    train_length_path = 'train_length_' + str(train_length) + 'd/'
    weight_decay = 1.00
wt_exp = [weight_decay**k for k in range(train_length-1, -1, -1)]

output_path = workdir + 'output/regression/moneyness_' + str(M_min) + '_' + str(M_max) + step_path + adjusted_delta_path + train_length_path

# Create the output tree. Every sub-directory is ensured individually so that a
# partially created tree (root present, sub-folders missing) is repaired as well.
os.makedirs(output_path, exist_ok=True)
for subdir in ('coef', 'MSHE', 'coefplot', 'PNL', 'PNL_plot', 'scatter'):
    os.makedirs(output_path + subdir, exist_ok=True)

In [16]:
def plot_scatter(dict_plot_one, securityid, date_in, output_path, version = adjusted_delta, ticker = None):
    """Save a two-panel (call / put) scatter plot of one rolling regression.

    Each panel shows the training points (marker size proportional to the
    regression weight), the fitted line through the origin, the test points (red)
    and the points of the earliest training date (grey). A panel whose data is
    missing from `dict_plot_one` (no fit or no test rows for that option type) is
    left without the corresponding elements instead of raising a KeyError.

    Parameters
    ----------
    dict_plot_one : dict
        Plot data keyed '<C|P>_x_train', '<C|P>_y_train', '<C|P>_w_train',
        '<C|P>_coef', '<C|P>_x_earliest', '<C|P>_y_earliest', '<C|P>_w_earliest',
        '<C|P>_x_test', '<C|P>_y_test'.
    securityid
        Security identifier; used for the sub-folder and the file name.
    date_in : sequence
        Test dates of this iteration; the first one is used in the file name.
    output_path : str
        Root output directory (must end with a path separator).
    version : int
        0 labels the x-axis 'Raw Delta', anything else 'Adjusted Delta'. The default
        is the value of the notebook-level `adjusted_delta` when this cell is run.
    ticker : str, optional
        Ticker used in the file name. When omitted, the notebook-level variable
        `tickername` (set by the main loop) is used.

    Side effects
    ------------
    Sets the global matplotlib font size to 25 and writes
    '<output_path>scatter/<securityid>/<securityid>_<ticker>_<date>.jpg'.
    """
    greek_letter = 'Raw Delta' if version == 0 else 'Adjusted Delta'
    if ticker is None:
        ticker = tickername  # notebook-level global assigned in the main loop

    plt.rc('font', size=25)          # controls default text sizes
    datestr = pd.to_datetime(date_in[0]).strftime('%Y-%m-%d')
    fig, axs = plt.subplots(1, 2, figsize=(18,9), sharex=True, sharey=True)
    plt.setp(axs[:], xlabel=greek_letter + " X Change of Stock Price")
    plt.setp(axs[0], ylabel="Change of Option Price")

    for ax, prefix, label in zip(axs, ('C', 'P'), ('Call', 'Put')):
        has_train = (prefix + '_x_train') in dict_plot_one
        has_test = (prefix + '_x_test') in dict_plot_one

        if has_train:
            x_train = dict_plot_one[prefix + '_x_train']
            coef = dict_plot_one[prefix + '_coef']
            # Marker area scales with the regression weight of each observation.
            ax.scatter(x_train, dict_plot_one[prefix + '_y_train'], s = dict_plot_one[prefix + '_w_train'] * 30)
            ax.plot(x_train, coef * x_train, color='tab:orange')
        if has_test:
            ax.scatter(dict_plot_one[prefix + '_x_test'], dict_plot_one[prefix + '_y_test'], color='tab:red')
        if has_train:
            ax.scatter(dict_plot_one[prefix + '_x_earliest'], dict_plot_one[prefix + '_y_earliest'],
                       s = dict_plot_one[prefix + '_w_earliest'] * 30, color='tab:grey')
            ax.annotate(label + ' Coefficient: ' + str(round(coef[0], 4)),
                        xy=(0, 1), xytext=(12, -12), va='top', xycoords='axes fraction', textcoords='offset points')
        ax.set_title(label)
    fig.tight_layout()

    scatter_output_path = output_path + 'scatter/' + str(securityid) + '/'
    os.makedirs(scatter_output_path, exist_ok=True)
    fig.savefig(scatter_output_path + str(securityid) + '_' + ticker + '_' + datestr + '.jpg')

    # Close explicitly: this function is called once per test date.
    plt.close(fig)

In [17]:
# ---------------------------------------------------------------------------
# Main loop: for every selected stock, run the rolling regression, store the
# per-option PNL, the mean squared hedging errors (MSHE) and the coefficient
# time series.
# ---------------------------------------------------------------------------
j = 0
df_MSHE_all = pd.DataFrame()
for i in [100]: # row label(s) of df_stock_list to process, e.g. range(100, 119, 1)
    securityid = df_stock_list.loc[i, 'SecurityID']
    i = securityid  # from here on `i` is the SecurityID, not the row label
    tickername = df_stock_list.loc[df_stock_list['SecurityID'] == securityid, 'Ticker'].values[0].strip()
    j = j + 1
    print('*********************************************')
    print('No.' + str(j) + ' processing '+ str(i))     
    
    if not os.path.exists(input_path + str(i) + '.csv'):
        print('Stock '+ str(i) + ' is not available')
        continue

    temp = pd.read_csv(input_path + str(i) + ".csv", parse_dates = ['Date'])
    temp = temp.loc[~temp['IV0'].isna(), :]
    #------------------
    # Shrink moneyness
    #------------------
    bl = (temp['M0'] >= M_min-0.001) & (temp['M0'] <= M_max+0.001)
    # Copy so the column assignment below writes to an independent frame
    # (avoids pandas' SettingWithCopyWarning on a filtered slice).
    temp = temp.loc[bl].copy()

    #---------------------
    # more cleaning
    #---------------------
    # continuous version rate: one-day growth factor with 253 days per year
    temp['on_ret'] = np.exp(temp['short_rate'] * 1 / 253)
    
    # Keep calls with Delta_c in (0.01, 10) and puts with Delta_c in (-10, -0.01).
    bl_C = ((temp['CallPut'] == 'C') & (temp['Delta_c'] < 10) & (temp['Delta_c'] > 0.01))
    bl_P = (temp['CallPut'] == 'P') & (temp['Delta_c'] > -10) & (temp['Delta_c'] < -0.01)
    temp = temp[bl_C | bl_P]
    
    temp = merge_syn(temp, securityid)

    df_MSHE = pd.DataFrame()
    # NOTE(review): this cell writes 'coef_<id>_ts.csv' but the check looks for
    # 'coef_<id>.csv'; unless another step creates that file the skip below never
    # triggers and every stock is recomputed -- confirm which name is intended.
    exist_coef = os.path.exists(output_path + 'coef/coef_' + str(i) + '.csv')
    exist_mshe = os.path.exists(output_path + 'MSHE/MSHE_' + str(i) + '.csv')
    if exist_coef and exist_mshe:
        print('Stock '+ str(i) + ' MSHE result has existed already')        
        continue

    pnl_frames = []   # per-iteration PNL frames, concatenated once after the loops
    dict_output = {} 
    
    all_date = np.unique(temp.loc[:, 'Date'])
    train_date = all_date[0:train_length]
    test_date = all_date[train_length:]             
    df_test = temp[temp['Date'].isin(test_date)] 
    if len(test_date) == 0:
        print('insufficient observations because length of test date is 0')
        continue

    #-----------------------------------------------------------------
    # Expanding-window phase (only when data starts before 2020):
    # train on everything seen so far with equal weights, test on the
    # dates after 2020-01-01 that precede the first rolling test date.
    #-----------------------------------------------------------------
    if all_date[0] < pd.to_datetime('2020-01-01'):
        pre_train_date = [t for t in all_date if t < pd.to_datetime('2020-01-01')] 
        pre_test_date = [t for t in all_date if (pd.to_datetime('2020-01-01') < t) & (t < test_date[0])] 

        for s in range(int(np.ceil(len(pre_test_date)/step))):
            if len(pre_test_date) == 0:
                continue                    
            pre_test_date_in = pre_test_date[0:step]
            first_pre_test_date = str(pre_test_date_in[0])[:10]
            df_train_rolling = temp[temp['Date'].isin(pre_train_date)]
            df_test_rolling = temp[temp['Date'].isin(pre_test_date_in)]
            pre_wt_exp = [1.00 for _ in pre_train_date]
            df_weight_rolling = pd.DataFrame({'Date':pre_train_date, 'weight':pre_wt_exp})
            
            # Pass the configured delta column so the regression uses the same
            # delta as the benchmark hedges below (the default is raw 'Delta').
            df_PNL_one, dict_output_one, dict_plot_one, weights_test = calc_pnl(
                df_train_rolling, df_test_rolling, df_weight_rolling, delta_var = delta_var)
            pnl_frames.append(df_PNL_one)
            dict_output[first_pre_test_date] = dict_output_one

            plot_scatter(dict_plot_one, securityid, date_in = pre_test_date_in, output_path = output_path) 
            
            pre_train_date = np.concatenate([pre_train_date, pre_test_date[0:step]])
            pre_test_date = pre_test_date[step:]

    #-----------------------------------------------------------------
    # Rolling-window phase: fixed-length training window with the
    # configured weights, advanced by `step` dates per iteration.
    #-----------------------------------------------------------------
    for s in range(int(np.ceil(len(test_date)/step))):
        test_date_in = test_date[0:step]
        first_test_date = str(test_date_in[0])[:10]
        df_train_rolling = temp[temp['Date'].isin(train_date)]
        df_test_rolling = temp[temp['Date'].isin(test_date_in)]
        df_weight_rolling = pd.DataFrame({'Date':train_date, 'weight':wt_exp})
        
        df_PNL_one, dict_output_one, dict_plot_one, weights_test = calc_pnl(
            df_train_rolling, df_test_rolling, df_weight_rolling, delta_var = delta_var)
        pnl_frames.append(df_PNL_one)
        dict_output[first_test_date] = dict_output_one

        plot_scatter(dict_plot_one, securityid, date_in = test_date_in, output_path = output_path) 
        
        train_date = np.concatenate([train_date[step:], test_date[0:step]])
        test_date = test_date[step:]

    df_PNL = pd.concat(pnl_frames) if pnl_frames else pd.DataFrame()
    if len(df_PNL) == 0:
        # Placeholder rows so the per-type means below evaluate to NaN.
        df_PNL.loc[0, 'CallPut'] = 'C'
        df_PNL.loc[0, 'PNL'] = np.nan
        df_PNL.loc[1, 'CallPut'] = 'P'
        df_PNL.loc[1, 'PNL'] = np.nan 
    
    df_PNL.to_csv(output_path + 'PNL/PNL_' + str(i) + '.csv', index = False)
    
    # NOTE(review): the benchmarks use `df_test` (rolling test dates only) while
    # df_PNL also contains the expanding-window test dates -- confirm the samples
    # are meant to differ.
    bs_mshe_C, bs_mshe_P = BS_hedge(df_test, delta_var)
    fixed_mshe_C, fixed_mshe_P = Fixed_BS_Hedge(df_test, delta_var)
    df_MSHE.loc[i, "name"] = ids[i]
    df_MSHE.loc[i, "bubble_stock"] = dict_tech_label[i]
    df_MSHE.loc[i, "BS_hedge_C"] = bs_mshe_C
    df_MSHE.loc[i, "BS_hedge_P"] = bs_mshe_P
    df_MSHE.loc[i, "Fixed_hedge_C"] = fixed_mshe_C
    df_MSHE.loc[i, "Fixed_hedge_P"] = fixed_mshe_P
    df_MSHE.loc[i, "delta_hedge_C"] = np.mean(df_PNL[df_PNL['CallPut'] == 'C'].loc[:,'PNL']**2)
    df_MSHE.loc[i, "delta_hedge_P"] = np.mean(df_PNL[df_PNL['CallPut'] == 'P'].loc[:,'PNL']**2)    
    df_MSHE.to_csv(output_path + 'MSHE/MSHE_' + str(i) + '.csv', index = False) 
    df_MSHE_all = pd.concat([df_MSHE_all, df_MSHE])                     

    # One row per test date with the call and put regression diagnostics.
    df_coef_ts = pd.DataFrame()
    r = 0
    for key in dict_output:
        df_coef_ts.loc[r, 'Date_str'] = key
        df_coef_ts.loc[r, 'Date'] = pd.to_datetime(key)
        df_coef_ts.loc[r, 'coef_C'] = dict_output[key]['C']['coef']
        df_coef_ts.loc[r, 'std_C'] = dict_output[key]['C']['std']
        df_coef_ts.loc[r, 'N_train_C'] = dict_output[key]['C']['N_train']
        df_coef_ts.loc[r, 'days_train_C'] = dict_output[key]['C']['days_train']
        df_coef_ts.loc[r, 'N_test_C'] = dict_output[key]['C']['N_test']
        df_coef_ts.loc[r, 'atm30_vol_test_C'] = dict_output[key]['C']['atm30_vol_test']
        df_coef_ts.loc[r, 'atm60_vol_test_C'] = dict_output[key]['C']['atm60_vol_test']
        df_coef_ts.loc[r, 'atm90_vol_test_C'] = dict_output[key]['C']['atm90_vol_test']                
        df_coef_ts.loc[r, 'atm30_optice_test_C'] = dict_output[key]['C']['atm30_oprice_test']
        df_coef_ts.loc[r, 'atm60_optice_test_C'] = dict_output[key]['C']['atm60_oprice_test']   
        df_coef_ts.loc[r, 'atm90_optice_test_C'] = dict_output[key]['C']['atm90_oprice_test']                 

        df_coef_ts.loc[r, 'coef_P'] = dict_output[key]['P']['coef']
        df_coef_ts.loc[r, 'std_P'] = dict_output[key]['P']['std']
        # NOTE(review): the put training count is stored negated, unlike every other
        # count in this table -- confirm whether the sign is intentional.
        df_coef_ts.loc[r, 'N_train_P'] = -dict_output[key]['P']['N_train']
        df_coef_ts.loc[r, 'days_train_P'] = dict_output[key]['P']['days_train']
        df_coef_ts.loc[r, 'N_test_P'] = dict_output[key]['P']['N_test']   
        df_coef_ts.loc[r, 'atm30_vol_test_P'] = dict_output[key]['P']['atm30_vol_test']
        df_coef_ts.loc[r, 'atm60_vol_test_P'] = dict_output[key]['P']['atm60_vol_test']
        df_coef_ts.loc[r, 'atm90_vol_test_P'] = dict_output[key]['P']['atm90_vol_test']                
        df_coef_ts.loc[r, 'atm30_optice_test_P'] = dict_output[key]['P']['atm30_oprice_test']
        df_coef_ts.loc[r, 'atm60_optice_test_P'] = dict_output[key]['P']['atm60_oprice_test'] 
        df_coef_ts.loc[r, 'atm90_optice_test_P'] = dict_output[key]['P']['atm90_oprice_test']                 
        r += 1

    # Add the underlying's price columns for each date.
    df_coef_ts = df_coef_ts.merge(temp[['Date', 'S0', 'AdjClosePrice', 'AdjClosePrice2']].drop_duplicates(), how = 'left', on = 'Date')
    df_coef_ts.to_csv(output_path + 'coef/coef_' + str(i) + '_ts.csv', index = False) 

*********************************************
No.1 processing 113993


# Result

## Hedging Error

In [None]:
# Relative change of the mean squared hedging error (MSHE) of each strategy
# versus the plain delta hedge benchmark: negative values mean the strategy
# has a smaller MSHE than the benchmark.
df_MSHE = df_MSHE_all.copy()
for strategy in ('delta', 'Fixed'):
    for side in ('C', 'P'):
        df_MSHE[strategy + '_' + side + '_pct_chg_MSHE'] = (
            df_MSHE[strategy + '_hedge_' + side] / df_MSHE['BS_hedge_' + side] - 1
        )
# Stocks with any missing value (e.g. no calls or no puts) are excluded.
df_MSHE = df_MSHE.dropna()
# The index holds the SecurityID, so it is written out (boolean flag, not the string 'True').
df_MSHE.to_csv(output_path + 'MSHE_GME_AMC.csv', index = True)
    
print("whole:")
print("Number of stocks in total:", len(df_MSHE))
for strategy in ('delta', 'Fixed'):
    for side, side_label in (('C', 'Call'), ('P', 'Put')):
        improved = df_MSHE[df_MSHE[strategy + '_' + side + '_pct_chg_MSHE'] < 0.0]
        print("Number of stocks which has less MSHE with " + strategy + " hedge (" + side_label + ")", len(improved))
df_MSHE.sort_values(by = ["bubble_stock", "delta_C_pct_chg_MSHE"], ascending = False)