In [1]:
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 1000)
from pandas.tseries.offsets import BDay
from scipy.optimize import minimize
import functools as ft
from joblib import Parallel, delayed
import time
import os
import sys
import getpass

In [2]:
# Resolve the project root directory.
# Known (user, platform) pairs map to a fixed Dropbox checkout. Anyone else can
# point the HEDGE_VOL_WORKDIR environment variable at their own copy instead of
# hitting a NameError on an unbound `workdir`.
_KNOWN_WORKDIRS = {
    ('ygnmax', 'linux'): '/home/ygnmax/Dropbox/research_nyu/hedge_vol/',
    ('ygnmax', 'win32'): 'C:/Users/ygnmax/Dropbox (Personal)/research_nyu/hedge_vol/',
}

workdir = _KNOWN_WORKDIRS.get((getpass.getuser(), sys.platform))
if workdir is None:
    workdir = os.environ.get('HEDGE_VOL_WORKDIR')
if not workdir:
    raise RuntimeError(
        'No project directory configured for user %r on platform %r; '
        'set the HEDGE_VOL_WORKDIR environment variable.'
        % (getpass.getuser(), sys.platform)
    )

# Later cells build paths as `workdir + "data/..."`, so a trailing separator is required.
if not workdir.endswith(('/', '\\')):
    workdir += '/'

os.chdir(workdir)

In [3]:
#################################
# 1. Get stock list / dictionary
#################################
# Columns kept from every firm workbook when building the combined list.
STOCK_LIST_COLUMNS = ['Name', 'Ticker', 'SecurityID', 'Internet']


def _read_firm_list(filename):
    """Read one firm workbook from ``workdir + "data/"``.

    Rows containing any missing value are dropped and ``SecurityID`` is cast
    to int. Every workbook is read with the same ``openpyxl`` engine so the
    three lists are parsed consistently.
    """
    firms = pd.read_excel(workdir + "data/" + filename, engine='openpyxl').dropna()
    firms["SecurityID"] = firms["SecurityID"].astype(int)
    return firms


df_dotcom = _read_firm_list("dot_com_firms.xlsx")  # dot-com bubble companies
df_big = _read_firm_list("big_firms.xlsx")         # big companies
df_other = _read_firm_list("other_firms.xlsx")     # other companies

# Stack the three lists in a fixed order (dot-com, big, other): later cells
# address stocks by their position in this frame.
df_stock_list = pd.concat(
    [firms[STOCK_LIST_COLUMNS] for firms in (df_dotcom, df_big, df_other)]
)
# Drop incomplete rows *before* the integer cast; astype(int) raises on NaN.
df_stock_list = df_stock_list.dropna()
df_stock_list["Internet"] = df_stock_list["Internet"].astype(int)
df_stock_list = df_stock_list.reset_index(drop=True)

In [4]:
def cal_iv_core(idx, group):
    """Compute an implied volatility (``IV0_c``) for every row of one group.

    Rows whose ``impl_cdiv_median`` equals 0 keep their quoted ``IV0``. For
    every other row the volatility is obtained by minimising the squared
    difference between the ``ABM`` model value and the observed ``V0``,
    subject to the constraint ``sigma >= 0``. The optimiser starts from the
    quoted ``IV0``, or from 1.5 when that quote is missing.

    Parameters
    ----------
    idx : hashable
        Key under which the result is stored in the module-level ``dict_IV``.
    group : pandas.DataFrame
        Rows to process. Columns read: ``impl_cdiv_median``, ``IV0``, ``S0``,
        ``K``, ``Maturity``, ``r``, ``CallPut`` and ``V0``.

    Returns
    -------
    pandas.DataFrame
        ``group`` left-merged (on the index) with the new ``IV0_c`` column.
        The same frame is also written to ``dict_IV[idx]``, which is how the
        existing callers collect results.

    Notes
    -----
    ``ABM``, ``VP`` and ``dict_IV`` are not defined here; they must already
    exist in the enclosing namespace when the function is called.
    """
    iv_by_row = {}
    for j, row in group.iterrows():
        # NOTE(review): a NaN impl_cdiv_median is not equal to 0 and therefore
        # goes through the optimiser with q_0 = NaN -- confirm this is intended.
        if row.impl_cdiv_median == 0:
            iv_by_row[j] = row.IV0
            continue

        S = row.S0
        X = row.K
        T = row.Maturity/360.0
        r_0 = row.r
        q_0 = row.impl_cdiv_median
        V_obs = row.V0

        IV0 = row.IV0
        # `row` is a Series produced by iterrows(), so IV0 is a scalar here.
        # pd.isna handles scalars; the former `np.isnan(IV0.values)` raised
        # AttributeError because a float has no `.values`.
        if pd.isna(IV0):
            IV0 = 1.5

        # Anything not flagged 'C' is treated as a put, as before.
        option_type = 'C' if row.CallPut == 'C' else 'P'
        payoff = ft.partial(VP, K=X, CallPut=option_type)

        def f(x):
            # Squared pricing error for a trial volatility x.
            # The literal 1000 is forwarded to ABM unchanged (presumably the
            # number of steps -- TODO confirm against functions_greeks.py).
            return (ABM(payoff, S, T, r_0, x, 1000, q_0) - V_obs)**2

        # Inequality constraint fun(x) >= 0 keeps the volatility non-negative.
        cons = {'type': 'ineq', 'fun': lambda x: np.array(x), 'jac': lambda x: np.array([1.0])}
        res = minimize(f, np.array([float(IV0)]), constraints=cons, tol=0.0001)
        iv_by_row[j] = res.x[0]

    df_IV_rows = pd.DataFrame(data=list(iv_by_row.items()), columns=['index', 'IV0_c'])
    df_IV_rows = df_IV_rows.set_index(['index'])
    df_IV_rows_out = group.merge(df_IV_rows, left_index=True, right_index=True, how='left')
    dict_IV[idx] = df_IV_rows_out
    return df_IV_rows_out

In [5]:
import warnings
# Blanket filter: suppresses every warning category for the rest of the session.
warnings.filterwarnings("ignore")
# Run the helper script via a relative path, so this relies on the earlier
# os.chdir(workdir). Presumably this is where ABM and VP (used by the pricing
# cells) are defined -- TODO confirm against functions_greeks.py.
%run "src/meme_stocks/functions_greeks.py"

In [25]:
# Scratch cell: load the option panel of one stock and isolate a single quote
# (kept as a one-row DataFrame) for manual inspection.
input_path = workdir + 'data/processed/intermediate/WRDS_2021/'

i = 100  # row label of the stock inside df_stock_list
stkid = df_stock_list.at[i, 'SecurityID']
ticker = df_stock_list.at[i, 'Ticker']

df = pd.read_csv(
    input_path + "df_" + str(stkid) + ".csv",
    parse_dates=['Date'],
)
# Row-wise median of the three implied-dividend columns.
df['impl_cdiv_median'] = df[['impl_cdiv30', 'impl_cdiv60', 'impl_cdiv90']].median(axis=1)

print('***************************************')
print('processing '+ str(stkid) + ' with ' + str(len(df)) + ' rows')

# The list selector keeps the result two-dimensional (DataFrame, not Series).
row = df.loc[[52718]]


***************************************
processing 113993 with 210872 rows


In [None]:
# date = row.Date.values
# S = row.S0.values
# X = row.K.values
# T = row.Maturity.values/360.0
# r_0 = row.r.values
# q_0 = row.impl_cdiv_median.values
# IV0 = row.IV0   
# if np.isnan(IV0.values):
#     IV0 = 1.5
    
# if row.CallPut.values == 'C':    
#     def f(x):
#         return (ABM(ft.partial(VP, K=X, CallPut='C'), S, T, r_0, x, 1000, q_0)-row.V0)**2
# else:
#     def f(x):
#         return (ABM(ft.partial(VP,K=X, CallPut='P'), S, T, r_0, x, 1000, q_0)-row.V0)**2

# cons = ({'type': 'ineq', 'fun' : lambda x: np.array(x), 'jac': lambda x: np.array([1.0])})
# res = minimize(f, np.array([7.5]), constraints=cons, tol = 0.0001)
# res.x[0]

In [6]:
input_path = workdir + 'data/processed/intermediate/WRDS_2021/'
output_path = input_path

# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(output_path, exist_ok=True)

# Implied-dividend columns whose row-wise median feeds the IV calculation.
CDIV_COLUMNS = ['impl_cdiv30', 'impl_cdiv60', 'impl_cdiv90']

problem_stk_list = []  # SecurityIDs that could not be processed
for i in [100]: # list(df_stock_list.index):
    stkid = df_stock_list.loc[i, 'SecurityID']
    ticker = df_stock_list.loc[i, 'Ticker']
    source_csv = os.path.join(input_path, 'df_' + str(stkid) + '.csv')
    target_csv = os.path.join(output_path, 'iv_' + str(stkid) + '.csv')

    # Results are cached on disk: skip stocks that were already computed.
    if os.path.exists(target_csv):
        print('***************************************')
        print('Stock '+ str(stkid) + ' IV has existed already')
        continue
    if not os.path.exists(source_csv):
        print('*****************************')
        print('Stock '+ str(stkid) + ' is not available')
        problem_stk_list.append(stkid)
        continue

    df = pd.read_csv(source_csv, parse_dates=['Date'])
    try:
        df['impl_cdiv_median'] = df.loc[:, CDIV_COLUMNS].median(axis=1)
    except (KeyError, TypeError) as err:
        # Missing or non-numeric dividend columns: record the stock instead of
        # silently skipping it, so the failure is visible afterwards.
        print('Stock ' + str(stkid) + ' has unusable implied dividend columns: ' + str(err))
        problem_stk_list.append(stkid)
        continue

    print('***************************************')
    print('processing '+ str(stkid) + ' with ' + str(len(df)) + ' rows')

    if len(df) < 2:
        problem_stk_list.append(stkid)
        print('Stock '+ str(stkid) + ticker + ' has insufficient observations')
        continue

    ###############################
    # Calculate Implied Volatility
    ###############################
    start_time = time.time()
    # cal_iv_core writes each group's result into this dict; require='sharedmem'
    # selects a thread-based backend so the workers share this namespace.
    dict_IV = {}
    Parallel(n_jobs=8, require='sharedmem', verbose=1)(
        delayed(cal_iv_core)(idx, group) for idx, group in df.groupby(['Date', 'Expiration'])
    )
    print(str(stkid) + ' implied volatility within %s seconds' % (time.time()-start_time))

    # Threads finish in arbitrary order, so sort by (Date, Expiration) key for a
    # reproducible row order, then concatenate once instead of once per group.
    iv_frames = [dict_IV[key] for key in sorted(dict_IV)]
    df_IV_out = pd.concat(iv_frames) if iv_frames else pd.DataFrame()

    df_IV_out.to_csv(target_csv, index = False)

***************************************
processing 113993 with 210872 rows


[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:   38.9s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:  3.3min
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:  6.7min
[Parallel(n_jobs=8)]: Done 784 tasks      | elapsed: 11.4min
[Parallel(n_jobs=8)]: Done 1234 tasks      | elapsed: 17.3min
[Parallel(n_jobs=8)]: Done 1784 tasks      | elapsed: 24.1min
[Parallel(n_jobs=8)]: Done 2434 tasks      | elapsed: 31.3min
[Parallel(n_jobs=8)]: Done 3184 tasks      | elapsed: 41.1min
[Parallel(n_jobs=8)]: Done 4034 tasks      | elapsed: 64.1min
[Parallel(n_jobs=8)]: Done 4984 tasks      | elapsed: 87.6min
[Parallel(n_jobs=8)]: Done 6034 tasks      | elapsed: 324.6min
[Parallel(n_jobs=8)]: Done 7184 tasks      | elapsed: 679.3min
[Parallel(n_jobs=8)]: Done 8434 tasks      | elapsed: 902.3min
[Parallel(n_jobs=8)]: Done 8657 out of 8657 | elapsed: 936.5min finished


113993 implied volatility within 56189.059601306915 seconds


In [29]:
def cal_delta_core(idx, group):
    """Value every option in ``group`` at spot and at spot bumped by +/-1%.

    For each row, ``ABM`` is evaluated three times (``S0``, ``S0 * 1.01`` and
    ``S0 * 0.99``) using the row's ``K``, ``Maturity / 360``, ``r``,
    ``IV0_c`` and ``impl_cdiv_median``. Rows flagged ``'C'`` are valued as
    calls; every other row is valued as a put.

    The three values are attached to ``group`` as columns ``V_self``,
    ``V_up`` and ``V_down`` (left merges on the index) and the resulting frame
    is stored in the module-level ``dict_delta`` under ``idx``. Nothing is
    returned. ``ABM``, ``VP`` and ``dict_delta`` must exist in the enclosing
    namespace.
    """
    bump = 0.01
    value_columns = ('V_self', 'V_up', 'V_down')
    values = {column: {} for column in value_columns}

    for j, row in group.iterrows():
        spot = row.S0
        strike = row.K
        tau = row.Maturity/360.0
        sigma = row.IV0_c
        rate = row.r
        div_yield = row.impl_cdiv_median

        option_type = 'C' if row.CallPut == 'C' else 'P'
        # Spot levels in the same order as the output columns.
        spot_levels = (spot, spot * (1+bump), spot * (1-bump))

        for column, level in zip(value_columns, spot_levels):
            payoff = ft.partial(VP, K=strike, CallPut=option_type)
            values[column][j] = ABM(payoff, level, tau, rate, sigma, 1000, div_yield)

    # Attach one column at a time, keyed by the original row index.
    df_V_out = group
    for column in value_columns:
        column_frame = pd.DataFrame(data=list(values[column].items()), columns=['index', column])
        column_frame = column_frame.set_index(['index'])
        df_V_out = df_V_out.merge(column_frame, left_index=True, right_index=True, how='left')

    dict_delta[idx] = df_V_out

In [32]:
input_path = workdir + 'data/processed/intermediate/WRDS_2021/'
output_path = input_path

# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(output_path, exist_ok=True)

# Relative spot bump used by cal_delta_core (S0 * (1 +/- 0.01)). The central
# difference below divides by the full bump width, 2 * SPOT_BUMP * S0.
SPOT_BUMP = 0.01

problem_stk_list = []  # SecurityIDs that could not be processed
for i in [100]: # list(df_stock_list.index):
    stkid = df_stock_list.loc[i, 'SecurityID']
    ticker = df_stock_list.loc[i, 'Ticker']
    source_csv = os.path.join(input_path, 'iv_' + str(stkid) + '.csv')
    target_csv = os.path.join(output_path, 'delta_' + str(stkid) + '.csv')

    # The "delta file already exists -> skip" shortcut is intentionally left
    # disabled here, so an existing delta file is overwritten.
    if not os.path.exists(source_csv):
        print('*****************************')
        print('Stock '+ str(stkid) + ' is not available')
        problem_stk_list.append(stkid)
        continue

    df = pd.read_csv(source_csv, parse_dates=['Date'])
    print('***************************************')
    print('processing '+ str(stkid) + ' with ' + str(len(df)) + ' rows')

    if len(df) < 2:
        problem_stk_list.append(stkid)
        print('Stock '+ str(stkid) + ticker + ' has insufficient observations')
        continue

    ##################
    # Calculate Delta
    ##################
    start_time = time.time()
    # cal_delta_core writes each group's result into this dict; require='sharedmem'
    # selects a thread-based backend so the workers share this namespace.
    dict_delta = {}
    Parallel(n_jobs=8, require='sharedmem', verbose=1)(
        delayed(cal_delta_core)(idx, group) for idx, group in df.groupby(['Date', 'Expiration'])
    )
    print(str(stkid) + ' delta within %s seconds' % (time.time()-start_time))

    if not dict_delta:
        # No (Date, Expiration) group produced a result, so there is no V_up or
        # V_down column to difference; record the stock instead of failing.
        problem_stk_list.append(stkid)
        print('Stock ' + str(stkid) + ' produced no delta groups')
        continue

    # Threads finish in arbitrary order, so sort by (Date, Expiration) key for a
    # reproducible row order, then concatenate once instead of once per group.
    df_delta_out = pd.concat([dict_delta[key] for key in sorted(dict_delta)])

    # Central finite difference: (V(S + h) - V(S - h)) / (2h) with h = SPOT_BUMP * S0.
    df_delta_out['Delta_c'] = (df_delta_out['V_up'] - df_delta_out['V_down']) / (df_delta_out['S0'] * (2 * SPOT_BUMP))
    df_delta_out.to_csv(target_csv, index = False)

***************************************
processing 113993 with 210872 rows


[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:   26.3s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:  2.0min
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:  4.0min
[Parallel(n_jobs=8)]: Done 784 tasks      | elapsed:  7.0min
[Parallel(n_jobs=8)]: Done 1234 tasks      | elapsed: 10.4min
[Parallel(n_jobs=8)]: Done 1784 tasks      | elapsed: 13.9min
[Parallel(n_jobs=8)]: Done 2434 tasks      | elapsed: 18.1min
[Parallel(n_jobs=8)]: Done 3184 tasks      | elapsed: 23.4min
[Parallel(n_jobs=8)]: Done 4034 tasks      | elapsed: 35.0min
[Parallel(n_jobs=8)]: Done 4984 tasks      | elapsed: 50.2min
[Parallel(n_jobs=8)]: Done 6034 tasks      | elapsed: 142.1min
[Parallel(n_jobs=8)]: Done 7184 tasks      | elapsed: 281.2min
[Parallel(n_jobs=8)]: Done 8434 tasks      | elapsed: 375.6min
[Parallel(n_jobs=8)]: Done 8657 out of 8657 | elapsed: 391.3min finished


113993 delta within 23479.45998096466 seconds


In [20]:
# Scratch check: re-value one stored quote at spot and at spot bumped by +/-1%.
# The option type is hard-coded to 'P' here, regardless of the row's CallPut.
row = df_delta_out.loc[5095]
date, S, X = row.Date, row.S0, row.K
T = row.Maturity/360.0
IV0, r_0, q_0 = row.IV0_c, row.r, row.impl_cdiv_median

epsilon = 0.01
S_epsilon_up, S_epsilon_down = S * (1+epsilon), S * (1-epsilon)


def _put_value(spot):
    """Value the selected quote as a put at the given spot level."""
    return ABM(ft.partial(VP, K=X, CallPut='P'), spot, T, r_0, IV0, 1000, q_0)


V_self = _put_value(S)
V_self_epsilon_up = _put_value(S_epsilon_up)
V_self_epsilon_download = _put_value(S_epsilon_down)