In [1]:
# Standard library
import getpass
import os
import sys

# Third-party
import numpy as np
import pandas as pd
from pandas.tseries.offsets import BDay

# Display settings: render up to 1000 rows and never truncate columns.
pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', None)

In [2]:
# Resolve the project root directory `workdir`. Resolution order:
#   1. the HEDGE_VOL_WORKDIR environment variable, when set;
#   2. the known per-platform Dropbox locations of user 'ygnmax';
#   3. the directory the notebook was started from.
# Without the fallbacks, `workdir` stayed undefined for any other user or
# platform and the os.chdir call below failed with a NameError.
workdir = os.environ.get('HEDGE_VOL_WORKDIR')
if not workdir:
    try:
        current_user = getpass.getuser()
    except Exception:
        # No resolvable login name (e.g. a container running under a bare uid).
        current_user = None
    if current_user in ['ygnmax']:
        if sys.platform == 'linux':
            workdir = '/home/ygnmax/Dropbox/research_nyu/hedge_vol/'
        elif sys.platform == 'win32':
            workdir = 'C:/Users/ygnmax/Dropbox (Personal)/research_nyu/hedge_vol/'
if not workdir:
    workdir = os.getcwd()
# Paths are built as `workdir + 'data/...'`, so a trailing separator is required.
if not workdir.endswith(('/', os.sep)):
    workdir += '/'
os.chdir(workdir)

In [3]:
#################################
# 1. Get stock list / dictionary
#################################
def _read_firm_list(filename):
    """Load one firm spreadsheet from ``workdir + 'data/'``.

    Rows containing any missing value are dropped and the ``SecurityID``
    column is cast to ``int``. All spreadsheets are read with the same
    explicit ``openpyxl`` engine so the three lists are parsed consistently.

    Parameters
    ----------
    filename : str
        File name of the ``.xlsx`` workbook inside the ``data/`` folder.

    Returns
    -------
    pandas.DataFrame
        The cleaned firm list.
    """
    firms = pd.read_excel(workdir + "data/" + filename, engine = 'openpyxl').dropna()
    firms["SecurityID"] = firms["SecurityID"].astype(int)
    return firms


# read dot-com bubble companies
df_dotcom = _read_firm_list("dot_com_firms.xlsx")

# read big companies
df_big = _read_firm_list("big_firms.xlsx")

# read other companies
df_other = _read_firm_list("other_firms.xlsx")

# append them together
stock_columns = ['Name', 'Ticker', 'SecurityID', 'Internet']
df_stock_list = pd.concat([df_dotcom[stock_columns],
                           df_big[stock_columns],
                           df_other[stock_columns]])
# Drop incomplete rows *before* the integer cast: astype(int) raises on NaN,
# so a dropna placed after the cast could never protect it.
df_stock_list = df_stock_list.dropna()
df_stock_list["Internet"] = df_stock_list["Internet"].astype(int)
df_stock_list = df_stock_list.reset_index(drop = True)

In [4]:
datadir = workdir + 'data/raw/WRDS_2021/' 

# read and clean zero curve
zero_curve = pd.read_csv(workdir + 'data/raw/WRDS_2021/zero_curve_all.csv', parse_dates = ['date'])
zero_column={"date": "Date", "days": "Days",  "rate" : "Rate"}
zero_curve.rename(columns=zero_column, inplace=True)

%run "src/meme_stocks/functions_WRDS.py"
%run "src/meme_stocks/functions_greeks.py"
df_rate = preclean_interest(zero_curve, max_days = 1500) 

In [7]:
# Build one intermediate CSV per stock: cleaned option data merged with
# interest rates and with the implied / realised dividends read from the
# synthetic dividend file. Stocks that cannot be processed are collected in
# `problem_stk_list`.
output_path = workdir + 'data/processed/intermediate/WRDS_2021/'
os.makedirs(output_path, exist_ok = True)

df_rate = preclean_interest(zero_curve, max_days = 1500)

# Values of the synthetic file's 'Maturity' column whose implied dividends are
# merged in; each becomes the suffix of the resulting columns.
div_maturities = [30, 60, 90]

problem_stk_list = []
# Only row 109 of df_stock_list is processed; the trailing comment is the
# author's alternative for a batch run.
for i in [109]: #list(range(100,len(df_stock_list))):
    stkid = df_stock_list.loc[i, 'SecurityID']
    ticker = df_stock_list.loc[i, 'Ticker']
    out_file = output_path + 'df_' + str(stkid) + '.csv'
    div_file = workdir + 'data/cleaned/synthetic/WRDS_2021/df_' + str(stkid) + '.csv'

    print('*********************************************')
    print('processing '+ str(stkid))

    # Results already on disk are never recomputed.
    if os.path.exists(out_file):
        print('Stock '+ str(stkid) + ' exists already')
        continue
    if not os.path.exists(datadir + str(stkid)):
        print('Stock '+ str(stkid) + ' is not available')
        problem_stk_list.append(stkid)
        continue

    df_stock, df_option, df_dividend, df_info = read_data(str(stkid))
    if len(df_option) < 2:
        # Recorded as a problem stock, like every other failure branch.
        problem_stk_list.append(stkid)
        print('Stock '+ str(stkid) + ' ' + ticker + ' is not available from OptionMetrics')
        continue

    df = preclean_data(df_option, df_stock, stkid)
    df = merge_interest(df, df_rate)
    df = prefilter_data(df)
    if len(df) < 2:
        problem_stk_list.append(stkid)
        print('Stock '+ str(stkid) + ' ' + ticker + ' has insufficient observations')
        continue

    # A missing dividend file is reported and skipped so that one stock cannot
    # abort the whole run with a FileNotFoundError.
    if not os.path.exists(div_file):
        problem_stk_list.append(stkid)
        print('Stock '+ str(stkid) + ' ' + ticker + ' has no synthetic dividend file')
        continue

    # Drop observations dated on a dividend payment date or on the business day
    # before it. Missing payment dates are ignored (they never matched a row in
    # the element-wise comparison either). The copy avoids chained-assignment
    # warnings when columns are added below.
    pay_dates = df_dividend['PaymentDate'].dropna()
    excluded_dates = set(pay_dates) | {paydate - BDay(1) for paydate in pay_dates}
    df = df[~df['Date'].isin(list(excluded_dates))].copy()

    df['SecurityID'] = stkid
    df['StockTicker'] = ticker

    # adjust dividend rate: attach the call-side implied dividends per maturity
    df_div = pd.read_csv(div_file, parse_dates = ['Date'])
    df_div_calls = df_div[df_div['CallPut'] == 'C']
    for maturity in div_maturities:
        suffix = str(maturity)
        df_div_mat = df_div_calls.loc[df_div_calls['Maturity'] == maturity,
                                      ['Date', 'impl_div0', 'impl_cdiv0']]
        df_div_mat = df_div_mat.rename(columns = {'impl_div0': 'impl_div' + suffix,
                                                  'impl_cdiv0': 'impl_cdiv' + suffix})
        df = df.merge(df_div_mat, how = 'left', on = 'Date')

    # The realised dividend is taken from the 90-maturity call rows.
    df_realdiv = df_div_calls.loc[df_div_calls['Maturity'] == 90,
                                  ['Date', 'real_div0']].rename(columns = {'real_div0': 'real_div'})
    df = df.merge(df_realdiv, how = 'left', on = 'Date')
    df['real_div_yield'] = df['real_div'] / df['S0']

    # NOTE: Black-Scholes greeks are deliberately not computed in this step.
    # The previously commented-out code derived delta (bs_call_delta /
    # bs_put_delta), gamma (bs_gamma) and vega (bs_vega) from IV0, S0, K, tau
    # and r, once with q = impl_cdiv90 and once with q = real_div_yield, and
    # set d0 = real_div.
    df.to_csv(out_file, index = False)

print('************ done *************')

*********************************************
processing 142943
Stock 142943 exists already
************ done *************
