In [132]:
import pandas as pd
import numpy as np

import wrds

import os
from dotenv import load_dotenv
load_dotenv();

from mylib.load_from_wrds import query_options, query_stock, query_dividend, query_zero_curve
from mylib.rates_tools import create_yield_curve, merge_interest

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [133]:
secid = 113993
name = "Game Stop"
ticker = "GME"

startdate = '2019-01-01'
enddate = '2022-12-31'

# Download data from WRDS and create option dataframe

In [134]:
%%time

db = wrds.Connection(wrds_username=os.getenv('WRDS_USER'))

df_option = query_options(db, secid, startdate=startdate, enddate=enddate)
df_stock = query_stock(db, secid, startdate=startdate, enddate=enddate)
df_dividend = query_dividend(db, secid, startdate=startdate, enddate=enddate)
zero_curve = query_zero_curve(db, startdate=startdate, enddate=enddate)

db.close()

Loading library list...
Done
CPU times: user 4.33 s, sys: 2.42 s, total: 6.75 s
Wall time: 50.2 s


Add interest rates and underlying info to options dataframe:

In [142]:
%%time

df_rate = create_yield_curve(zero_curve, max_days=1500) 
df = merge_interest(df_option, df_rate)
df = df.merge(df_stock, how='inner', on=['date', 'secid'])
df['M0'] = df['close'] / df['K']

CPU times: user 1.67 s, sys: 822 ms, total: 2.49 s
Wall time: 2.49 s


# Construct tracer options

In [314]:
def construct_tracer(df, cp_flag, target_tau):
    df_select = df[df['cp_flag'] == cp_flag]

    d_list = []
    close_list = []
    V_interp_list = []
    IV_interp_list = []
    
    for d, dfg in df_select.groupby('date'):
        target_K = dfg.iloc[0]['close']

        upper_left  = dfg[(dfg['tau_days'] <= target_tau) & (dfg['K'] >= target_K)] 
        upper_right = dfg[(dfg['tau_days'] >= target_tau) & (dfg['K'] >= target_K)] 
        lower_left  = dfg[(dfg['tau_days'] <= target_tau) & (dfg['K'] <= target_K)] 
        lower_right = dfg[(dfg['tau_days'] >= target_tau) & (dfg['K'] <= target_K)] 

        
        #First interpolate strikes above ATM, then below ATM
        upper_left  = upper_left[upper_left['tau_days'] == upper_left['tau_days'].max()]
        upper_left  = upper_left[upper_left['K'] == upper_left['K'].min()]        
        upper_right = upper_right[upper_right['tau_days'] == upper_right['tau_days'].min()]
        upper_right = upper_right[upper_right['K'] == upper_right['K'].min()]
        lower_left  = lower_left[lower_left['tau_days'] == lower_left['tau_days'].max()]
        lower_left  = lower_left[lower_left['K'] == lower_left['K'].max()]   
        lower_right = lower_right[lower_right['tau_days'] == lower_right['tau_days'].min()]
        lower_right = lower_right[lower_right['K'] == lower_right['K'].max()]   

        V, IV, K = [], [], []
        for l, r in zip([upper_left, lower_left], [upper_right, lower_right]):
            if r.iloc[0]['tau_days'] > l.iloc[0]['tau_days']:
                weight_right = (target_tau - l.iloc[0]['tau_days']) / (r.iloc[0]['tau_days'] - l.iloc[0]['tau_days'])
            else:
                weight_right = 1     
            # if the difference in the numerator is zero then any weight would do it as the options are the same
            
            V.append((1 - weight_right) * l.iloc[0]['V0'] + weight_right * r.iloc[0]['V0'])
            IV.append((1 - weight_right) * l.iloc[0]['impl_volatility'] + weight_right * r.iloc[0]['impl_volatility'])
            K.append((1 - weight_right) * l.iloc[0]['K'] + weight_right * r.iloc[0]['K'])

        if K[0] > K[1]:
            weight_upper = (target_K - K[1]) / (K[0] - K[1])
        else:
            weight_upper = 1

        d_list.append(d)
        close_list.append(target_K)
        V_interp_list.append((1-weight_upper) * V[0] + weight_upper * V[1])
        IV_interp_list.append((1-weight_upper) * IV[0] + weight_upper * IV[1]) 

        
    df_out = pd.DataFrame({'date': d_list, 'close': close_list, 'V_interp': V_interp_list, 'IV_interp': IV_interp_list})
    df_out['K'] = df_out['close']
    df_out['cp_flag'] = cp_flag
    df_out['tau_days'] = target_tau
    return df_out

In [329]:
%%time

df_tracer = pd.DataFrame()
for target_tau in [30, 60, 90, 120, 150, 180]:
    for cp_flag in ['C', 'P']:
        df_tmp = construct_tracer(df, cp_flag, target_tau)
        df_tracer = pd.concat([df_tracer, df_tmp])

CPU times: user 21.6 s, sys: 599 ms, total: 22.2 s
Wall time: 22 s


In [331]:
df_tracer.tail()

Unnamed: 0,date,close,V_interp,IV_interp,K,cp_flag,tau_days
1003,2022-12-23,20.08,9.004779,1.155642,20.08,P,180
1004,2022-12-27,18.2,4.525207,1.152101,18.2,P,180
1005,2022-12-28,17.92,4.841524,1.169645,17.92,P,180
1006,2022-12-29,18.33,4.419737,1.161997,18.33,P,180
1007,2022-12-30,18.46,4.3031,1.166514,18.46,P,180


# Calculate implied dividend rate

In [10]:
def calc_syn_implied_div(stkid, df_input):
    ## inplied dividend
    df = df_input.copy()

    df_c = df[(df['CallPut'] == 'C') & (df['IV0'] >=0.00001)]
    df_p = df[(df['CallPut'] == 'P') & (df['IV0'] >=0.00001)]
    df_calc_rate = df_c[['Date', 'K', 'Maturity', 'V0', 'IV0']].merge(
                   df_p[['Date', 'K', 'Maturity', 'V0', 'IV0', 'S0', 'short_rate', 'r']], on=['Date', 'K', 'Maturity'], 
                   suffixes=['_C', '_P'])
    df_calc_rate['diff'] = abs(df_calc_rate['S0'] - df_calc_rate['K'])
    if len(df_calc_rate) == 0:
        print('No implied dividend '+ str(stkid))

        df_implied_rate = df[['Date', 'Expiration']].drop_duplicates()
        df_implied_rate['impl_div0'] = 0.0
        df_implied_rate['impl_cdiv0'] = 0.0
    else:
        df_implied_rate = pd.DataFrame()
        i = 0
        for idx, group in df_calc_rate.groupby(['Date', 'Maturity']):
            group = group[group['diff'] == min(group['diff'])]
            implied_d = []        

            # There may be 2 rows for the smallest difference between S0 and K (above or below) 
            for j, row in group.iterrows():
                date = row.loc['Date']
                expiration = row.loc['Maturity']
                S = row['S0']
                K = row['K']
                T = row['Maturity']/360.0
                IV_0 = (row['IV0_C'] + row['IV0_P']) / 2.0
                r_0 = row['r']

                CPop = row['V0_C'] - max(S - K, 0)
                PPop = row['V0_P'] - max(K - S, 0)
                d_0 = (-(CPop - PPop - r_0 * K * T)/(S * T))
                c_d_0 = 1/T * np.log((-(CPop - PPop)-(K-S)+np.exp(r_0 * T) * K)/S)

            df_implied_rate.loc[i,'Date'] = idx[0] 
            df_implied_rate.loc[i,'Expiration'] = idx[1]
            df_implied_rate.loc[i,'impl_div0'] = d_0
            df_implied_rate.loc[i,'impl_cdiv0'] = c_d_0
            df_implied_rate.loc[i,'S0'] = np.unique(df_calc_rate.loc[(df_calc_rate['Date'] == idx[0]) & (df_calc_rate['Expiration'] == idx[1]), 'S0'].values)[0]
            i += 1
    
    df_implied_rate['impl_div0'] = df_implied_rate['impl_div0'].fillna(value=0)
    df_implied_rate['impl_cdiv0'] = df_implied_rate['impl_cdiv0'].fillna(value=0)
    df_implied_rate['Maturity'] = df_implied_rate['Expiration'] - df_implied_rate['Date']
    df_implied_rate['Maturity'] = df_implied_rate['Maturity'].dt.days
    df_out = df_input.merge(df_implied_rate[['Date', 'Expiration', 'impl_div0','impl_cdiv0']], on = ['Date', 'Expiration'], how = 'left')
    df_out['impl_div0'] = df_out['impl_div0'].fillna(value=0)
    df_out['impl_cdiv0'] = df_out['impl_cdiv0'].fillna(value=0)
    return df_out

In [11]:
%%time
%run "src/functions_greeks.py"

input_path = workdir + 'data/processed/tracer/'
output_path = workdir + 'data/cleaned/tracer/'
if not os.path.exists(output_path):
    os.makedirs(output_path)

if os.path.exists(input_path + 'df_' + str(stkid) + '.csv') == False:
    print('Synthetic '+ str(stkid) + ' is not available')        
elif os.path.exists(output_path + 'df_' + str(stkid) + '.csv') == True:
    print('Synthetic '+ str(stkid) + ' exists already')        
elif os.stat(input_path + 'df_' + str(stkid) + '.csv').st_size < 2:
    print('Synthetic '+ str(stkid) + ' is not available')        
else:  
    df_ATM = pd.read_csv(input_path + 'df_' + str(stkid) + '.csv', parse_dates = ['Date'])
    df_ATM = df_ATM.rename(columns = {'Strike': 'K', 'StockPrice': 'S0', 'interp_IV': 'IV0', 'interp_V': 'V0'})
    df_ATM = merge_interest(df_ATM, df_rate)

    ## calculate implied dividend
    df_ATM_d = calc_syn_implied_div(stkid, df_ATM)
    df_syn = pd.DataFrame()
    for (callput, m), group in df_ATM_d.groupby(['CallPut', 'Maturity']):
        df_ATM_dtmp = group.loc[(group['Maturity'] == m) & (group['CallPut'] == callput) , :].sort_values('Date')
        df_ATM_dtmp['abs_impl_div0'] = np.abs(df_ATM_dtmp['impl_div0']) 
        df_ATM_dtmp['ma_impl_div0'] = df_ATM_dtmp[['Date', 'impl_div0']].rolling(m).mean()
        df_ATM_dtmp['rel_impl_div0'] = df_ATM_dtmp['impl_div0'] / df_ATM_dtmp['ma_impl_div0']    
        df_syn = pd.concat([df_syn, df_ATM_dtmp])
    df_syn['tau'] = df_syn['Maturity'] / 360.

    ## merge real dividend
    df_dividend = read_dividend(stkid)
    real_div = pd.DataFrame()
    for row in df_dividend.index:
        sdate = df_dividend.loc[row, 'DeclareDate']
        edate = df_dividend.loc[row, 'ExDate']
        div = df_dividend.loc[row, 'amount']
        real_div_tmp = pd.DataFrame({'Date':pd.date_range(sdate,edate-datetime.timedelta(days=1),freq='d')})
        real_div_tmp['real_div0'] = div
        real_div = pd.concat([real_div, real_div_tmp])
    if real_div.shape[0] == 0:
        print('no real dividend')
        df_syn['impl_cdiv0'] = 0
        df_syn['real_div0'] = 0
    else:
        df_syn = df_syn.merge(real_div, how = 'left', on = ['Date']) 
        df_syn['real_div0'] = df_syn['real_div0'].fillna(value=0)

    ## greeks:
    ## delta
    df_syn['delta_bs_impl_cdiv'] = bs_call_delta(vol=df_syn['IV0'], S=df_syn['S0'], K=df_syn['K'], tau=df_syn['tau'], r=df_syn['r'], q=df_syn['impl_cdiv0'])
    df_syn['delta_bs_impl_cdiv_P'] = bs_put_delta(vol=df_syn['IV0'], S=df_syn['S0'], K=df_syn['K'], tau=df_syn['tau'], r=df_syn['r'], q=df_syn['impl_cdiv0'])
    df_syn.loc[df_syn['CallPut'] == 'P', 'delta_bs_impl_cdiv'] = df_syn.loc[df_syn['CallPut'] == 'P','delta_bs_impl_cdiv_P']
    del df_syn['delta_bs_impl_cdiv_P']

    df_syn['delta_bs_real_div'] = bs_call_delta(vol=df_syn['IV0'], S=df_syn['S0'], K=df_syn['K'], tau=df_syn['tau'], r=df_syn['r'], q=df_syn['real_div0'])
    df_syn['delta_bs_real_div_P'] = bs_put_delta(vol=df_syn['IV0'], S=df_syn['S0'], K=df_syn['K'], tau=df_syn['tau'], r=df_syn['r'], q=df_syn['real_div0'])
    df_syn.loc[df_syn['CallPut'] == 'P', 'delta_bs_real_div'] = df_syn.loc[df_syn['CallPut'] == 'P','delta_bs_real_div_P']
    del df_syn['delta_bs_real_div_P']

    ## gamma
    df_syn['gamma_bs_impl_cdiv'] = bs_gamma(vol=df_syn['IV0'], S=df_syn['S0'], K=df_syn['K'], tau=df_syn['tau'], r=df_syn['r'], q=df_syn['impl_cdiv0'])
    df_syn['gamma_bs_real_div'] = bs_gamma(vol=df_syn['IV0'], S=df_syn['S0'], K=df_syn['K'], tau=df_syn['tau'], r=df_syn['r'], q=df_syn['real_div0'])

    ## vega
    df_syn['vega_bs_impl_cdiv'] = bs_vega(vol=df_syn['IV0'], S=df_syn['S0'], K=df_syn['K'], tau=df_syn['tau'], r=df_syn['r'], q=df_syn['impl_cdiv0'])
    df_syn['vega_bs_real_div'] = bs_vega(vol=df_syn['IV0'], S=df_syn['S0'], K=df_syn['K'], tau=df_syn['tau'], r=df_syn['r'], q=df_syn['real_div0'])        

    df_syn.to_csv(output_path + 'df_' + str(stkid) + '.csv', index = False)
    print('Tracer '+ str(stkid) + ' done')

KeyError: "['Expiration'] not in index"