In [1]:
import pandas as pd
import numpy as np
import sys
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 1000)
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from pandas.tseries.offsets import BDay
from sklearn.linear_model import LinearRegression
import os
import getpass
import seaborn as sns

In [2]:
if getpass.getuser() in ['ygnmax']:
    if sys.platform == 'linux':
        workdir = '/home/ygnmax/Dropbox/research_nyu/hedge_vol/'
    if sys.platform == 'win32':
        workdir = 'C:/Users/ygnmax/Dropbox/research_nyu/hedge_vol/'
        
os.chdir(workdir)
%run "src/meme_stocks/functions_WRDS.py"
%run "src/meme_stocks/functions_greeks.py"

In [3]:
##############################
# Get stock list / dictionary
##############################
def _load_firm_list(filename):
    """Read one firm workbook from ``workdir + 'data/'``.

    Rows containing any missing value are dropped, then ``SecurityID`` is cast
    to int. All workbooks are read with the same engine so the three lists are
    parsed consistently.
    """
    firms = pd.read_excel(workdir + "data/" + filename, engine = 'openpyxl').dropna()
    firms["SecurityID"] = firms["SecurityID"].astype(int)
    return firms


df_bubble = _load_firm_list("dot_com_firms.xlsx")
df_big = _load_firm_list("big_firms.xlsx")
# read other companies
df_other = _load_firm_list("other_firms.xlsx")

# Stack the three lists, keeping only the columns used downstream.
stock_list_cols = ['Name', 'Ticker', 'SecurityID', 'Internet']
df_stock_list = pd.concat([df_bubble[stock_list_cols],
                           df_big[stock_list_cols],
                           df_other[stock_list_cols]])
# Drop incomplete rows *before* the integer cast: astype(int) raises on NaN,
# so a dropna placed after the cast could never protect it.
df_stock_list = df_stock_list.dropna()
df_stock_list["Internet"] = df_stock_list["Internet"].astype(int)
df_stock_list = df_stock_list.reset_index(drop = True)

# Lookup tables keyed by SecurityID. If a SecurityID appears more than once,
# the last row wins (same as filling the dicts row by row).
ids = dict(zip(df_stock_list['SecurityID'], df_stock_list['Name']))
dict_tech_label = dict(zip(df_stock_list['SecurityID'], df_stock_list['Internet']))

In [4]:
# Security under study. Both `i` and `securityid` are kept because the
# original cell assigns both names.
i = 113993
securityid = i
tickername = df_stock_list.loc[df_stock_list['SecurityID'] == securityid, 'Ticker'].values[0].strip()

# Per-security output folder for figures. The existence check previously
# referenced an undefined name (`output_path`); create the folder that is
# actually used, including missing parents.
fact_output_path = workdir + 'output/fact/WRDS_2021/' + str(i)
os.makedirs(fact_output_path, exist_ok=True)

# NOTE(review): read_data is not defined in this notebook; presumably it is
# provided by the helper scripts executed via %run -- confirm.
df_stock, df_option, df_dividend, df_info = read_data(str(securityid))
df = df_option.copy()
print(df.shape)

# ---- Option panel: types and derived columns ----
day_count = 360  # divisor turning days-to-expiry into the year fraction `tau`
df = df.drop_duplicates()
df['Date'] = pd.to_datetime(df['Date'])
df['Expiration'] = pd.to_datetime(df['Expiration'])
df['LastTradeDate'] = pd.to_datetime(df['LastTradeDate'])
df['K'] = df['Strike'] / 1000.0  # presumably Strike is stored as price x 1000 -- TODO confirm
df['V0'] = (df['BestBid'] + df['BestOffer'])/2  # bid/offer midpoint
df['Maturity'] = df['Expiration'] - df['Date']
df['Maturity'] = df.Maturity.dt.days
df['tau'] = df['Maturity'] / day_count
df['IV0'] = df['ImpliedVolatility']
df = df[['Date', 'K', 'Expiration',
       'CallPut', 'BestBid', 'BestOffer', 'LastTradeDate', 'Volume',
       'IV0', 'Delta', 'Gamma', 'Vega', 'Theta', 'OptionID', 
       'V0', 'Maturity', 'tau', 'OpenInterest']]
df = df.sort_values(by = ['Date', 'Expiration', 'CallPut', 'K'])

# ---- Attach same-day stock data ----
df_stk = df_stock.copy()
df_stk['S0'] = df_stk['ClosePrice']  # S0 is the unadjusted close

df = df.merge(df_stk[['Date', 'S0', 'AdjClosePrice', 'AdjClosePrice2', 'AdjustmentFactor', 'AdjustmentFactor2']], 
              how = 'left', on = 'Date')

# ---- Filters: traded contracts, bid above 0.049, at least one day to expiry ----
df = df[df['Volume'] >= 1]
df = df[df['BestBid'] > 0.049]
df['M0'] = df['S0'] / df['K']  # moneyness as spot / strike
df = df[df['Maturity'] >= 1]
# Optional filter (disabled): keep only calls with M0 < 1.001 and puts with M0 > 0.999.
# bl = ((df['CallPut'] == 'C') & (df['M0'] < 1.001)) | ((df['CallPut'] == 'P') & (df['M0'] > 0.999))
# df = df.loc[bl]
print(df.shape)


# ---- Daily volume / open interest by call-put flag ----
# Paths are built from `securityid` so they follow the security chosen above
# instead of repeating the id as a literal.
df_volume = pd.read_csv(workdir + 'data/raw/WRDS_2021/' + str(securityid) + '/volume.csv', parse_dates = ['date'])
df_volume_C = df_volume.loc[df_volume['cp_flag'] == 'C', :].copy().rename(columns = {'date':'Date', 'volume':'volume_C', 'open_interest':'openinterest_C'})
df_volume_P = df_volume.loc[df_volume['cp_flag'] == 'P', :].copy().rename(columns = {'date':'Date', 'volume':'volume_P', 'open_interest':'openinterest_P'})

# ---- Synthetic-option file, split by the 30 / 60 / 90 values of its Maturity column ----
df_syn = pd.read_csv(workdir + 'data/cleaned/synthetic/WRDS_2021/df_' + str(securityid) + '.csv')
df_syn30 = df_syn.loc[df_syn['Maturity'] == 30, :].copy()
df_syn60 = df_syn.loc[df_syn['Maturity'] == 60, :].copy()
df_syn90 = df_syn.loc[df_syn['Maturity'] == 90, :].copy()

(835990, 26)
(331230, 24)


In [5]:
def _moneyness_slice(grouped, bucket, suffix):
    """Return the Date / Volume / OpenInterest rows of one moneyness bucket.

    Volume and OpenInterest are renamed to ``volume_<suffix>`` and
    ``openinterest_<suffix>`` so the slices can be merged side by side on Date.
    """
    picked = grouped.loc[grouped['Moneyness'] == bucket, ['Date', 'Volume', 'OpenInterest']].copy()
    return picked.rename(columns = {'Volume': 'volume_' + suffix,
                                    'OpenInterest': 'openinterest_' + suffix})


# Puts: tagged 'out' when M0 > 0.999, otherwise 'in' (rows with missing M0
# therefore fall into 'in'). Then daily totals per bucket.
df_P = df.loc[df['CallPut'] == 'P'].copy()
df_P['Moneyness'] = np.where(df_P['M0'] > 0.999, 'out', 'in')
df_P_group = (df_P.groupby(['Date', 'Moneyness'])
                  .agg({'Volume':'sum', 'OpenInterest':'sum'})
                  .reset_index())

# Calls: tagged 'out' when M0 < 1.001, otherwise 'in'.
df_C = df.loc[df['CallPut'] == 'C'].copy()
df_C['Moneyness'] = np.where(df_C['M0'] < 1.001, 'out', 'in')
df_C_group = (df_C.groupby(['Date', 'Moneyness'])
                  .agg({'Volume':'sum', 'OpenInterest':'sum'})
                  .reset_index())

df_C_in = _moneyness_slice(df_C_group, 'in', 'C_in')
df_P_in = _moneyness_slice(df_P_group, 'in', 'P_in')
df_C_out = _moneyness_slice(df_C_group, 'out', 'C_out')
df_P_out = _moneyness_slice(df_P_group, 'out', 'P_out')

# Outer-join the four slices on Date, then attach the stock series and the
# per-flag totals with left joins.
df_all = df_C_in
for piece in (df_P_in, df_C_out, df_P_out):
    df_all = pd.merge(df_all, piece, on = 'Date', how = 'outer')
for extra in (df_stk[['Date', 'S0', 'Volume']],
              df_volume_C[['Date', 'volume_C', 'openinterest_C']],
              df_volume_P[['Date', 'volume_P', 'openinterest_P']]):
    df_all = pd.merge(df_all, extra, on = 'Date', how = 'left')

# Row-to-row changes. diff(1) works on row order, so these are day-over-day
# changes only as long as df_all is ordered by Date.
for measure in ('volume', 'openinterest'):
    for bucket in ('', '_out', '_in'):
        for flag in ('C', 'P'):
            base_col = measure + '_' + flag + bucket
            df_all[base_col + '_dif'] = df_all[base_col].diff(1)

df_all.head()

Unnamed: 0,Date,volume_C_in,openinterest_C_in,volume_P_in,openinterest_P_in,volume_C_out,openinterest_C_out,volume_P_out,openinterest_P_out,S0,Volume,volume_C,openinterest_C,volume_P,openinterest_P,volume_C_dif,volume_P_dif,volume_C_out_dif,volume_P_out_dif,volume_C_in_dif,volume_P_in_dif,openinterest_C_dif,openinterest_P_dif,openinterest_C_out_dif,openinterest_P_out_dif,openinterest_C_in_dif,openinterest_P_in_dif
0,2019-01-02,997.0,9082.0,102.0,33817.0,2228.0,19438.0,457.0,36973.0,13.07,2124200,3557.0,92774.0,589.0,90713.0,,,,,,,,,,,,
1,2019-01-03,383.0,2050.0,225.0,47725.0,1458.0,38509.0,711.0,5705.0,12.97,1750353,1876.0,95160.0,1079.0,90815.0,-1681.0,490.0,-770.0,254.0,-614.0,123.0,2386.0,102.0,19071.0,-31268.0,-7032.0,13908.0
2,2019-01-04,6243.0,23908.0,577.0,7996.0,11851.0,60069.0,5748.0,57705.0,15.24,11915692,18977.0,95881.0,7924.0,91558.0,17101.0,6845.0,10393.0,5037.0,5860.0,352.0,721.0,743.0,21560.0,52000.0,21858.0,-39729.0
3,2019-01-07,1272.0,21752.0,201.0,4619.0,3284.0,45531.0,2557.0,59141.0,15.48,4718028,5398.0,102218.0,2803.0,94782.0,-13579.0,-5121.0,-8567.0,-3191.0,-4971.0,-376.0,6337.0,3224.0,-14538.0,1436.0,-2156.0,-3377.0
4,2019-01-08,820.0,21027.0,410.0,7550.0,10671.0,48818.0,1803.0,54650.0,15.81,3444019,11549.0,104038.0,2450.0,95814.0,6151.0,-353.0,7387.0,-754.0,-452.0,209.0,1820.0,1032.0,3287.0,-4491.0,-725.0,2931.0


In [57]:
# Two stacked panels sharing the date axis:
#   top    - stock close (line, left axis) with stock volume (bars, right axis)
#   bottom - out-of-the-money call/put open interest (lines, left axis) with
#            out-of-the-money call/put option volume (bars, right axis)
fig, axs = plt.subplots(2, 1, figsize=(34,15), sharex=True, sharey=False)

# S0 is the unadjusted ClosePrice, so the legend must not call it "Adjusted".
axs[0].plot(df_all.Date, df_all.S0, '-k', label='Stock Price (Close)')
axs[0].set_ylabel("Stock Price", fontsize=20)
axs[0].tick_params(axis='y', labelsize=18)
axs[0].legend(loc=2, fontsize=20)
ax0 = axs[0].twinx()
ax0.bar(df_all.Date, df_all.Volume, color=(0.5, 0.5, 0.5, 0.5), label = "Stock Volume")
ax0.set_ylabel("Volume: Stock", fontsize=20)
ax0.tick_params(axis='y', labelsize=18)
ax0.legend(loc=1, fontsize=20)


axs[1].plot(df_all.Date, df_all.openinterest_C_out, 'b', label='Open Interest: Out-of-the-Money Call')
axs[1].plot(df_all.Date, df_all.openinterest_P_out, 'r', label='Open Interest: Out-of-the-Money Put')
# Alternative series (disabled): in-the-money and total open interest.
# axs[1].plot(df_all.Date, df_all.openinterest_C_in, 'b--', label='Open Interest: In-the-Money Call')
# axs[1].plot(df_all.Date, df_all.openinterest_P_in, 'r--', label='Open Interest: In-the-Money Put') 
# axs[1].plot(df_all.Date, df_all.openinterest_C, 'b--', label='Open Interest: Call')
# axs[1].plot(df_all.Date, df_all.openinterest_P, 'r--', label='Open Interest: Put') 
axs[1].set_ylabel("Open Interest", fontsize=20)
axs[1].tick_params(axis='y', labelsize=18)
# axs[1].set_ylim([0, 2])            
axs[1].legend(loc=2, fontsize=20)
ax1 = axs[1].twinx()
# Both bar series start at zero on the same axis, so they overlap rather than stack.
ax1.bar(df_all.Date, df_all.volume_C_out, color=(0.2, 0.2, 0.2, 0.8), label = "Option Volume: Out-of-the-Money Call")
ax1.bar(df_all.Date, df_all.volume_P_out, color=(0.5, 0.5, 0.5, 0.5), label = "Option Volume: Out-of-the-Money Put")
# Alternative series (disabled): in-the-money and total option volume.
# ax1.bar(df_all.Date, -df_all.volume_C_in, color=(0.2, 0.2, 0.2, 0.8), label = "Option Volume: In-the-Money Call")
# ax1.bar(df_all.Date, df_all.volume_P_in, color=(0.5, 0.5, 0.5, 0.5), label = "Option Volume: In-the-Money Put")
# ax1.bar(df_all.Date, df_all.volume_C, color=(0.2, 0.2, 0.2, 0.8), label = "Option Volume: Call")
# ax1.bar(df_all.Date, df_all.volume_P, color=(0.5, 0.5, 0.5, 0.5), label = "Option Volume: Put")
ax1.set_ylabel("Volume: Option", fontsize=20)
ax1.legend(loc=1, fontsize=20)
ax1.tick_params(axis='y', labelsize=18)

# The x axis is shared, so the tick spacing and date window set here apply to both panels.
axs[1].xaxis.set_major_locator(mdates.DayLocator(interval=20))
axs[1].set_xlim([pd.to_datetime('2020-02-01'), pd.to_datetime('2021-10-01')])
plt.setp(axs[1].get_xticklabels(), rotation=90, fontsize=18)
fig.tight_layout()

# Make sure the target folder exists so saving does not depend on another
# cell having created it, then save and release the figure.
os.makedirs(fact_output_path, exist_ok=True)
fig.savefig(fact_output_path + '/stock_volume_' + str(securityid) + '_' + tickername + '.jpg')
plt.close(fig)