In [2]:
from selenium import webdriver
import os
from selenium.webdriver.common.by import By
import time
import pandas as pd
import numpy as np
from datetime import datetime
import db_multiproc_calc_neutral2 as dmc
from bs4 import BeautifulSoup
import requests
from random import *
import glob

# Set to True to (re)download today's data from Barchart / Treasury; when False
# the notebook only processes files already present in ``download_path``.
k_do_download = False

# Full symbol universe, kept for reference.
all_sym_universe = ['GCJ23', 'CLK23', 'NGJ23', 'ZCK23', 'ZWK23', 'ZSK23', 'E6M23', 'ZNM23',
                    'NGK23', 'SIK23', 'HGK23']
# Symbols actually downloaded by this run (this is the value the notebook used
# before, because the full list above was immediately overwritten).
all_sym = ['SIK23']

cwd = os.getcwd()

# The Chrome driver binary is expected next to the notebook.
executable_path = os.path.join(cwd,'chromedriver.exe')
display(executable_path)

date_str = datetime.now().strftime("%Y-%m-%d")
#date_str = '2023-03-24'
# Every other date string is derived from ``date_str`` so that overriding it
# above (to re-process an earlier day) keeps all of them consistent.
output_date_str = pd.to_datetime(date_str).strftime("%Y%m%d")
file_date_str = date_str
barchart_datestr = pd.to_datetime(file_date_str).strftime('%m-%d-%Y')

download_path = os.path.join(cwd, 'local_download', file_date_str)

print(f'processing futures options:{file_date_str}')

# Create the per-day folder (and the parent ``local_download`` folder when it is
# missing). Any other failure, e.g. a permission error, now propagates instead
# of being reported as "exists".
if os.path.isdir(download_path):
    print(f'{download_path} exists')
else:
    os.makedirs(download_path)


'C:\\Users\\mcbri\\PycharmProjects\\futureDataCapture\\chromedriver.exe'

processing futures options:2023-03-27
C:\Users\mcbri\PycharmProjects\futureDataCapture\local_download\2023-03-27 exists


In [3]:
def find_all_downloaded_contracts(download_path):
    """Return the distinct futures contracts listed in the downloaded summary files.

    Parameters
    ----------
    download_path : str
        Folder holding the ``*.txt`` summary files.

    Returns
    -------
    numpy.ndarray
        Unique values of the ``futures_contract`` column, in order of first
        appearance.
    """
    # The rows come from find_all_downloaded_details; calling this function
    # itself here would recurse until RecursionError.
    df_all_ctr = find_all_downloaded_details(download_path)
    return df_all_ctr['futures_contract'].unique()

def find_all_downloaded_details(download_path):
    """Load every ``*.txt`` file in ``download_path`` into a single DataFrame.

    Parameters
    ----------
    download_path : str
        Folder to scan (not recursive).

    Returns
    -------
    pandas.DataFrame
        The files read with ``pd.read_csv`` and stacked row-wise. Each file's
        own row index is kept, so index labels may repeat.

    Raises
    ------
    ValueError
        If the folder contains no ``*.txt`` files.
    """
    # glob returns names in arbitrary order; sort so the row order is reproducible.
    filenames = sorted(glob.glob(os.path.join(download_path, '*.txt')))
    if not filenames:
        raise ValueError(f'no *.txt contract files found in {download_path}')
    return pd.concat([pd.read_csv(filename) for filename in filenames], axis=0)

def get_treasury_data(start_date, timeout=30):
    """Download one month of the daily Treasury yield curve in long format.

    Parameters
    ----------
    start_date : datetime-like
        Any object with ``strftime``; only its year and month are used to
        select the page of the feed.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up.

    Returns
    -------
    pandas.DataFrame
        Columns ``date``, ``tenor`` (``'1m'`` ... ``'30y'``) and ``rate``
        (the published value divided by 100), sorted by date.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    requests.Timeout
        If no response arrives within ``timeout`` seconds.
    """
    # (output column, XML tag) pairs, in output column order.
    tenor_tags = [
        ('1m', 'd:bc_1month'), ('2m', 'd:bc_2month'), ('3m', 'd:bc_3month'),
        ('6m', 'd:bc_6month'), ('1y', 'd:bc_1year'), ('2y', 'd:bc_2year'),
        ('3y', 'd:bc_3year'), ('5y', 'd:bc_5year'), ('10y', 'd:bc_10year'),
        ('20y', 'd:bc_20year'), ('30y', 'd:bc_30year'),
    ]
    tenor_cols = [tenor for tenor, _ in tenor_tags]

    start_date_str = start_date.strftime("%Y%m")
    url='https://home.treasury.gov/resource-center/data-chart-center/interest-rates/pages/xml?data=daily_treasury_yield_curve&field_tdr_date_value_month='+start_date_str

    # Fail fast on a hung connection or an error page instead of parsing it.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text,'lxml')

    records = []
    for entry in soup.find_all('m:properties'):
        # The date is mandatory; keep only the YYYY-MM-DD part.
        record = {'date': entry.find('d:new_date').text[:10]}
        for tenor, tag_name in tenor_tags:
            node = entry.find(tag_name)
            # A tenor missing from an entry becomes NaN rather than aborting
            # the whole download with an AttributeError.
            record[tenor] = node.text if node is not None else None
        records.append(record)

    ustcurve = pd.DataFrame(records, columns=['date'] + tenor_cols)
    ustcurve[tenor_cols] = ustcurve[tenor_cols].apply(pd.to_numeric)/100
    ustcurve['date'] = pd.to_datetime(ustcurve['date'])

    # Wide (one column per tenor) -> long (one row per date/tenor pair).
    tbl_daily = ustcurve.sort_values(['date'])
    tbl_daily = tbl_daily.set_index('date').stack(level=0).reset_index()
    tbl_daily.columns = ['date', 'tenor', 'rate']

    return tbl_daily

def get_put_call_ratio(df_iv):
    """Compute the plain and delta-weighted put/call open-interest ratios.

    Rows of ``df_iv`` containing any NaN are ignored. Open interest, and open
    interest multiplied by delta, are summed per option type and the put total
    is divided by the call total.

    Returns a Series indexed by ``'Put-Call-Ratio'`` and
    ``'Delta-Adj-Put-Call-Ratio'``. The input frame is not modified.
    """
    complete_rows = df_iv.dropna().loc[:, ['strike', 'call_put', 'open_interest', 'delta']]
    weighted = complete_rows.assign(
        **{'Delta-Adj-Put-Call-Ratio': complete_rows['open_interest'] * complete_rows['delta']}
    )
    totals_by_type = (
        weighted[['call_put', 'open_interest', 'Delta-Adj-Put-Call-Ratio']]
        .rename(columns={'open_interest': 'Put-Call-Ratio'})
        .groupby('call_put')
        .sum()
    )
    # One row per option type: divide the put totals by the call totals.
    return totals_by_type.loc['P'] / totals_by_type.loc['C']

def get_iv_data_set(sym, option_expiration, file_date_str, barchart_datestr):
    """Combine the downloaded Barchart CSVs for one contract/expiration into one frame.

    Three files are read from ``local_download/<file_date_str>/`` (a path
    relative to the current working directory): the futures price history,
    the option prices and the volatility/greeks table.

    Parameters
    ----------
    sym : str
        Contract symbol used in the file names and written to the ``symbol`` column.
    option_expiration : str
        Expiration date; parsed with ``pd.to_datetime`` and formatted as
        ``%m_%d_%y`` for the file names.
    file_date_str : str
        Name of the per-day download folder.
    barchart_datestr : str
        Date suffix of the file names; also used to look up the futures close.

    Returns
    -------
    pandas.DataFrame
        One row per (strike, call_put) with lower-cased column names, holding
        the option price columns, the greeks columns and ``close_px``.
    """
    # compile the implied vol dataframe from the files downloaded from barcharts

    exp_date_str = option_expiration
    exp_file_date_str = pd.to_datetime(exp_date_str).strftime('%m_%d_%y')

    #iv_str = df_exp_data.values[0][1]
    #all_iv = pd.to_numeric(iv_str.replace('%',''))/100

    # NOTE(review): these paths are relative to the working directory rather than
    # built from the notebook's download_path -- confirm both always agree.
    px_data_path = os.path.join('local_download', file_date_str, f'{sym}_daily_historical-data-{barchart_datestr}.csv')
    opx_data_path = os.path.join('local_download', file_date_str, f'{sym}-options-american-options-exp-{exp_file_date_str}-show-all-%futuresoptionsview%-daily-{barchart_datestr}.csv')
    gr_data_path = os.path.join('local_download', file_date_str, f'{sym}-volatility-greeks-exp-{exp_file_date_str}-show-all-{barchart_datestr}.csv')

    # read the futures price history and pick the 'Last' value for barchart_datestr
    fpx = pd.read_csv(px_data_path, parse_dates=['Time']).dropna()
    fpx['date'] = fpx['Time'].map(lambda x:pd.to_datetime(x))
    fpx = fpx.drop(columns=['Time'])
    fpx=fpx.set_index(['date']).sort_index()
    close_px = fpx.loc[barchart_datestr, :]['Last']

    # read in the option prices and transform
    opx = pd.read_csv(opx_data_path)
    # drop the last row of the file (presumably a non-data footer line -- TODO confirm)
    opx = opx.iloc[:-1,:]
    # the raw 'Strike' text ends with the option type letter; the rest is the
    # strike, with '-' standing in for the decimal point and ',' as thousands separator
    opx['call_put']=opx['Strike'].map(lambda x:x[-1])
    opx['num_strike']=opx['Strike'].map(lambda x:pd.to_numeric(x[:-1].replace('-','.').replace(',','')))
    opx=opx.drop(labels=['Strike', 'Delta', 'Prev Open', 'Prev High', 'Prev Low', 'Prev Change', 'Premium'], axis=1)
    opx = opx.rename(columns={'num_strike': 'Strike',
                              'Prev Volume':'Volume',
                              'Open Interest':'open_interest',
                              'Last':'mean_price'})
    opx = opx.set_index(['Strike', 'call_put'])

    # read in the greeks and transform
    gr = pd.read_csv(gr_data_path)
    # drop the last row of the file, as for the option prices
    gr = gr.iloc[:-1,:]

    gr['symbol']=sym
    gr['option_expiration']=pd.to_datetime(exp_date_str)
    gr['Strike']=gr['Strike'].map(lambda x:pd.to_numeric(x))
    # strip the trailing character of the IV text (a '%' in the files seen so far)
    # and convert to a fraction
    gr['IV']=pd.to_numeric(gr['IV'].str[:-1])/100
    #RWMgr['IV']=all_iv
    gr['Type'] = gr['Type'].map({'Put':'P', 'Call':'C'})
    gr=gr.drop(labels=['IV Skew', 'Time', 'Last'], axis=1)
    gr=gr.rename(columns={'Type':'call_put'})
    gr=gr.set_index(['Strike', 'call_put'])

    # join prices and greeks side by side on (Strike, call_put), then make columns lowercase
    t=pd.concat([opx, gr], axis=1)
    t['close_px'] = close_px
    t = t.reset_index()
    t.columns = map(str.lower, t.columns)

    return t

def clean_implied_vol(df_iv_file):
    """Repair and smooth the ``iv`` column of an option table.

    For each strike, a zero implied vol on one option type is replaced by the
    value of the other type when that one is non-zero. The result is then
    passed through a centred 3-strike rolling median, separately for calls and
    puts, and written back to ``iv``.

    Returns a new frame indexed by (``strike``, ``call_put``) and sorted by
    that index; the input frame is left untouched.
    """
    indexed = df_iv_file.set_index(['strike', 'call_put']).sort_index().copy()
    iv_by_type = indexed['iv'].unstack('call_put')

    put_is_zero = iv_by_type['P'] == 0
    call_is_zero = iv_by_type['C'] == 0

    # Borrow from the opposite side only where that side is itself non-zero.
    fill_call = call_is_zero & ~put_is_zero
    fill_put = put_is_zero & ~call_is_zero
    iv_by_type.loc[fill_call, 'C'] = iv_by_type.loc[fill_call, 'P']
    iv_by_type.loc[fill_put, 'P'] = iv_by_type.loc[fill_put, 'C']

    # Median over each strike and its immediate neighbours; edge strikes use
    # whatever neighbours exist.
    smoothed = iv_by_type.rolling(3, center=True, min_periods=0).median()
    smoothed = smoothed.stack('call_put')
    smoothed.name = 'clean_iv'

    combined = pd.concat([indexed, smoothed], axis=1)
    combined['iv'] = combined['clean_iv']
    return combined.drop(columns='clean_iv')

def generate_neutral_futures(sym, option_expiration, file_date_str, barchart_datestr):
    """Run the neutral-level calculation for one contract/expiration.

    Loads the downloaded option data with ``get_iv_data_set``, cleans the
    implied vols with ``clean_implied_vol`` and hands the result to
    ``dmc.calc_daily_neutral_values``.

    Returns a list ``[agg_neutral, df_greeks_by_strike, exp_neutral]`` exactly
    as produced by ``dmc.calc_daily_neutral_values``, with ``Close`` and
    ``Symbol`` added to ``agg_neutral``.
    """
    iv_table = get_iv_data_set(sym, option_expiration, file_date_str, barchart_datestr)
    cleaned_iv = clean_implied_vol(iv_table).dropna().reset_index()

    # Every row's strike is passed on, sorted but not de-duplicated.
    # NOTE(review): confirm calc_spot_price_levels does not expect unique strikes.
    strike_levels = sorted(iv_table['strike'])
    spot_grid = np.array(dmc.calc_spot_price_levels(strike_levels), dtype=float)

    # rf_rate is not a parameter: it is read from the notebook's global scope
    # and must already be defined when this function is called.
    neutral_results = dmc.calc_daily_neutral_values(
        cleaned_iv,
        barchart_datestr,
        rf_rate,
        sym,
        spot_grid,
    )
    agg_neutral, df_greeks_by_strike, exp_neutral = neutral_results

    agg_neutral['Close'] = iv_table['close_px'].values[0]
    agg_neutral['Symbol'] = sym
    return [agg_neutral, df_greeks_by_strike, exp_neutral]

In [4]:
rate_filename = os.path.join(download_path,'rate.csv')

if k_do_download:
    df_rate = get_treasury_data(pd.to_datetime(date_str))
    df_rate.to_csv(rate_filename, index=False)

# Always reload from disk so a cached file and a fresh download are read the same way.
df_rate = pd.read_csv(rate_filename)

# Select the 1-month rate for the processing date with an explicit filter rather
# than relying on how .loc resolves a (row, column)-looking key on a MultiIndex.
one_month_rows = df_rate[(df_rate['date'] == date_str) & (df_rate['tenor'] == '1m')]
if one_month_rows.empty:
    # Happens when no rate is on file for date_str.
    raise KeyError(f'no 1m treasury rate for {date_str} in {rate_filename}')
rf_rate = one_month_rows['rate'].values[0]
rf_rate

0.0421999999999999

In [5]:
def bar_chart_get_driver(executable_path, download_path):
    """Start a Chrome WebDriver that saves downloads into ``download_path``.

    Parameters
    ----------
    executable_path : str
        Path to the chromedriver binary.
    download_path : str
        Folder Chrome should download into without prompting.

    Returns
    -------
    selenium.webdriver.Chrome
        The running driver; the caller is responsible for calling ``quit()``.
    """
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_experimental_option("prefs", {
        "download.default_directory": download_path,
        "download.prompt_for_download": False,
        "download.directory_upgrade": True,
        "safebrowsing.enabled": True
    })

    # Newer Selenium 4 releases no longer accept ``executable_path=`` and take the
    # driver location through a Service object; older releases reject the
    # ``service=`` keyword with a TypeError, in which case the legacy call is used.
    try:
        from selenium.webdriver.chrome.service import Service
        return webdriver.Chrome(service=Service(executable_path), options=chrome_options)
    except TypeError:
        return webdriver.Chrome(executable_path=executable_path, options=chrome_options)

def bar_chart_login(driver, email=None, password=None):
    """Log the driver's browser session in to barchart.com.

    Credentials are never stored in the notebook. They are taken from the
    ``email`` / ``password`` arguments or, when those are omitted, from the
    ``BARCHART_EMAIL`` and ``BARCHART_PASSWORD`` environment variables.
    NOTE: any credentials that were previously committed in this notebook
    should be treated as compromised and rotated.

    Parameters
    ----------
    driver : selenium WebDriver
        An open browser session.
    email, password : str, optional
        Barchart account credentials.

    Raises
    ------
    ValueError
        If no credentials are supplied and the environment variables are unset.
    """
    email = email or os.environ.get('BARCHART_EMAIL')
    password = password or os.environ.get('BARCHART_PASSWORD')
    if not email or not password:
        raise ValueError(
            'Barchart credentials missing: pass email/password or set '
            'BARCHART_EMAIL and BARCHART_PASSWORD'
        )

    driver.get("https://www.barchart.com/login")
    driver.find_element(By.NAME, "email").click()
    driver.find_element(By.NAME, "email").send_keys(email)
    driver.find_element(By.NAME, "password").click()
    driver.find_element(By.NAME, "password").send_keys(password)
    # Same locator API as the rest of the notebook; the find_element_by_* helpers
    # are not available in current Selenium releases.
    driver.find_element(By.CLASS_NAME, 'login-button').click()
    return

def bar_chart_download(driver, date_str, sym, download=False):
    """Visit the Barchart option pages for ``sym`` and optionally download the data.

    For the first two entries of the expiration dropdown, on both the option
    price page and the volatility/greeks page, the contract name, expiration
    date and overall IV shown on the page are recorded and, when ``download``
    is true, the page's download button is clicked. The recorded rows are
    written to ``<download_path>/<sym>_<date_str>.txt``; then the historical
    price page of every contract found is opened (and downloaded when
    ``download`` is true).

    Parameters
    ----------
    driver : selenium WebDriver
        A logged-in browser session.
    date_str : str
        Date used in the summary file name.
    sym : str
        Futures symbol whose option pages are visited.
    download : bool, optional
        When False the pages are only browsed and the summary file is written.

    Notes
    -----
    ``download_path`` is read from the notebook's global scope.
    The random pauses between actions are kept as in the original flow.
    """

    def on_current_page_download_near_next(driver, download):
        """Walk the first two expirations on the current page; return what was read.

        Returns a DataFrame with columns ``futures_contract``,
        ``option_expiration`` and ``iv`` holding one row per expiration for
        which all three values could be read.
        """
        # Get the list of options in the dropdown
        driver.find_element(By.ID, "bc-options-toolbar__dropdown-month").click()
        dropdown = driver.find_element(By.ID, "bc-options-toolbar__dropdown-month")
        options = [label.lstrip() for label in dropdown.text.split('\n')]

        # One (contract, expiration, iv) tuple per expiration. Collecting whole
        # rows keeps the three values paired even if a later expiration fails
        # half-way through.
        rows = []

        # Only the first two expirations are visited. Slicing (rather than
        # indexing 0 and 1) copes with a dropdown that has fewer than two entries.
        for option_label in options[:2]:
            time.sleep(random()*3)

            try:
                driver.find_element(By.ID, "bc-options-toolbar__dropdown-month").click()
                dropdown = driver.find_element(By.ID, "bc-options-toolbar__dropdown-month")
                dropdown.find_element(By.XPATH, f"//option[. = '{option_label}']").click()

                #click all expiries and daily
                time.sleep(random()*0.5)
                driver.find_element(By.NAME, "moneyness").click()
                time.sleep(random()*0.5)
                dropdown = driver.find_element(By.NAME, "moneyness")
                dropdown.find_element(By.XPATH, "//option[. = 'Show All']").click()
                time.sleep(random()*0.5)
                driver.find_element(By.CSS_SELECTOR, ".bc-datatable-toolbar:nth-child(3)").click()
                time.sleep(random()*0.5)
                try:
                    driver.find_element(By.NAME, "futuresOptionsTime").click()
                    time.sleep(random()*0.5)
                    dropdown = driver.find_element(By.NAME, "futuresOptionsTime")
                    dropdown.find_element(By.XPATH, "//option[. = 'Daily']").click()
                except Exception:
                    # Best effort: the page is processed as-is when this
                    # selector cannot be used.
                    pass

                # Get the contract associated with the month
                temp_name = "#main-content-column > div > div.page-title.symbol-header-info.ng-scope > div.symbol-name > div > span:nth-child(2)"
                # note we remove ( and ) by removing the first and last character
                contract = driver.find_element(By.CSS_SELECTOR, temp_name).text[1:-1]
                # Get the option expiration date
                temp_name = "#main-content-column > div > div:nth-child(4) > div > div:nth-child(1) > div > strong:nth-child(2)"
                expiration = driver.find_element(By.CSS_SELECTOR, temp_name).text
                # Get the IV of all options
                temp_name = "#main-content-column > div > div:nth-child(4) > div > div.column.small-12.medium-4.text-medium-up-center > div > strong"
                iv_text = driver.find_element(By.CSS_SELECTOR, temp_name).text
                rows.append((contract, expiration, iv_text))

                if download:
                    time.sleep(random()*3)
                    driver.find_element(By.CSS_SELECTOR, ".toolbar-button > span").click()
                    time.sleep(3)

            except Exception:
                # ``Exception`` rather than a bare except, so Ctrl-C still
                # interrupts a long scraping run.
                print(option_label + " not found")

        return pd.DataFrame(rows, columns=['futures_contract', 'option_expiration', 'iv'])

    option_price_url = f"https://www.barchart.com/futures/quotes/{sym}/options?futuresOptionsTime=daily&moneyness=allRows"
    option_greek_url = f'https://www.barchart.com/futures/quotes/{sym}/volatility-greeks?moneyness=allRows'

    #-------------------
    # download the options prices
    time.sleep(2*random())
    driver.get(option_price_url)
    time.sleep(2)

    df_price_data = on_current_page_download_near_next(driver, download)

    # Get the option greeks
    time.sleep(2*random())
    driver.get(option_greek_url)
    time.sleep(2)

    df_greek_data = on_current_page_download_near_next(driver, download)

    df_price_data['source'] = 'price_page'
    df_greek_data['source'] = 'greeks_page'
    df_all = pd.concat([df_price_data, df_greek_data], axis=0)
    filename=os.path.join(download_path, f"{sym}_{date_str}.txt")
    # NOTE(review): the row index is written too, so the file gains an unnamed
    # first column when read back; kept to leave the file format unchanged.
    df_all.to_csv(filename)

    # download all pricing files needed
    for ctr in df_all['futures_contract'].unique():
        future_price_url = f'https://www.barchart.com/futures/quotes/{ctr}/historical-download'

        # download the futures prices
        driver.get(future_price_url)

        # download the price history
        time.sleep(2*random())
        driver.find_element(By.CSS_SELECTOR, ".bc-price-history-checkboxes .checkbox").click()
        time.sleep(2)

        if download:
            driver.find_element(By.CSS_SELECTOR, ".add").click()
            time.sleep(3)
    return

In [6]:
if k_do_download:
    driver = bar_chart_get_driver(executable_path, download_path)
    try:
        bar_chart_login(driver)
        for sym in all_sym:
            bar_chart_download(driver, date_str, sym, k_do_download)
    finally:
        # Always close the browser, even when login or a download raises, so no
        # Chrome / chromedriver process is left running.
        driver.quit()

In [7]:
# The downloaded details get their own name: rebinding ``all_sym`` (the list of
# symbols to download) to a DataFrame would make a re-run of the download cell
# iterate over column names instead of symbols.
df_all_details = find_all_downloaded_details(download_path)
all_contracts = df_all_details[['futures_contract', 'option_expiration', 'iv']].drop_duplicates()
all_contracts

Unnamed: 0,futures_contract,option_expiration,iv
0,CLK23,04/17/23,43.60%
1,CLM23,05/17/23,44.89%
0,E6M23,04/06/23,8.76%
1,E6M23,05/05/23,8.76%
0,GCJ23,03/28/23,16.96%
1,GCM23,04/25/23,18.02%
0,HGK23,03/28/23,26.49%
1,HGK23,04/25/23,26.49%
0,NGJ23,03/28/23,81.23%
1,NGK23,04/25/23,92.44%


In [8]:
# One [agg_neutral, df_greeks_by_strike, exp_neutral] result per contract/expiration.
temp = []
for idx, row in all_contracts.iterrows():
    # Look the fields up by column label: integer keys on a label-indexed row
    # are a deprecated positional fallback in pandas.
    sym = row['futures_contract']
    option_expiration = row['option_expiration']
    temp.append(generate_neutral_futures(sym, option_expiration, file_date_str, barchart_datestr))

In [9]:
# Pull the data together and get the delta put call ratio from the files
temp_iv_data = []
for idx, row in all_contracts.iterrows():

    sym = row[0]
    option_expiration = row[1]

    df_iv = get_iv_data_set(sym, option_expiration, file_date_str, barchart_datestr)
    df_pcr = get_put_call_ratio(df_iv)
    df_iv['Put-Call-Ratio'] = df_pcr['Put-Call-Ratio']
    df_iv['Delta-Adj-Put-Call-Ratio'] = np.abs(df_pcr['Delta-Adj-Put-Call-Ratio'])
    df_iv = df_iv[['symbol', 'option_expiration', 'close_px', 'Put-Call-Ratio', 'Delta-Adj-Put-Call-Ratio']].tail(1)
    temp_iv_data.append(df_iv)
closepx_table = pd.concat(temp_iv_data).set_index('symbol')
closepx_table = closepx_table.rename(columns={'close_px':'Futures Close', 'option_expiration':'Opex Date'})

In [10]:
# Get the delta neutral and gamma neutral values and combine with final table
expn = [x[0] for x in temp]
final_output = pd.concat(expn).loc[-1,['symbol', 'delta_neutral', 'gamma_neutral' ]].set_index('symbol')
final_output = pd.concat([closepx_table, final_output], axis=1)
final_output = final_output.loc[:,['Opex Date', 'Futures Close', 'delta_neutral', 'gamma_neutral', 'Put-Call-Ratio', 'Delta-Adj-Put-Call-Ratio']]
final_output

Unnamed: 0_level_0,Opex Date,Futures Close,delta_neutral,gamma_neutral,Put-Call-Ratio,Delta-Adj-Put-Call-Ratio
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
CLK23,2023-04-17,72.81,72.050796,68.824317,0.673086,0.861301
CLM23,2023-05-17,72.93,72.71739,70.482228,0.423797,1.033283
E6M23,2023-04-06,1.08485,1.074,1.091424,2.880011,0.446408
E6M23,2023-05-05,1.08485,1.064974,1.06874,1.182209,0.422762
GCJ23,2023-03-28,1953.8,1903.295509,1902.394976,0.785608,0.125325
GCM23,2023-04-25,1971.5,1909.789089,1905.339677,0.976353,0.261584
HGK23,2023-03-28,4.0785,0.0,0.0,0.624771,0.366848
HGK23,2023-04-25,4.0785,3.958368,3.867978,0.54254,0.433842
NGJ23,2023-03-28,2.088,0.0,0.0,0.552627,154.0625
NGK23,2023-04-25,2.215,2.595912,2.483902,0.771526,2.98196


In [11]:
# Save the report next to the day's downloads. The file name is reproduced
# exactly as before (including its spelling) so existing consumers still find it.
neutrals_csv_path = os.path.join(download_path, f'{output_date_str}.futures.neturals.csv')
final_output.to_csv(neutrals_csv_path)

# Testing Follows

In [12]:
# Load the stored reference table for this date; the file must already exist in
# the day's download folder, otherwise read_csv raises FileNotFoundError.
target_csv_path = os.path.join(download_path, f'finalout.target.{file_date_str}.csv')
final_output_target = pd.read_csv(target_csv_path, parse_dates=['Opex Date'])
final_output_target = final_output_target.set_index('symbol')

# Symbols compared against the reference table.
test_target_sym = ['GCJ23', 'CLK23', 'NGJ23']

FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\mcbri\\PycharmProjects\\futureDataCapture\\local_download\\2023-03-27\\finalout.target.2023-03-27.csv'

In [None]:
# Rows of the freshly computed table for the symbols under test.
final_output.loc[test_target_sym,:]

In [None]:
 # Matching rows of the stored reference table, for visual comparison.
 final_output_target.loc[test_target_sym, :]

In [None]:
# Check output against test: compare each column for the test symbols with the
# stored reference, within 1% relative tolerance.
all_columns = final_output.columns

for c in all_columns:
    t1 = final_output.loc[test_target_sym,c]
    t2 = final_output_target.loc[test_target_sym,c]
    try:
        pd.testing.assert_series_equal(t1, t2, rtol=0.01, check_exact=False)
    except AssertionError:
        # A mismatch is displayed side by side instead of stopping the notebook.
        # Only AssertionError is caught, so genuine errors are no longer
        # silently shown as "differences".
        display(pd.concat([t1, t2], axis=1))
