# Fetch data for the replication part of the project

In [1]:
import pandas as pd
import polars as pl
import numpy as np
import wrds
import os
import sys
import sqlite3
from pathlib import Path

# Absolute location of the project's SQLite database:
#   <parent of the current working directory>/data/db/option_prices.db
# Built with pathlib so the separators are handled by the library instead of
# concatenating a "/"-prefixed suffix by hand (which yields a doubled separator
# when the parent directory is the filesystem root). Kept as a plain string
# because it is concatenated into `db_uri` right below.
path_to_db = str(Path.cwd().parent / "data" / "db" / "option_prices.db")

# URI form of the same path: the "sqlite:///" scheme prefix followed by the path.
db_uri = "sqlite:///" + path_to_db

In [2]:
# Load environment variables from a .env file into the process environment.
# NOTE(review): presumably this is where WRDS_USERNAME / WRDS_PASSWORD (read with
# os.getenv when the WRDS connection is opened) are defined — confirm.
# The bare call is the last expression of the cell, so its return value is displayed.
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
# Open the local SQLite database that the tables fetched in this notebook are
# read from and written to.
# Reuse `path_to_db` from the configuration cell instead of repeating the
# location as a second hard-coded relative string ("../data/db/option_prices.db"),
# so the database path is defined in exactly one place.
conn_sqlite = sqlite3.connect(path_to_db)

In [3]:
# Open the WRDS session. Username and password are taken from the environment
# (WRDS_USERNAME / WRDS_PASSWORD) so that no credentials are stored in the notebook.
db = wrds.Connection(
    wrds_username=os.environ.get("WRDS_USERNAME"),
    wrds_password=os.environ.get("WRDS_PASSWORD"),
)

Loading library list...
Done


For the reproduction of the original paper we need the following data:
- **Stock prices** (daily) of the 8 stocks (Amazon, AMD, Boeing, Disney,
Meta, Netflix, PayPal, and Salesforce), from November 11, 2020 to February 12, 2021.
- **Option prices** of the 8 stocks in the same period. The author used only American-style options, but we will see if we can use European-style options as well (depends on the liquidity).
- **Implied volatility** of the 8 stocks in the same period.
- **Risk-free rate** in the same period.

The risk-free rate can be obtained from the US Treasury website, but we can also get it from OptionMetrics. The other data can be obtained from the `optionm_all` library (or `optionm`, which is the one queried in the code below). Thus, we will load the following files:
- Option_Price File   `opprcdYYYY` (for option prices and implied volatility)
- Option_Info File    `opinfd`     (for option type)
- Security_Price File `secprdYYYY` (for stock prices)


PERMNO of the stocks:
- Amazon: 84788
- AMD  : 61241
- Boeing: 19561
- Disney: 90805
- Meta: 13407
- Netflix: 89393
- PayPal: 15488
- Salesforce: 90215

## Options and stock prices

In [4]:
# CRSP PERMNOs of the eight stocks used in the replication
# (same order as the list in the text above).
permno_mini_list = [
    84788,  # Amazon
    61241,  # AMD
    19561,  # Boeing
    90805,  # Disney
    13407,  # Meta
    89393,  # Netflix
    15488,  # PayPal
    90215,  # Salesforce
]

In [5]:
# Read the whole `sp500_constituents` table from the local SQLite database
# and display it as the cell output.
sp500_const_permno = pd.read_sql(
    sql="select * from sp500_constituents",
    con=conn_sqlite,
)
sp500_const_permno

Unnamed: 0,permno,comnam,ncusip,shrcd,exchcd,hsiccd,ticker,gvkey,iid,start,ending,conm,tic,cusip,cik,sic,naics,gsubind,gind
0,76129,3COM CORP,88553510,11,3,3674,COMS,010553,01,1996-01-02,2000-07-27,3COM CORP,COMS.,885535104,0000738076,3576,334119,45201020,452010
1,22592,3M CO,88579Y10,11,1,3841,MMM,007435,01,1957-03-01,2023-12-29,3M CO,MMM,88579Y101,0000066740,9997,999977,20105010,201050
2,10006,A C F INDUSTRIES INC,00080010,10,1,3743,ACF,001010,01,1957-03-01,1984-07-18,ACF INDUSTRIES INC,4165A,00099V004,0000910627,3743,336510,20304010,203040
3,50906,A D C TELECOMMUNICATIONS INC,00088630,11,3,3661,ADCT,001013,01,1999-08-02,2007-06-29,ADC TELECOMMUNICATIONS INC,ADCT.1,000886309,0000061478,3661,334210,45201020,452010
4,50906,A D C TELECOMMUNICATIONS INC,00088610,11,3,3661,ADCT,001013,01,1999-08-02,2007-06-29,ADC TELECOMMUNICATIONS INC,ADCT.1,000886309,0000061478,3661,334210,45201020,452010
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2553,89070,ZIMMER HOLDINGS INC,98956P10,11,1,3842,ZMH,144559,01,2001-08-07,2023-12-29,ZIMMER BIOMET HOLDINGS INC,ZBH,98956P102,0001136869,3842,339113,35101010,351010
2554,84129,ZIONS BANCORPORATION,98970110,11,3,6021,ZION,011687,01,2001-06-25,2023-12-29,ZIONS BANCORPORATION NA,ZION,989701107,0000109380,6020,522110,40101015,401010
2555,84129,ZIONS BANCORPORATION N A,98970110,11,3,6021,ZION,011687,01,2001-06-25,2023-12-29,ZIONS BANCORPORATION NA,ZION,989701107,0000109380,6020,522110,40101015,401010
2556,13788,ZOETIS INC,98978V10,11,1,2834,ZTS,013721,01,2013-06-24,2023-12-29,ZOETIS INC,ZTS,98978V103,0001555280,2834,325412,35202010,352020


In [6]:
# Read the whole `crsp_opm_link` table (columns shown below: secid, sdate, edate,
# permno) from the local SQLite database and display it as the cell output.
secid_permno_link = pd.read_sql(
    sql="select * from crsp_opm_link",
    con=conn_sqlite,
)
secid_permno_link

Unnamed: 0,secid,sdate,edate,permno
0,5001,1996-01-02,1996-03-13,10074
1,5002,1996-01-01,1996-02-22,10154
2,5004,1996-01-01,2000-01-27,80071
3,5005,1996-01-01,1997-08-12,85041
4,5006,1996-01-01,1996-08-28,10496
...,...,...,...,...
31472,218323,2023-03-16,2023-12-29,88885
31473,218324,2023-03-21,2023-12-29,23796
31474,218325,2023-03-17,2023-12-29,23814
31475,218326,2023-03-20,2023-12-29,23760


In [7]:
# Translate the chosen PERMNOs into secids through the link table:
# keep the rows whose `permno` is in `permno_mini_list` and take their `secid` column.
# NOTE(review): the link's sdate/edate validity window is not applied here — confirm
# that each PERMNO maps to a single secid for the sample period.
secid_mini_list = secid_permno_link.loc[
    secid_permno_link["permno"].isin(permno_mini_list),
    "secid",
].values
secid_mini_list

array([101121, 101310, 102265, 115422, 121592, 124166, 154402, 207609])

In [8]:
# Re-display of the secids selected in the previous cell (the output is identical);
# this cell performs no computation.
secid_mini_list

array([101121, 101310, 102265, 115422, 121592, 124166, 154402, 207609])

In [9]:
# Tuple of the selected secids, later interpolated into the SQL `IN (...)` clause.
# Each element is converted to a built-in int: the source array holds NumPy integer
# scalars, whose repr under NumPy >= 2 is `np.int64(101121)` rather than `101121`,
# which would produce invalid SQL when the tuple is formatted into the query text.
secids = tuple(int(secid) for secid in secid_mini_list)
secids

(101121, 101310, 102265, 115422, 121592, 124166, 154402, 207609)

In [18]:
# Download the OptionMetrics option-price (opprcdYYYY) and security-price (secprdYYYY)
# rows for the selected secids from WRDS and append them to the local SQLite tables
# `option_price_table` and `stock_price_table`.
#
# The IN-list is built explicitly from built-in ints instead of interpolating the
# tuple's repr: the repr breaks the SQL for a one-element tuple (`(101121,)`) and for
# NumPy >= 2 scalars (`np.int64(101121)`).
secid_sql_list = ", ".join(str(int(secid)) for secid in secids)
if not secid_sql_list:
    raise ValueError("secids is empty: there is nothing to fetch from WRDS")

# Shared filter for both tables. Both date bounds are strict inequalities.
# NOTE(review): the notebook text describes the period as "from November 11, 2020 to
# February 12, 2021"; with `>` / `<` the two boundary dates themselves are not
# fetched — confirm whether inclusive bounds were intended.
where_clause = f"""where date>'2020-11-11'::DATE
    and date<'2021-02-12'::DATE
    and secid in ({secid_sql_list})"""

# The yearly tables are queried one at a time. After the loop `option_table` and
# `stock_table` hold only the rows of the last year processed.
# NOTE: `if_exists='append'` means re-running this cell adds the same rows again.
for year in [2020, 2021]:
    option_table = db.raw_sql(f"select * from optionm.opprcd{year} {where_clause}")
    stock_table = db.raw_sql(f"select * from optionm.secprd{year} {where_clause}")

    option_table.to_sql(name='option_price_table', con=conn_sqlite, if_exists='append', index=False)
    stock_table.to_sql(name='stock_price_table', con=conn_sqlite, if_exists='append', index=False)

In [19]:
# Display the option-price frame left over from the fetch loop. The loop reassigns
# `option_table` on every iteration, so this is only the last year's query result
# (the output below starts at 2021-01-04), not everything written to SQLite.
option_table

Unnamed: 0,secid,date,symbol,symbol_flag,exdate,last_date,cp_flag,strike_price,best_bid,best_offer,...,theta,optionid,cfadj,am_settlement,contract_size,ss_flag,forward_price,expiry_indicator,root,suffix
0,101121.0,2021-01-04,AMD 210122C50000,1,2021-01-22,2020-12-21,C,50000.0,40.50,44.05,...,,137458183.0,1.0,0.0,100.0,0,,w,,
1,101121.0,2021-01-04,AMD 210122C55000,1,2021-01-22,2020-12-18,C,55000.0,35.50,39.05,...,,137458184.0,1.0,0.0,100.0,0,,w,,
2,101121.0,2021-01-04,AMD 210122C60000,1,2021-01-22,2021-01-04,C,60000.0,30.50,34.00,...,,137370359.0,1.0,0.0,100.0,0,,w,,
3,101121.0,2021-01-06,AMD 210108P118000,1,2021-01-08,2021-01-05,P,118000.0,27.55,27.75,...,,137273187.0,1.0,0.0,100.0,0,,w,,
4,101121.0,2021-01-06,AMD 210108P119000,1,2021-01-08,2021-01-06,P,119000.0,28.55,28.75,...,,137273188.0,1.0,0.0,100.0,0,,w,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27913,207609.0,2021-02-11,PYPL 230120P400000,1,2023-01-20,,P,400000.0,145.30,148.30,...,-15.683920,138484389.0,1.0,0.0,100.0,0,,,,
27914,207609.0,2021-02-11,PYPL 230120P410000,1,2023-01-20,,P,410000.0,152.90,156.80,...,-15.425170,138484390.0,1.0,0.0,100.0,0,,,,
27915,207609.0,2021-02-11,PYPL 230120P420000,1,2023-01-20,,P,420000.0,161.90,164.85,...,-15.273040,138484391.0,1.0,0.0,100.0,0,,,,
27916,207609.0,2021-02-11,PYPL 230120P90000,1,2023-01-20,2021-02-03,P,90000.0,1.80,4.00,...,-3.372966,136166910.0,1.0,0.0,100.0,0,,,,


In [21]:
# Display the security-price frame left over from the fetch loop. The loop reassigns
# `stock_table` on every iteration, so this is only the last year's query result
# (the output below starts at 2021-01-04), not everything written to SQLite.
stock_table

Unnamed: 0,secid,date,low,high,close,volume,return,cfadj,open,cfret,shrout
0,101121.0,2021-01-04,90.9150,96.0600,92.30,51802552.0,0.006433,2.0,92.11,2.0,1202710.0
1,101121.0,2021-01-05,91.4100,93.2077,92.77,34207989.0,0.005092,2.0,92.10,2.0,1202710.0
2,101121.0,2021-01-06,89.4600,92.2800,90.33,51911657.0,-0.026302,2.0,91.62,2.0,1202710.0
3,101121.0,2021-01-07,91.2000,95.5100,95.16,42897157.0,0.053471,2.0,91.33,2.0,1202710.0
4,101121.0,2021-01-08,93.2700,96.4000,94.58,39816400.0,-0.006095,2.0,95.98,2.0,1202710.0
...,...,...,...,...,...,...,...,...,...,...,...
219,207609.0,2021-02-05,264.7100,270.0000,269.44,7978598.0,-0.003661,1.0,268.61,1.0,1171690.0
220,207609.0,2021-02-08,274.6235,282.6989,282.17,10084303.0,0.047246,1.0,276.88,1.0,1171690.0
221,207609.0,2021-02-09,276.5100,286.8900,284.20,8696995.0,0.007194,1.0,280.38,1.0,1171180.0
222,207609.0,2021-02-10,278.4010,286.0800,283.18,9364487.0,-0.003589,1.0,285.10,1.0,1171180.0


## Interest rate data

In [None]:
# Interest rates: pull the `zerocd` table of the `optionm` library from WRDS and
# store it in the local SQLite database as table `zerocd`, replacing any existing
# table of that name. No date or column filter is applied to the download.
ir = db.get_table(
    library='optionm',
    table='zerocd',
)
ir.to_sql(
    name='zerocd',
    con=conn_sqlite,
    if_exists='replace',
    index=False,
)

In [23]:
# Release both connections now that all data has been fetched and stored:
# the remote WRDS session and the local SQLite handle.
db.close()
conn_sqlite.close()