In [106]:
import datetime as dt
import pandas as pd
from pandas_datareader import data as pdr
import plotly.offline as plotly
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import yfinance as yf
yf.pdr_override()
import os
plotly.init_notebook_mode(connected=True)
pd.options.plotting.backend = 'plotly'
import wrds
import numpy as np
import gdown
from zipfile import ZipFile


## Uncomment the following cell to get the necessary data to run this notebook

In [107]:
# """
# Download the project datafiles from Google Drive.

#     -> files will be saved in a new folder: "data".
# """

# url = "https://drive.google.com/u/0/uc?id=1N5bw2N-zbs0aoUiNx0l8tkWiKx4O9YxS"

# zipped_data = "data.zip"
# gdown.download(url, zipped_data)
# with ZipFile(zipped_data, 'r') as zip:
#     zip.extractall("data/")
# os.remove("data.zip")


## Specifying date range

In [108]:
# Sample period: a ten-year window (2013-01-01 to 2023-01-01) following Feng et al. (2012)
start = dt.datetime(year=2013, month=1, day=1)
end = dt.datetime(year=2023, month=1, day=1)
start, end

(datetime.datetime(2013, 1, 1, 0, 0), datetime.datetime(2023, 1, 1, 0, 0))

## Select stock/ticker

# Importing stock tickers
Source: https://stockmarketmba.com/stocksinthesp500.php


In [109]:
# Load the S&P 500 constituents workbook, skipping its first spreadsheet row,
# and preview the first five records.
path = "data/Stocks_in_SP_500_Index.xlsx"
data = pd.read_excel(io=path, skiprows=1)
data.head(n=5)

Unnamed: 0,Symbol,Description,Category2,Category3,GICS Sector,Market cap,Dividend yield,Price to TTM earnings,Price to TTM sales,Price to book value,Action
0,AAPL,Apple Inc,Common stocks,Large cap,Information Technology,1988832912360,0.0073,0.0,0.0,0.0,Analyze
1,MSFT,Microsoft Corp,Common stocks,Large cap,Information Technology,1657655067218,0.0114,0.0,0.0,0.0,Analyze
2,GOOG,Alphabet Inc Class C,Common stocks,Large cap,Communication Services,1042954820000,0.0,0.0,0.0,0.0,Analyze
3,GOOGL,Alphabet Inc Class A,Common stocks,Large cap,Communication Services,1042954820000,0.0,0.0,0.0,0.0,Analyze
4,AMZN,Amazon.Com Inc.,Common stocks,Large cap,Consumer Discretionary,847961495109,0.0,0.0,0.0,0.0,Analyze


In [110]:
# Plain Python list of the ticker symbols in the S&P 500 table
tickers = data['Symbol'].tolist()

In [111]:
# Load the Dow Jones Industrial Average constituents (no rows skipped) and preview them
path = "data/Stocks in the Dow Jones Industrial Average.csv"
data_2 = pd.read_csv(path)
data_2.head(n=5)

Unnamed: 0,Symbol,Description,Category2,GICS Sector,Market cap,Market Cap Weight,Index Weight,Dividend yield,Country,Action
0,UNH,Unitedhealth Group Inc,Common stocks,Health Care,"$444,974,402,526",4.54%,9.55%,1.30%,USA,Analyze
1,GS,Goldman Sachs Group Inc,Common stocks,Financials,"$118,213,948,325",1.21%,7.13%,2.39%,USA,Analyze
2,HD,Home Depot Inc,Common stocks,Consumer Discretionary,"$331,370,012,335",3.38%,6.36%,2.27%,USA,Analyze
3,AMGN,Amgen Inc,Common stocks,Health Care,"$141,070,338,378",1.44%,5.22%,2.82%,USA,Analyze
4,MCD,Mcdonalds Corp,Common stocks,Consumer Discretionary,"$194,854,052,228",1.99%,5.14%,2.09%,USA,Analyze


In [112]:
# Plain Python list of the ticker symbols in the Dow table
tickers_dow = data_2['Symbol'].tolist()

In [113]:
# Persist the Dow ticker symbols to a text file, one symbol per line
with open(r'used_ticker_list_dow.txt', 'w') as file:
    file.writelines("%s\n" % item for item in tickers_dow)
    print('done')

done


## pandas_datareader module

In [114]:
# Download the Yahoo Finance history for every S&P 500 symbol between `start` and `end`.
# yf.pdr_override() is called in the import cell, so this call is presumably routed through yfinance.
# NOTE(review): the recorded output reports failed downloads for BRK.B and BF.B.
df = pdr.get_data_yahoo(tickers, start, end)

[*********************100%***********************]  503 of 503 completed

2 Failed downloads:
- BRK.B: No timezone found, symbol may be delisted
- BF.B: No data found for this date range, symbol may be delisted


In [115]:
# Replace the index with plain datetime.date objects (the resulting index has object dtype)
calendar_dates = pd.to_datetime(df.index).date
df.index = calendar_dates
df.index

Index([2013-01-02, 2013-01-03, 2013-01-04, 2013-01-07, 2013-01-08, 2013-01-09,
       2013-01-10, 2013-01-11, 2013-01-14, 2013-01-15,
       ...
       2022-12-16, 2022-12-19, 2022-12-20, 2022-12-21, 2022-12-22, 2022-12-23,
       2022-12-27, 2022-12-28, 2022-12-29, 2022-12-30],
      dtype='object', length=2518)

# Making a DataFrame containing Adjusted close Daily returns and volume returns from Yahoo finance

In [116]:
# For every ticker, count its missing cells in Close plus its missing cells in Volume,
# and record only the tickers where that total is non-zero.
close_gaps = df['Close'].isnull().sum()
volume_gaps = df['Volume'].isnull().sum()

col_with_missing_values = {}
for ticker in df['Close'].columns:
    total_gaps = close_gaps[ticker] + volume_gaps[ticker]
    if total_gaps > 0:
        col_with_missing_values[ticker] = total_gaps

In [117]:
# One-column table (ticker -> number of missing cells) and the list of affected tickers
df_col_missing_values = pd.Series(col_with_missing_values, name='Missing Values').to_frame()
list_of_tickers_with_missing_values = list(df_col_missing_values.index)

In [118]:
# Report which tickers have at least one missing Close/Volume value and how many there are
print(f'stocks with missing values {list_of_tickers_with_missing_values}')
print(f'stocks to de removed: {len(list_of_tickers_with_missing_values)}')


stocks with missing values ['ALLE', 'ANET', 'BF.B', 'BRK.B', 'CARR', 'CDAY', 'CDW', 'CEG', 'CFG', 'CTLT', 'CTVA', 'CZR', 'DOW', 'ETSY', 'FOX', 'FOXA', 'FTV', 'HLT', 'HPE', 'HWM', 'INVH', 'IQV', 'IR', 'KEYS', 'KHC', 'LW', 'MRNA', 'NCLH', 'NWS', 'NWSA', 'OGN', 'OTIS', 'PAYC', 'PYPL', 'QRVO', 'SEDG', 'SYF', 'VICI', 'WRK', 'ZTS']
stocks to de removed: 40


In [120]:
# Independent copies of the Close, Open and Volume sub-frames so the download in `df` stays untouched
df_close_copy, df_open_copy, df_volume_copy = (
    df[field].copy() for field in ('Close', 'Open', 'Volume')
)

In [121]:
# Remove every ticker with missing values from the three working copies (modified in place),
# then expose the same objects under their *_clean names.
for frame in (df_close_copy, df_volume_copy, df_open_copy):
    frame.drop(columns=list_of_tickers_with_missing_values, inplace=True)

df_close_clean, df_open_clean, df_volume_clean = df_close_copy, df_open_copy, df_volume_copy

In [16]:
# Write the cleaned Close, Open and Volume frames to CSV files in the data folder.
# NOTE(review): the first file name is 'f_close_clean.csv' while the other two use a 'df_'
# prefix - possibly a typo; left unchanged because consumers of the file are not visible here.
csv_exports = (
    ('data/f_close_clean.csv', df_close_clean),
    ('data/df_open_clean.csv', df_open_clean),
    ('data/df_volume_clean.csv', df_volume_clean),
)
for csv_path, frame in csv_exports:
    frame.to_csv(csv_path)

In [123]:
df_volume_clean

Unnamed: 0,A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACGL,ACN,ADBE,...,WY,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION
2013-01-02,8790205,6662000,800500,560518000,13767900,1972400,20266400,2932800,4040500,6483800,...,6595000,2487000,2702000,16143700,856600,927300,6091328,1439425,213800,2551100
2013-01-03,5751791,5398400,520600,352965200,16739300,2026700,22148200,1966200,3340700,3906000,...,3967200,1731900,2403600,13268200,525500,610800,3654574,1622250,99900,2267900
2013-01-04,6432897,12048300,615000,594333600,21372100,2461500,15820100,1591800,3145600,3809300,...,3639400,2129100,1750100,11427900,660400,563400,3782685,1226524,146000,3577700
2013-01-07,3589505,5730600,1054400,484156400,17897100,1803600,13120000,1296900,2262800,3632100,...,2460400,1501000,2856800,11799800,554100,481300,5289417,985710,90600,2286000
2013-01-08,3896925,8034400,840000,458707200,17863300,1533000,15042300,1810800,2502800,3080900,...,5202600,976300,3761500,14226400,698900,1084500,17253686,932356,112600,3758600
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-23,779400,17181200,524500,63814900,2685100,528000,2618400,890300,1213300,1629800,...,1932800,1134800,1201300,11539400,1266800,321100,700800,789800,180900,1263300
2022-12-27,879500,18878200,717100,69007800,2669200,740300,2927400,666700,1009600,1464300,...,2266000,4026500,1580600,11962100,998800,468200,1300200,755600,252500,704200
2022-12-28,784300,20470400,685500,85438400,2944500,804700,3265200,1107900,1387000,1672100,...,2244100,2131500,1513700,10702100,1430500,480400,964800,750100,241200,680300
2022-12-29,854400,19706300,719100,75703700,3112500,1480000,3047800,1063400,1516800,1793100,...,1846000,1431100,1398000,10534000,1139500,516300,875700,686600,274900,685300


In [17]:
# Daily log return of each stock, computed as log(Close) - log(Open) of the same trading day.
# NOTE(review): the Feng et al. (2012) definition is log(x_{t+1}) - log(x_t), which would leave
# a leading NaN row to drop; the expression below is an open-to-close return instead and keeps
# every row - confirm which definition is intended.

df_daily_returns_clean = np.log(df_close_clean) - np.log(df_open_clean)

In [18]:
# Column labels (tickers) of the returns frame; used further down to select the matching WRDS columns
used_ticker_symbols = df_daily_returns_clean.columns
used_ticker_symbols

Index(['A', 'AAL', 'AAP', 'AAPL', 'ABBV', 'ABC', 'ABT', 'ACGL', 'ACN', 'ADBE',
       ...
       'WY', 'WYNN', 'XEL', 'XOM', 'XRAY', 'XYL', 'YUM', 'ZBH', 'ZBRA',
       'ZION'],
      dtype='object', length=463)

In [19]:
# Persist the tickers that are actually used to a text file, one symbol per line
with open(r'used_ticker_list.txt', 'w') as file:
    file.writelines("%s\n" % item for item in used_ticker_symbols)
    print('done')

done


# Loading WRDS Compustat trading volume data

In [21]:
# Read the WRDS trading-volume extract; the third CSV column (position 2) becomes the index and
# parse_dates=True asks pandas to parse that index as dates.
# low_memory=False makes pandas infer each column's dtype from the whole file, which avoids the
# "Columns (1) have mixed types" DtypeWarning this read otherwise emits.
volume_stock_data = pd.read_csv(
    r'data/stock_volume_WRDS_query_csv.csv',
    parse_dates=True,
    index_col=2,
    low_memory=False,
)


Columns (1) have mixed types.Specify dtype option on import or set low_memory=False.



In [22]:
# Discard the identifier and company-name columns
volume_stock_data = volume_stock_data.drop(['gvkey', 'iid', 'conm'], axis='columns')

In [23]:
volume_stock_data['tic'].nunique()

970

In [24]:
# Copy the index into a 'Date' column, then reshape from long to wide:
# one row per date, one column per ticker, cells holding 'cshtrd'.
volume_stock_data = volume_stock_data.assign(Date=volume_stock_data.index)
volume_stock_data_clean_pivot = volume_stock_data.pivot(values='cshtrd', columns='tic', index='Date')

In [25]:
volume_stock_data_clean_pivot.shape

(4071, 970)

In [26]:
volume_stock_data_clean_pivot.index

DatetimeIndex(['2007-01-02', '2007-01-03', '2007-01-04', '2007-01-05',
               '2007-01-08', '2007-01-09', '2007-01-10', '2007-01-11',
               '2007-01-12', '2007-01-15',
               ...
               '2022-12-30', '2023-01-03', '2023-01-04', '2023-01-05',
               '2023-01-06', '2023-01-09', '2023-01-10', '2023-01-11',
               '2023-01-12', '2023-01-13'],
              dtype='datetime64[ns]', name='Date', length=4071, freq=None)

In [27]:
# (ticker, number of missing values) for every column of the wide volume table
list_of_missing = [
    (str(ticker), volume_stock_data_clean_pivot[ticker].isnull().sum())
    for ticker in volume_stock_data_clean_pivot.columns
]

In [28]:
# Restrict the wide volume table to the start..end window and to the tickers in used_ticker_symbols
volume_stock_data_clean_pivot = volume_stock_data_clean_pivot.loc[start:end, used_ticker_symbols]

In [29]:
# Fill each missing value with the NEXT available observation of the same ticker (back-fill);
# dropping every ticker with a gap would throw away too much data.
# NOTE(review): back-filling copies later values into earlier dates. A previous-day fill would
# be `.ffill()` - confirm which one is intended.
# `.bfill()` replaces `fillna(method='backfill')`, which current pandas releases deprecate.
volume_stock_data_clean_pivot = volume_stock_data_clean_pivot.bfill()

In [30]:
volume_stock_data_clean_pivot 

tic,A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACGL,ACN,ADBE,...,WY,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-02,6290038.0,43167060.0,800421.0,19986670.0,13767660.0,1972401.0,20266410.0,977536.0,4039095.0,6483720.0,...,6594952.0,2486815.0,2702053.0,16144970.0,856539.0,927252.0,4376855.0,1397462.0,213748.0,2551059.0
2013-01-03,4114289.0,43167060.0,520539.0,12579170.0,16739210.0,2027242.0,22148090.0,655362.0,3340674.0,3905904.0,...,3967047.0,1731879.0,2403510.0,13268470.0,525445.0,610706.0,2627253.0,1574966.0,99849.0,2267804.0
2013-01-04,4601506.0,43167060.0,614933.0,21196320.0,21372200.0,2462183.0,15819140.0,530563.0,3145680.0,3809146.0,...,3624749.0,2129025.0,1743995.0,11427870.0,660389.0,563325.0,2719375.0,1189537.0,145944.0,3577660.0
2013-01-07,2567557.0,43167060.0,1051969.0,17262620.0,17896920.0,1803504.0,13121470.0,423123.0,2262426.0,3632022.0,...,2450680.0,1500928.0,2853210.0,11799780.0,554059.0,481287.0,3729703.0,954238.0,90581.0,2285917.0
2013-01-08,2787813.0,43167060.0,840003.0,16350190.0,17863000.0,1533163.0,15053860.0,580903.0,2468876.0,3080868.0,...,5203250.0,975921.0,3762904.0,14228400.0,698803.0,1084482.0,12406140.0,905965.0,112590.0,3758558.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-23,779426.0,17160810.0,524534.0,63755280.0,2684724.0,528005.0,2618369.0,890265.0,1213251.0,1627915.0,...,1932785.0,1134475.0,1201346.0,11534290.0,1264320.0,321094.0,699735.0,789770.0,180932.0,1263345.0
2022-12-27,879543.0,18836880.0,717073.0,68887690.0,2669090.0,740310.0,2927352.0,666705.0,1009563.0,1463215.0,...,2265992.0,4024281.0,1580610.0,11874640.0,998778.0,468173.0,1299183.0,742499.0,252510.0,704184.0
2022-12-28,784312.0,20388570.0,685513.0,85247210.0,2944327.0,804725.0,3264409.0,1107929.0,1386969.0,1668511.0,...,2244112.0,2131050.0,1513689.0,10675270.0,1430535.0,480378.0,964755.0,750108.0,241209.0,679957.0
2022-12-29,854026.0,19652320.0,719051.0,75569790.0,3112518.0,1478972.0,3047818.0,1063408.0,1516779.0,1789922.0,...,1845747.0,1431045.0,1398037.0,10526150.0,1139467.0,516306.0,875749.0,686569.0,274913.0,685349.0


In [31]:
# Second name bound to the same DataFrame object (plain assignment, no copy is made)
df_volume_clean_wrds = volume_stock_data_clean_pivot

In [32]:
# Keep only the WRDS volume rows whose date also appears in df_daily_returns_clean, so the WRDS
# and Yahoo Finance volume frames cover the same days (both sources are kept to see whether
# the results differ).
# The mask is applied to the frame it was computed from, so re-running this cell cannot fail
# with a length mismatch against an already-filtered df_volume_clean_wrds.
mask_3 = volume_stock_data_clean_pivot.index.isin(df_daily_returns_clean.index)
df_volume_clean_wrds = volume_stock_data_clean_pivot[mask_3]


In [33]:
df_volume_clean.index

Index([2013-01-02, 2013-01-03, 2013-01-04, 2013-01-07, 2013-01-08, 2013-01-09,
       2013-01-10, 2013-01-11, 2013-01-14, 2013-01-15,
       ...
       2022-12-16, 2022-12-19, 2022-12-20, 2022-12-21, 2022-12-22, 2022-12-23,
       2022-12-27, 2022-12-28, 2022-12-29, 2022-12-30],
      dtype='object', length=2518)

In [34]:
df_volume_clean_wrds

tic,A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACGL,ACN,ADBE,...,WY,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-02,6290038.0,43167060.0,800421.0,19986670.0,13767660.0,1972401.0,20266410.0,977536.0,4039095.0,6483720.0,...,6594952.0,2486815.0,2702053.0,16144970.0,856539.0,927252.0,4376855.0,1397462.0,213748.0,2551059.0
2013-01-03,4114289.0,43167060.0,520539.0,12579170.0,16739210.0,2027242.0,22148090.0,655362.0,3340674.0,3905904.0,...,3967047.0,1731879.0,2403510.0,13268470.0,525445.0,610706.0,2627253.0,1574966.0,99849.0,2267804.0
2013-01-04,4601506.0,43167060.0,614933.0,21196320.0,21372200.0,2462183.0,15819140.0,530563.0,3145680.0,3809146.0,...,3624749.0,2129025.0,1743995.0,11427870.0,660389.0,563325.0,2719375.0,1189537.0,145944.0,3577660.0
2013-01-07,2567557.0,43167060.0,1051969.0,17262620.0,17896920.0,1803504.0,13121470.0,423123.0,2262426.0,3632022.0,...,2450680.0,1500928.0,2853210.0,11799780.0,554059.0,481287.0,3729703.0,954238.0,90581.0,2285917.0
2013-01-08,2787813.0,43167060.0,840003.0,16350190.0,17863000.0,1533163.0,15053860.0,580903.0,2468876.0,3080868.0,...,5203250.0,975921.0,3762904.0,14228400.0,698803.0,1084482.0,12406140.0,905965.0,112590.0,3758558.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-23,779426.0,17160810.0,524534.0,63755280.0,2684724.0,528005.0,2618369.0,890265.0,1213251.0,1627915.0,...,1932785.0,1134475.0,1201346.0,11534290.0,1264320.0,321094.0,699735.0,789770.0,180932.0,1263345.0
2022-12-27,879543.0,18836880.0,717073.0,68887690.0,2669090.0,740310.0,2927352.0,666705.0,1009563.0,1463215.0,...,2265992.0,4024281.0,1580610.0,11874640.0,998778.0,468173.0,1299183.0,742499.0,252510.0,704184.0
2022-12-28,784312.0,20388570.0,685513.0,85247210.0,2944327.0,804725.0,3264409.0,1107929.0,1386969.0,1668511.0,...,2244112.0,2131050.0,1513689.0,10675270.0,1430535.0,480378.0,964755.0,750108.0,241209.0,679957.0
2022-12-29,854026.0,19652320.0,719051.0,75569790.0,3112518.0,1478972.0,3047818.0,1063408.0,1516779.0,1789922.0,...,1845747.0,1431045.0,1398037.0,10526150.0,1139467.0,516306.0,875749.0,686569.0,274913.0,685349.0


In [35]:
# reference: Wharton Research Data Services. "WRDS" wrds.wharton.upenn.edu, accessed 2023-01-16.

# Loading WRDS Compustat shares outstanding data

In [37]:
# Read the WRDS shares-outstanding extract; the third CSV column (position 2) becomes the index
# and parse_dates=True asks pandas to parse that index as dates.
# low_memory=False makes pandas infer each column's dtype from the whole file, which avoids the
# "Columns (1) have mixed types" DtypeWarning this read otherwise emits.
shares_outstanding_stock_data = pd.read_csv(
    r'data/Stock_sharesoutstanding_WRDS_query_csv.csv',
    parse_dates=True,
    index_col=2,
    low_memory=False,
)


Columns (1) have mixed types.Specify dtype option on import or set low_memory=False.



In [38]:
shares_outstanding_stock_data.columns

Index(['gvkey', 'iid', 'tic', 'cshoc'], dtype='object')

In [39]:
# Discard the identifier columns, leaving 'tic' and 'cshoc'
shares_outstanding_stock_data = shares_outstanding_stock_data.drop(['gvkey', 'iid'], axis='columns')

In [40]:
shares_outstanding_stock_data['tic'].nunique()

949

In [41]:
shares_outstanding_stock_data.tic.nunique()

949

In [42]:
# Copy the index into an explicit 'Date' column so it can serve as the pivot key
shares_outstanding_stock_data = shares_outstanding_stock_data.assign(Date=shares_outstanding_stock_data.index)

In [43]:
# Reshape from long to wide: one row per date, one column per ticker, cells holding 'cshoc'
shares_outstanding_stock_data_clean_pivot = shares_outstanding_stock_data.pivot(
    values='cshoc',
    columns='tic',
    index='Date',
)

In [44]:
shares_outstanding_stock_data_clean_pivot.shape

(3815, 949)

In [45]:
# (ticker, number of missing values) for every column of the wide shares-outstanding table
list_of_missing = [
    (str(ticker), shares_outstanding_stock_data_clean_pivot[ticker].isnull().sum())
    for ticker in shares_outstanding_stock_data_clean_pivot.columns
]

In [46]:
# Restrict the wide shares-outstanding table to the start..end window and to the tickers in used_ticker_symbols
shares_outstanding_stock_data_clean_pivot = shares_outstanding_stock_data_clean_pivot.loc[start:end, used_ticker_symbols]

In [47]:
# Fill each missing value with the NEXT available observation of the same ticker (back-fill);
# dropping every ticker with a gap would throw away too much data.
# NOTE(review): back-filling copies later values into earlier dates. A previous-day fill would
# be `.ffill()` - confirm which one is intended.
# `.bfill()` replaces `fillna(method='backfill')`, which current pandas releases deprecate.

shares_outstanding_stock_data_clean_pivot = shares_outstanding_stock_data_clean_pivot.bfill()
df_shares_outstanding_clean = shares_outstanding_stock_data_clean_pivot
df_shares_outstanding_clean

tic,A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACGL,ACN,ADBE,...,WY,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-02,347935000.0,219297000.0,73364000.0,9.388190e+08,1.580670e+09,235476000.0,1.580668e+09,136551000.0,644334000.0,495055000.0,...,541532000.0,100824000.0,487620000.0,4.559343e+09,141929000.0,185794000.0,451809000.0,173502000.0,50858000.0,184182000.0
2013-01-03,347935000.0,219297000.0,73364000.0,9.388190e+08,1.580670e+09,235476000.0,1.580668e+09,136551000.0,644334000.0,495055000.0,...,541532000.0,100824000.0,487620000.0,4.559343e+09,141929000.0,185794000.0,451809000.0,173502000.0,50858000.0,184182000.0
2013-01-04,347935000.0,219297000.0,73364000.0,9.388190e+08,1.580670e+09,235476000.0,1.580668e+09,136551000.0,644334000.0,495055000.0,...,541532000.0,100867000.0,487620000.0,4.559343e+09,141929000.0,185794000.0,451809000.0,173502000.0,50858000.0,184182000.0
2013-01-07,347935000.0,219297000.0,73364000.0,9.388190e+08,1.580670e+09,235476000.0,1.580668e+09,136551000.0,644334000.0,495055000.0,...,541532000.0,100867000.0,487620000.0,4.559343e+09,141929000.0,185794000.0,451809000.0,173502000.0,50858000.0,184182000.0
2013-01-08,347935000.0,219297000.0,73364000.0,9.390390e+08,1.580670e+09,235476000.0,1.580668e+09,136551000.0,644334000.0,495055000.0,...,541532000.0,100867000.0,487620000.0,4.559343e+09,141929000.0,185794000.0,451809000.0,173502000.0,50858000.0,184182000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-23,296072000.0,649901000.0,59254000.0,1.590812e+10,1.768481e+09,205669000.0,1.743574e+09,369873000.0,658389000.0,464900000.0,...,735917000.0,113314000.0,547248000.0,4.118293e+09,214912000.0,180222000.0,281688000.0,209852000.0,51630000.0,149618000.0
2022-12-27,296072000.0,649901000.0,59254000.0,1.590812e+10,1.768481e+09,205669000.0,1.743574e+09,369873000.0,658389000.0,464900000.0,...,735917000.0,113314000.0,547248000.0,4.118293e+09,214912000.0,180222000.0,281688000.0,209852000.0,51630000.0,149618000.0
2022-12-28,296072000.0,649901000.0,59254000.0,1.590812e+10,1.768481e+09,205669000.0,1.743574e+09,369873000.0,658389000.0,464900000.0,...,735917000.0,113314000.0,547248000.0,4.118293e+09,214912000.0,180222000.0,281688000.0,209852000.0,51630000.0,149618000.0
2022-12-29,296072000.0,649901000.0,59254000.0,1.590812e+10,1.768481e+09,205669000.0,1.743574e+09,369873000.0,658389000.0,464900000.0,...,735917000.0,113314000.0,547248000.0,4.118293e+09,214912000.0,180222000.0,281688000.0,209852000.0,51630000.0,149618000.0


In [48]:
df_volume_clean.shape

(2518, 463)

In [49]:
df_shares_outstanding_clean.shape

(2539, 463)

In [50]:
df_daily_returns_clean.shape

(2518, 463)

In [51]:
df_volume_clean_wrds

tic,A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACGL,ACN,ADBE,...,WY,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-02,6290038.0,43167060.0,800421.0,19986670.0,13767660.0,1972401.0,20266410.0,977536.0,4039095.0,6483720.0,...,6594952.0,2486815.0,2702053.0,16144970.0,856539.0,927252.0,4376855.0,1397462.0,213748.0,2551059.0
2013-01-03,4114289.0,43167060.0,520539.0,12579170.0,16739210.0,2027242.0,22148090.0,655362.0,3340674.0,3905904.0,...,3967047.0,1731879.0,2403510.0,13268470.0,525445.0,610706.0,2627253.0,1574966.0,99849.0,2267804.0
2013-01-04,4601506.0,43167060.0,614933.0,21196320.0,21372200.0,2462183.0,15819140.0,530563.0,3145680.0,3809146.0,...,3624749.0,2129025.0,1743995.0,11427870.0,660389.0,563325.0,2719375.0,1189537.0,145944.0,3577660.0
2013-01-07,2567557.0,43167060.0,1051969.0,17262620.0,17896920.0,1803504.0,13121470.0,423123.0,2262426.0,3632022.0,...,2450680.0,1500928.0,2853210.0,11799780.0,554059.0,481287.0,3729703.0,954238.0,90581.0,2285917.0
2013-01-08,2787813.0,43167060.0,840003.0,16350190.0,17863000.0,1533163.0,15053860.0,580903.0,2468876.0,3080868.0,...,5203250.0,975921.0,3762904.0,14228400.0,698803.0,1084482.0,12406140.0,905965.0,112590.0,3758558.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-23,779426.0,17160810.0,524534.0,63755280.0,2684724.0,528005.0,2618369.0,890265.0,1213251.0,1627915.0,...,1932785.0,1134475.0,1201346.0,11534290.0,1264320.0,321094.0,699735.0,789770.0,180932.0,1263345.0
2022-12-27,879543.0,18836880.0,717073.0,68887690.0,2669090.0,740310.0,2927352.0,666705.0,1009563.0,1463215.0,...,2265992.0,4024281.0,1580610.0,11874640.0,998778.0,468173.0,1299183.0,742499.0,252510.0,704184.0
2022-12-28,784312.0,20388570.0,685513.0,85247210.0,2944327.0,804725.0,3264409.0,1107929.0,1386969.0,1668511.0,...,2244112.0,2131050.0,1513689.0,10675270.0,1430535.0,480378.0,964755.0,750108.0,241209.0,679957.0
2022-12-29,854026.0,19652320.0,719051.0,75569790.0,3112518.0,1478972.0,3047818.0,1063408.0,1516779.0,1789922.0,...,1845747.0,1431045.0,1398037.0,10526150.0,1139467.0,516306.0,875749.0,686569.0,274913.0,685349.0


In [52]:
df_volume_clean

Unnamed: 0,A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACGL,ACN,ADBE,...,WY,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION
2013-01-02,8790205,6662000,800500,560518000,13767900,1972400,20266400,2932800,4040500,6483800,...,6595000,2487000,2702000,16143700,856600,927300,6091328,1439425,213800,2551100
2013-01-03,5751791,5398400,520600,352965200,16739300,2026700,22148200,1966200,3340700,3906000,...,3967200,1731900,2403600,13268200,525500,610800,3654574,1622250,99900,2267900
2013-01-04,6432897,12048300,615000,594333600,21372100,2461500,15820100,1591800,3145600,3809300,...,3639400,2129100,1750100,11427900,660400,563400,3782685,1226524,146000,3577700
2013-01-07,3589505,5730600,1054400,484156400,17897100,1803600,13120000,1296900,2262800,3632100,...,2460400,1501000,2856800,11799800,554100,481300,5289417,985710,90600,2286000
2013-01-08,3896925,8034400,840000,458707200,17863300,1533000,15042300,1810800,2502800,3080900,...,5202600,976300,3761500,14226400,698900,1084500,17253686,932356,112600,3758600
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-23,779400,17181200,524500,63814900,2685100,528000,2618400,890300,1213300,1629800,...,1932800,1134800,1201300,11539400,1266800,321100,700800,789800,180900,1263300
2022-12-27,879500,18878200,717100,69007800,2669200,740300,2927400,666700,1009600,1464300,...,2266000,4026500,1580600,11962100,998800,468200,1300200,755600,252500,704200
2022-12-28,784300,20470400,685500,85438400,2944500,804700,3265200,1107900,1387000,1672100,...,2244100,2131500,1513700,10702100,1430500,480400,964800,750100,241200,680300
2022-12-29,854400,19706300,719100,75703700,3112500,1480000,3047800,1063400,1516800,1793100,...,1846000,1431100,1398000,10534000,1139500,516300,875700,686600,274900,685300


In [53]:
# Align the shares-outstanding frame and the Yahoo volume frame with the returns frame so that
# all three end up with the same shape.
# Each frame is filtered with a mask computed from its OWN index: a mask built from another
# frame is applied purely by position and is only correct while both frames happen to have
# identical length and row order.
mask_1 = df_shares_outstanding_clean.index.isin(df_volume_clean_wrds.index)
df_shares_outstanding_clean = df_shares_outstanding_clean[mask_1]
mask_2 = df_shares_outstanding_clean.index.isin(df_daily_returns_clean.index)
df_shares_outstanding_clean = df_shares_outstanding_clean[mask_2]
# df_volume_clean carries datetime.date labels (object dtype) because the Yahoo index was
# converted with `.date`; convert them to timestamps before comparing with the WRDS dates.
mask_volume = pd.to_datetime(df_volume_clean.index).isin(df_shares_outstanding_clean.index)
df_volume_clean = df_volume_clean[mask_volume]
assert df_shares_outstanding_clean.shape == df_volume_clean.shape == df_daily_returns_clean.shape


In [54]:
df_shares_outstanding_clean


tic,A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACGL,ACN,ADBE,...,WY,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-02,347935000.0,219297000.0,73364000.0,9.388190e+08,1.580670e+09,235476000.0,1.580668e+09,136551000.0,644334000.0,495055000.0,...,541532000.0,100824000.0,487620000.0,4.559343e+09,141929000.0,185794000.0,451809000.0,173502000.0,50858000.0,184182000.0
2013-01-03,347935000.0,219297000.0,73364000.0,9.388190e+08,1.580670e+09,235476000.0,1.580668e+09,136551000.0,644334000.0,495055000.0,...,541532000.0,100824000.0,487620000.0,4.559343e+09,141929000.0,185794000.0,451809000.0,173502000.0,50858000.0,184182000.0
2013-01-04,347935000.0,219297000.0,73364000.0,9.388190e+08,1.580670e+09,235476000.0,1.580668e+09,136551000.0,644334000.0,495055000.0,...,541532000.0,100867000.0,487620000.0,4.559343e+09,141929000.0,185794000.0,451809000.0,173502000.0,50858000.0,184182000.0
2013-01-07,347935000.0,219297000.0,73364000.0,9.388190e+08,1.580670e+09,235476000.0,1.580668e+09,136551000.0,644334000.0,495055000.0,...,541532000.0,100867000.0,487620000.0,4.559343e+09,141929000.0,185794000.0,451809000.0,173502000.0,50858000.0,184182000.0
2013-01-08,347935000.0,219297000.0,73364000.0,9.390390e+08,1.580670e+09,235476000.0,1.580668e+09,136551000.0,644334000.0,495055000.0,...,541532000.0,100867000.0,487620000.0,4.559343e+09,141929000.0,185794000.0,451809000.0,173502000.0,50858000.0,184182000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-23,296072000.0,649901000.0,59254000.0,1.590812e+10,1.768481e+09,205669000.0,1.743574e+09,369873000.0,658389000.0,464900000.0,...,735917000.0,113314000.0,547248000.0,4.118293e+09,214912000.0,180222000.0,281688000.0,209852000.0,51630000.0,149618000.0
2022-12-27,296072000.0,649901000.0,59254000.0,1.590812e+10,1.768481e+09,205669000.0,1.743574e+09,369873000.0,658389000.0,464900000.0,...,735917000.0,113314000.0,547248000.0,4.118293e+09,214912000.0,180222000.0,281688000.0,209852000.0,51630000.0,149618000.0
2022-12-28,296072000.0,649901000.0,59254000.0,1.590812e+10,1.768481e+09,205669000.0,1.743574e+09,369873000.0,658389000.0,464900000.0,...,735917000.0,113314000.0,547248000.0,4.118293e+09,214912000.0,180222000.0,281688000.0,209852000.0,51630000.0,149618000.0
2022-12-29,296072000.0,649901000.0,59254000.0,1.590812e+10,1.768481e+09,205669000.0,1.743574e+09,369873000.0,658389000.0,464900000.0,...,735917000.0,113314000.0,547248000.0,4.118293e+09,214912000.0,180222000.0,281688000.0,209852000.0,51630000.0,149618000.0


In [55]:
df_volume_clean

Unnamed: 0,A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACGL,ACN,ADBE,...,WY,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION
2013-01-02,8790205,6662000,800500,560518000,13767900,1972400,20266400,2932800,4040500,6483800,...,6595000,2487000,2702000,16143700,856600,927300,6091328,1439425,213800,2551100
2013-01-03,5751791,5398400,520600,352965200,16739300,2026700,22148200,1966200,3340700,3906000,...,3967200,1731900,2403600,13268200,525500,610800,3654574,1622250,99900,2267900
2013-01-04,6432897,12048300,615000,594333600,21372100,2461500,15820100,1591800,3145600,3809300,...,3639400,2129100,1750100,11427900,660400,563400,3782685,1226524,146000,3577700
2013-01-07,3589505,5730600,1054400,484156400,17897100,1803600,13120000,1296900,2262800,3632100,...,2460400,1501000,2856800,11799800,554100,481300,5289417,985710,90600,2286000
2013-01-08,3896925,8034400,840000,458707200,17863300,1533000,15042300,1810800,2502800,3080900,...,5202600,976300,3761500,14226400,698900,1084500,17253686,932356,112600,3758600
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-23,779400,17181200,524500,63814900,2685100,528000,2618400,890300,1213300,1629800,...,1932800,1134800,1201300,11539400,1266800,321100,700800,789800,180900,1263300
2022-12-27,879500,18878200,717100,69007800,2669200,740300,2927400,666700,1009600,1464300,...,2266000,4026500,1580600,11962100,998800,468200,1300200,755600,252500,704200
2022-12-28,784300,20470400,685500,85438400,2944500,804700,3265200,1107900,1387000,1672100,...,2244100,2131500,1513700,10702100,1430500,480400,964800,750100,241200,680300
2022-12-29,854400,19706300,719100,75703700,3112500,1480000,3047800,1063400,1516800,1793100,...,1846000,1431100,1398000,10534000,1139500,516300,875700,686600,274900,685300


In [56]:
df_shares_outstanding_clean

tic,A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACGL,ACN,ADBE,...,WY,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-02,347935000.0,219297000.0,73364000.0,9.388190e+08,1.580670e+09,235476000.0,1.580668e+09,136551000.0,644334000.0,495055000.0,...,541532000.0,100824000.0,487620000.0,4.559343e+09,141929000.0,185794000.0,451809000.0,173502000.0,50858000.0,184182000.0
2013-01-03,347935000.0,219297000.0,73364000.0,9.388190e+08,1.580670e+09,235476000.0,1.580668e+09,136551000.0,644334000.0,495055000.0,...,541532000.0,100824000.0,487620000.0,4.559343e+09,141929000.0,185794000.0,451809000.0,173502000.0,50858000.0,184182000.0
2013-01-04,347935000.0,219297000.0,73364000.0,9.388190e+08,1.580670e+09,235476000.0,1.580668e+09,136551000.0,644334000.0,495055000.0,...,541532000.0,100867000.0,487620000.0,4.559343e+09,141929000.0,185794000.0,451809000.0,173502000.0,50858000.0,184182000.0
2013-01-07,347935000.0,219297000.0,73364000.0,9.388190e+08,1.580670e+09,235476000.0,1.580668e+09,136551000.0,644334000.0,495055000.0,...,541532000.0,100867000.0,487620000.0,4.559343e+09,141929000.0,185794000.0,451809000.0,173502000.0,50858000.0,184182000.0
2013-01-08,347935000.0,219297000.0,73364000.0,9.390390e+08,1.580670e+09,235476000.0,1.580668e+09,136551000.0,644334000.0,495055000.0,...,541532000.0,100867000.0,487620000.0,4.559343e+09,141929000.0,185794000.0,451809000.0,173502000.0,50858000.0,184182000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-23,296072000.0,649901000.0,59254000.0,1.590812e+10,1.768481e+09,205669000.0,1.743574e+09,369873000.0,658389000.0,464900000.0,...,735917000.0,113314000.0,547248000.0,4.118293e+09,214912000.0,180222000.0,281688000.0,209852000.0,51630000.0,149618000.0
2022-12-27,296072000.0,649901000.0,59254000.0,1.590812e+10,1.768481e+09,205669000.0,1.743574e+09,369873000.0,658389000.0,464900000.0,...,735917000.0,113314000.0,547248000.0,4.118293e+09,214912000.0,180222000.0,281688000.0,209852000.0,51630000.0,149618000.0
2022-12-28,296072000.0,649901000.0,59254000.0,1.590812e+10,1.768481e+09,205669000.0,1.743574e+09,369873000.0,658389000.0,464900000.0,...,735917000.0,113314000.0,547248000.0,4.118293e+09,214912000.0,180222000.0,281688000.0,209852000.0,51630000.0,149618000.0
2022-12-29,296072000.0,649901000.0,59254000.0,1.590812e+10,1.768481e+09,205669000.0,1.743574e+09,369873000.0,658389000.0,464900000.0,...,735917000.0,113314000.0,547248000.0,4.118293e+09,214912000.0,180222000.0,281688000.0,209852000.0,51630000.0,149618000.0


In [57]:
# reference: Wharton Research Data Services. "WRDS" wrds.wharton.upenn.edu, accessed 2023-01-16.

# Loading Dow Jones 39-year closing data from WRDS

In [58]:
# period: 1983-12-31 - 2022-12-31 (39 years, could not go back farther due to data availability in WRDS)

In [59]:
# Read the Dow closing-price CSV; the column at position 2 becomes the index
# and pandas is asked to parse that index as dates.
closing_DOW_39years = pd.read_csv(
    r'data/39 years DOW closing Computat.csv',
    index_col=2,
    parse_dates=True,
)

In [60]:
closing_DOW_39years

Unnamed: 0_level_0,gvkey,iid,tic,prccd
datadate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1984-01-02,1300,1,HON,
1984-01-03,1300,1,HON,55.625
1984-01-04,1300,1,HON,55.125
1984-01-05,1300,1,HON,55.625
1984-01-06,1300,1,HON,54.500
...,...,...,...,...
2017-11-16,179534,3,0051B,
2018-02-15,179534,3,0051B,
2018-05-17,179534,3,0051B,
2018-08-16,179534,3,0051B,


In [61]:
closing_DOW_39years.columns

Index(['gvkey', 'iid', 'tic', 'prccd'], dtype='object')

In [62]:
# Remove the two identifier columns in place, leaving only 'tic' and 'prccd'.
closing_DOW_39years.drop(['gvkey', 'iid'], axis=1, inplace=True)

In [63]:
closing_DOW_39years

Unnamed: 0_level_0,tic,prccd
datadate,Unnamed: 1_level_1,Unnamed: 2_level_1
1984-01-02,HON,
1984-01-03,HON,55.625
1984-01-04,HON,55.125
1984-01-05,HON,55.625
1984-01-06,HON,54.500
...,...,...
2017-11-16,0051B,
2018-02-15,0051B,
2018-05-17,0051B,
2018-08-16,0051B,


In [64]:
# Expose the date index as a regular 'Date' column so it can be used as a pivot key.
closing_DOW_39years = closing_DOW_39years.assign(Date=closing_DOW_39years.index)

In [65]:
closing_DOW_39years

Unnamed: 0_level_0,tic,prccd,Date
datadate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1984-01-02,HON,,1984-01-02
1984-01-03,HON,55.625,1984-01-03
1984-01-04,HON,55.125,1984-01-04
1984-01-05,HON,55.625,1984-01-05
1984-01-06,HON,54.500,1984-01-06
...,...,...,...
2017-11-16,0051B,,2017-11-16
2018-02-15,0051B,,2018-02-15
2018-05-17,0051B,,2018-05-17
2018-08-16,0051B,,2018-08-16


In [66]:
# Reshape long -> wide: one row per 'Date', one column per 'tic', cells hold 'prccd'.
closing_DOW_39years_clean_pivot = closing_DOW_39years.pivot(
    values='prccd',
    columns='tic',
    index='Date',
)

In [67]:
closing_DOW_39years_clean_pivot

tic,0050B,0051B,4764B,AAPL,AMGN,AXD.1,AXP,BA,CAT,CRM,...,NKE,PG,TAP.,TAP.AZ,TRV,UNH,V,VZ,WBA,WMT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984-01-02,,,,,,,,,,,...,,,,,,,,,,
1984-01-03,,,,25.625,7.250,,31.125,44.000,45.875,,...,14.875,57.000,,,,,,67.125,37.750,38.375
1984-01-04,,,,27.875,7.125,,32.125,45.875,48.750,,...,15.375,56.875,,,,,,69.125,37.625,39.000
1984-01-05,,,,28.250,7.500,,33.000,47.750,49.000,,...,15.875,56.375,,,,,,69.750,37.750,39.750
1984-01-06,,,,27.750,8.000,,33.625,47.625,48.625,,...,16.375,54.875,,,,,,71.000,37.875,39.875
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-23,,,,131.860,263.920,,147.020,189.060,239.870,129.44,...,116.250,152.620,,,189.48,531.31,205.83,38.410,38.630,143.770
2022-12-27,,,,130.030,263.390,,146.390,189.400,243.140,130.66,...,117.560,153.950,,,190.48,531.99,206.29,39.250,38.310,143.810
2022-12-28,,,,126.040,261.420,,144.010,188.380,239.380,128.47,...,114.980,151.960,,,188.13,528.45,204.99,38.810,37.580,141.290
2022-12-29,,,,129.610,263.160,,147.320,188.910,240.200,132.54,...,117.350,152.590,,,189.44,529.88,208.06,39.260,37.470,142.150


In [68]:
# Build (ticker, number_of_missing_values) pairs, one per column of the pivoted frame.
list_of_missing_dow = [
    (str(ticker), closing_DOW_39years_clean_pivot[ticker].isna().sum())
    for ticker in closing_DOW_39years_clean_pivot.columns
]

In [69]:
list_of_missing_dow

[('0050B', 9837),
 ('0051B', 9837),
 ('4764B', 9837),
 ('AAPL', 5),
 ('AMGN', 6),
 ('AXD.1', 9127),
 ('AXP', 5),
 ('BA', 5),
 ('CAT', 6),
 ('CRM', 5172),
 ('CSCO', 1555),
 ('CVX', 5),
 ('DIS', 6),
 ('DOW', 8882),
 ('GS', 3881),
 ('GS.PI', 8566),
 ('GS.PJ', 7394),
 ('GS.PK', 7651),
 ('GS.PN', 8517),
 ('HD', 5),
 ('HON', 6),
 ('HON.WI', 9827),
 ('IBM', 5),
 ('INTC', 29),
 ('JNJ', 7),
 ('JPM', 6),
 ('JPM.PA', 8116),
 ('JPM.PB', 8574),
 ('JPM.PC', 8844),
 ('JPM.PD', 8766),
 ('JPM.PD.1', 8519),
 ('JPM.PE', 8535),
 ('JPM.PE.', 9307),
 ('JPM.PF', 8563),
 ('JPM.PF.', 6761),
 ('JPM.PG', 8250),
 ('JPM.PG.1', 9307),
 ('JPM.PH', 8364),
 ('JPM.PH.', 8739),
 ('JPM.PI', 8580),
 ('JPM.PJ', 9049),
 ('JPM.PK', 9388),
 ('JPM.PL', 9432),
 ('JPM.PM', 9481),
 ('JPM.PS.', 8475),
 ('JPM.WS', 7605),
 ('KO', 6),
 ('MCD', 6),
 ('MMM', 6),
 ('MRK', 6),
 ('MRK.WI', 9832),
 ('MSFT', 560),
 ('NKE', 5),
 ('PG', 5),
 ('TAP.', 8827),
 ('TAP.AZ', 9325),
 ('TRV', 4700),
 ('UNH', 207),
 ('V', 6113),
 ('VZ', 6),
 ('WBA', 5

In [70]:
# Keep all rows but only the columns listed in tickers_dow (in that order).
closing_DOW_39years_clean_pivot = closing_DOW_39years_clean_pivot.loc[:, tickers_dow]

In [71]:
closing_DOW_39years_clean_pivot

tic,UNH,GS,HD,AMGN,MCD,CAT,MSFT,V,HON,BA,...,MMM,NKE,MRK,DIS,KO,DOW,CSCO,VZ,WBA,INTC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984-01-02,,,,,,,,,,,...,,,,,,,,,,
1984-01-03,,,25.25,7.250,69.625,45.875,,,55.625,44.000,...,82.250,14.875,90.375,52.625,53.250,,,67.125,37.750,40.750
1984-01-04,,,24.50,7.125,70.875,48.750,,,55.125,45.875,...,84.125,15.375,90.250,53.000,54.875,,,69.125,37.625,41.125
1984-01-05,,,26.00,7.500,72.250,49.000,,,55.625,47.750,...,84.875,15.875,91.500,53.250,55.250,,,69.750,37.750,43.000
1984-01-06,,,25.75,8.000,72.250,48.625,,,54.500,47.625,...,84.625,16.375,91.125,52.750,54.625,,,71.000,37.875,42.750
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-23,531.31,345.51,318.73,263.920,267.570,239.870,238.73,205.83,213.870,189.060,...,120.140,116.250,111.860,88.010,63.820,50.86,47.48,38.410,38.630,26.090
2022-12-27,531.99,341.97,319.55,263.390,266.840,243.140,236.96,206.29,214.890,189.400,...,120.220,117.560,112.120,86.370,64.210,51.19,47.53,39.250,38.310,25.940
2022-12-28,528.45,340.87,315.73,261.420,265.110,239.380,234.53,204.99,212.190,188.380,...,118.290,114.980,111.080,84.170,63.570,49.99,47.07,38.810,37.580,25.540
2022-12-29,529.88,343.43,320.41,263.160,265.930,240.200,241.01,208.06,214.750,188.910,...,120.570,117.350,110.820,87.180,63.950,50.65,47.50,39.260,37.470,26.210


In [72]:
# Backward-fill gaps: each missing value takes the next later value in its column.
# DataFrame.bfill() replaces fillna(method='backfill'); the `method` argument of
# fillna is deprecated since pandas 2.1.
# NOTE(review): columns with no early history get their first recorded price
# copied back to the start of the sample (look-ahead) — confirm this is intended.
closing_DOW_39years_clean_pivot = closing_DOW_39years_clean_pivot.bfill()

In [73]:
# Final Dow closing frame: every row, columns restricted to tickers_dow.
df_closing_DOW_39_years_clean = closing_DOW_39years_clean_pivot.loc[:, tickers_dow]

In [74]:
df_closing_DOW_39_years_clean

tic,UNH,GS,HD,AMGN,MCD,CAT,MSFT,V,HON,BA,...,MMM,NKE,MRK,DIS,KO,DOW,CSCO,VZ,WBA,INTC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984-01-02,4.688,70.375,25.25,7.250,69.625,45.875,28.00,56.50,55.625,44.000,...,82.250,14.875,90.375,52.625,53.250,49.80,22.25,67.125,37.750,40.750
1984-01-03,4.688,70.375,25.25,7.250,69.625,45.875,28.00,56.50,55.625,44.000,...,82.250,14.875,90.375,52.625,53.250,49.80,22.25,67.125,37.750,40.750
1984-01-04,4.688,70.375,24.50,7.125,70.875,48.750,28.00,56.50,55.125,45.875,...,84.125,15.375,90.250,53.000,54.875,49.80,22.25,69.125,37.625,41.125
1984-01-05,4.688,70.375,26.00,7.500,72.250,49.000,28.00,56.50,55.625,47.750,...,84.875,15.875,91.500,53.250,55.250,49.80,22.25,69.750,37.750,43.000
1984-01-06,4.688,70.375,25.75,8.000,72.250,48.625,28.00,56.50,54.500,47.625,...,84.625,16.375,91.125,52.750,54.625,49.80,22.25,71.000,37.875,42.750
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-23,531.310,345.510,318.73,263.920,267.570,239.870,238.73,205.83,213.870,189.060,...,120.140,116.250,111.860,88.010,63.820,50.86,47.48,38.410,38.630,26.090
2022-12-27,531.990,341.970,319.55,263.390,266.840,243.140,236.96,206.29,214.890,189.400,...,120.220,117.560,112.120,86.370,64.210,51.19,47.53,39.250,38.310,25.940
2022-12-28,528.450,340.870,315.73,261.420,265.110,239.380,234.53,204.99,212.190,188.380,...,118.290,114.980,111.080,84.170,63.570,49.99,47.07,38.810,37.580,25.540
2022-12-29,529.880,343.430,320.41,263.160,265.930,240.200,241.01,208.06,214.750,188.910,...,120.570,117.350,110.820,87.180,63.950,50.65,47.50,39.260,37.470,26.210


# Loading S&P 500 39 year closing data from WRDS

download link: https://www.dropbox.com/s/00oobhqewkz87j3/39%20years%20S%26P%20closing%20Computat.csv?dl=0

In [75]:
# Read the S&P closing-price CSV.
# pandas warns that column 1 has mixed types; in the displayed frame that column
# appears to be 'iid' (values such as '01' and 2). Reading it as text gives the
# column one consistent dtype and removes the DtypeWarning.
closing_SnP500_39_years = pd.read_csv(
    r'data/39 years S&P closing Computat.csv',
    dtype={'iid': str},
)


Columns (1) have mixed types.Specify dtype option on import or set low_memory=False.



In [76]:
closing_SnP500_39_years

Unnamed: 0,gvkey,iid,datadate,tic,prccd
0,1045,01,19840102,AAMRQ,
1,1045,01,19840103,AAMRQ,36.125
2,1045,01,19840104,AAMRQ,37.625
3,1045,01,19840105,AAMRQ,38.500
4,1045,01,19840106,AAMRQ,38.625
...,...,...,...,...,...
4521078,294524,2,20101130,LYB.B,29.240
4521079,294524,2,20101201,LYB.B,29.870
4521080,294524,2,20101202,LYB.B,30.410
4521081,294524,2,20101203,LYB.B,30.550


In [77]:
# Copy the raw 'datadate' values into a 'Date' column.
closing_SnP500_39_years = closing_SnP500_39_years.assign(
    Date=closing_SnP500_39_years['datadate']
)

In [78]:
# Index the frame by date, parsing the 'Date' column with the explicit %Y%m%d format.
closing_SnP500_39_years.index = pd.to_datetime(closing_SnP500_39_years['Date'], format=r'%Y%m%d')
# Accept any datetime64 resolution: comparing against the literal 'datetime64[ns]'
# breaks if the parsed index uses a unit other than nanoseconds.
assert pd.api.types.is_datetime64_any_dtype(closing_SnP500_39_years.index)

In [79]:
# Overwrite 'Date' with the parsed datetime index so the pivot key is a real date.
closing_SnP500_39_years = closing_SnP500_39_years.assign(Date=closing_SnP500_39_years.index)

In [80]:
# Reshape long -> wide: one row per 'Date', one column per 'tic', cells hold 'prccd'.
closing_SnP500_39_years_clean_pivot = closing_SnP500_39_years.pivot(
    values='prccd',
    columns='tic',
    index='Date',
)

In [81]:
# Preview the closing prices for the tickers in used_ticker_symbols (before filling gaps).
closing_SnP500_39_years_clean_pivot.loc[:, used_ticker_symbols]

tic,A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACGL,ACN,ADBE,...,WY,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984-01-02,,,,,,,,,,,...,,,,,,,,,,
1984-01-03,,,,25.625,,,46.000,,,,...,33.750,,36.750,36.625,,,,,,35.00
1984-01-04,,,,27.875,,,47.500,,,,...,34.125,,37.000,37.125,,,,,,35.00
1984-01-05,,,,28.250,,,48.000,,,,...,35.000,,37.250,37.375,,,,,,35.00
1984-01-06,,,,27.750,,,47.875,,,,...,34.750,,37.625,37.375,,,,,,35.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-23,149.23,12.71,143.28,131.860,163.10,170.01,108.180,63.38,266.09,338.45,...,31.490,80.72,70.930,108.680,31.83,109.73,128.90,126.69,248.22,48.45
2022-12-27,149.55,12.53,145.02,130.030,162.99,169.00,108.570,63.62,265.31,335.09,...,31.470,84.33,71.570,110.190,32.07,110.72,129.90,127.28,251.00,48.84
2022-12-28,148.09,12.32,145.30,126.040,162.23,167.36,107.830,62.60,263.12,328.33,...,30.630,80.09,70.570,108.380,30.98,108.94,129.31,125.99,246.84,47.97
2022-12-29,151.09,12.70,146.31,129.610,162.56,166.05,110.310,63.11,268.38,337.58,...,31.320,81.26,71.070,109.200,32.28,111.64,129.99,127.83,257.53,49.08


In [82]:
# Backward-fill gaps: each missing value takes the next later value in its column.
# DataFrame.bfill() replaces fillna(method='backfill'); the `method` argument of
# fillna is deprecated since pandas 2.1.
# NOTE(review): columns with no early history get their first recorded price
# copied back to the start of the sample (look-ahead) — confirm this is intended.
closing_SnP500_39_years_clean_pivot = closing_SnP500_39_years_clean_pivot.bfill()

In [83]:
# Final S&P closing frame: every row, columns restricted to used_ticker_symbols.
df_closing_SnP500_39_years_clean = closing_SnP500_39_years_clean_pivot.loc[:, used_ticker_symbols]

In [84]:
df_closing_SnP500_39_years_clean

tic,A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACGL,ACN,ADBE,...,WY,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984-01-02,44.00,24.60,41.70,25.625,35.00,23.625,46.000,21.25,15.17,13.75,...,33.750,13.01,36.750,36.625,8.75,24.25,29.125,29.76,18.25,35.00
1984-01-03,44.00,24.60,41.70,25.625,35.00,23.625,46.000,21.25,15.17,13.75,...,33.750,13.01,36.750,36.625,8.75,24.25,29.125,29.76,18.25,35.00
1984-01-04,44.00,24.60,41.70,27.875,35.00,23.625,47.500,21.25,15.17,13.75,...,34.125,13.01,37.000,37.125,8.75,24.25,29.125,29.76,18.25,35.00
1984-01-05,44.00,24.60,41.70,28.250,35.00,23.625,48.000,21.25,15.17,13.75,...,35.000,13.01,37.250,37.375,8.75,24.25,29.125,29.76,18.25,35.00
1984-01-06,44.00,24.60,41.70,27.750,35.00,23.625,47.875,21.25,15.17,13.75,...,34.750,13.01,37.625,37.375,8.75,24.25,29.125,29.76,18.25,35.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-23,149.23,12.71,143.28,131.860,163.10,170.010,108.180,63.38,266.09,338.45,...,31.490,80.72,70.930,108.680,31.83,109.73,128.900,126.69,248.22,48.45
2022-12-27,149.55,12.53,145.02,130.030,162.99,169.000,108.570,63.62,265.31,335.09,...,31.470,84.33,71.570,110.190,32.07,110.72,129.900,127.28,251.00,48.84
2022-12-28,148.09,12.32,145.30,126.040,162.23,167.360,107.830,62.60,263.12,328.33,...,30.630,80.09,70.570,108.380,30.98,108.94,129.310,125.99,246.84,47.97
2022-12-29,151.09,12.70,146.31,129.610,162.56,166.050,110.310,63.11,268.38,337.58,...,31.320,81.26,71.070,109.200,32.28,111.64,129.990,127.83,257.53,49.08


# WRDS opening Data

download link: https://www.dropbox.com/s/mogmrhtzli0z08u/39%20years%20S%26P%20opening%20Computat.csv?dl=0

In [85]:
# Read the S&P opening-price CSV.
# pandas warns that column 1 has mixed types; in the displayed frame that column
# appears to be 'iid' (values such as '01' and 2). Reading it as text gives the
# column one consistent dtype and removes the DtypeWarning.
opening_SnP500_39_years = pd.read_csv(
    r'data/39 years S&P opening Computat.csv',
    dtype={'iid': str},
)


Columns (1) have mixed types.Specify dtype option on import or set low_memory=False.



In [86]:
opening_SnP500_39_years

Unnamed: 0,gvkey,iid,datadate,tic,prcod
0,1045,01,19840102,AAMRQ,
1,1045,01,19840103,AAMRQ,
2,1045,01,19840104,AAMRQ,
3,1045,01,19840105,AAMRQ,
4,1045,01,19840106,AAMRQ,
...,...,...,...,...,...
4521078,294524,2,20101130,LYB.B,29.35
4521079,294524,2,20101201,LYB.B,29.67
4521080,294524,2,20101202,LYB.B,30.17
4521081,294524,2,20101203,LYB.B,30.07


In [87]:
# Copy the raw 'datadate' values into a 'Date' column.
opening_SnP500_39_years = opening_SnP500_39_years.assign(
    Date=opening_SnP500_39_years['datadate']
)

In [88]:
# Index the frame by date, parsing the 'Date' column with the explicit %Y%m%d format.
opening_SnP500_39_years.index = pd.to_datetime(opening_SnP500_39_years['Date'], format=r'%Y%m%d')
# Accept any datetime64 resolution: comparing against the literal 'datetime64[ns]'
# breaks if the parsed index uses a unit other than nanoseconds.
assert pd.api.types.is_datetime64_any_dtype(opening_SnP500_39_years.index)

In [89]:
# Overwrite 'Date' with the parsed datetime index so the pivot key is a real date.
opening_SnP500_39_years = opening_SnP500_39_years.assign(Date=opening_SnP500_39_years.index)

In [90]:
# Reshape long -> wide: one row per 'Date', one column per 'tic', cells hold 'prcod'.
opening_SnP500_39_years_clean_pivot = opening_SnP500_39_years.pivot(
    values='prcod',
    columns='tic',
    index='Date',
)

In [91]:
opening_SnP500_39_years_clean_pivot

tic,0050B,0051B,1974B,2091B,3555B,3CCIKO,3UALAQ,4741B,4764B,4910B,...,ZB.PF,ZBH,ZBH.WI,ZBRA,ZION,ZIONN,ZIONO,ZIONP,ZIONW,ZIONZ
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984-01-02,,,,,,,,,,,...,,,,,,,,,,
1984-01-03,,,,,,,,,,,...,,,,,,,,,,
1984-01-04,,,,,,,,,,,...,,,,,,,,,,
1984-01-05,,,,,,,,,,,...,,,,,,,,,,
1984-01-06,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-23,,,,,,,,,,,...,,125.94,,246.86,48.28,,25.10,18.0500,,
2022-12-27,,,,,,,,,,,...,,127.00,,249.35,48.65,,25.14,18.3331,,
2022-12-28,,,,,,,,,,,...,,127.44,,250.48,48.87,,25.04,17.9500,,
2022-12-29,,,,,,,,,,,...,,126.66,,249.55,48.06,,25.14,17.8379,,


In [92]:
# Preview the opening prices for the tickers in used_ticker_symbols (before filling gaps).
opening_SnP500_39_years_clean_pivot.loc[:, used_ticker_symbols]

tic,A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACGL,ACN,ADBE,...,WY,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984-01-02,,,,,,,,,,,...,,,,,,,,,,
1984-01-03,,,,,,,,,,,...,,,,,,,,,,
1984-01-04,,,,,,,,,,,...,,,,,,,,,,
1984-01-05,,,,,,,,,,,...,,,,,,,,,,
1984-01-06,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-23,149.01,12.51,141.90,130.92,163.39,169.21,107.67,62.91,265.79,335.00,...,31.05,80.71,69.94,106.95,31.31,109.61,128.43,125.94,246.86,48.28
2022-12-27,149.23,12.62,143.28,131.38,163.49,170.00,108.35,63.38,265.92,338.00,...,31.37,81.45,70.99,109.27,31.93,109.99,129.30,127.00,249.35,48.65
2022-12-28,149.78,12.52,144.95,129.67,163.70,169.56,109.02,63.67,266.10,335.06,...,31.41,83.52,71.28,109.91,32.19,110.82,130.19,127.44,250.48,48.87
2022-12-29,149.61,12.39,146.11,127.99,162.96,168.00,108.75,62.74,265.59,330.75,...,30.84,80.45,70.89,107.99,31.06,109.90,129.55,126.66,249.55,48.06


In [93]:
# Backward-fill gaps: each missing value takes the next later value in its column.
# DataFrame.bfill() replaces fillna(method='backfill'); the `method` argument of
# fillna is deprecated since pandas 2.1.
# NOTE(review): columns with no early history get their first recorded price
# copied back to the start of the sample (look-ahead) — confirm this is intended.
opening_SnP500_39_years_clean_pivot = opening_SnP500_39_years_clean_pivot.bfill()

In [94]:
# Final S&P opening frame: every row, columns restricted to used_ticker_symbols.
df_opening_SnP500_39_years_clean = opening_SnP500_39_years_clean_pivot.loc[:, used_ticker_symbols]

In [95]:
df_opening_SnP500_39_years_clean

tic,A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACGL,ACN,ADBE,...,WY,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984-01-02,21.67,23.95,37.57,34.61,37.00,53.66,41.18,36.35,25.78,45.98,...,62.89,38.41,17.51,45.37,49.45,23.50,39.60,71.80,56.659,61.70
1984-01-03,21.67,23.95,37.57,34.61,37.00,53.66,41.18,36.35,25.78,45.98,...,62.89,38.41,17.51,45.37,49.45,23.50,39.60,71.80,56.659,61.70
1984-01-04,21.67,23.95,37.57,34.61,37.00,53.66,41.18,36.35,25.78,45.98,...,62.89,38.41,17.51,45.37,49.45,23.50,39.60,71.80,56.659,61.70
1984-01-05,21.67,23.95,37.57,34.61,37.00,53.66,41.18,36.35,25.78,45.98,...,62.89,38.41,17.51,45.37,49.45,23.50,39.60,71.80,56.659,61.70
1984-01-06,21.67,23.95,37.57,34.61,37.00,53.66,41.18,36.35,25.78,45.98,...,62.89,38.41,17.51,45.37,49.45,23.50,39.60,71.80,56.659,61.70
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-23,149.01,12.51,141.90,130.92,163.39,169.21,107.67,62.91,265.79,335.00,...,31.05,80.71,69.94,106.95,31.31,109.61,128.43,125.94,246.860,48.28
2022-12-27,149.23,12.62,143.28,131.38,163.49,170.00,108.35,63.38,265.92,338.00,...,31.37,81.45,70.99,109.27,31.93,109.99,129.30,127.00,249.350,48.65
2022-12-28,149.78,12.52,144.95,129.67,163.70,169.56,109.02,63.67,266.10,335.06,...,31.41,83.52,71.28,109.91,32.19,110.82,130.19,127.44,250.480,48.87
2022-12-29,149.61,12.39,146.11,127.99,162.96,168.00,108.75,62.74,265.59,330.75,...,30.84,80.45,70.89,107.99,31.06,109.90,129.55,126.66,249.550,48.06


# Plotting Data

In [96]:
# Mean (adj close) daily return per date, averaged across the stock columns (axis=1).
df_daily_returns_clean.mean(axis=1).plot(title='Mean Daily Returns across stocks')

In [97]:
# Mean daily volume per date, averaged across the stock columns (axis=1).
df_volume_clean.mean(axis=1).plot(title='Mean Daily Volume across stocks')


In [98]:
# Total daily volume per date, summed across the stock columns (axis=1).
df_volume_clean.sum(axis=1).plot(title='Total Daily Volume across stocks')

In [99]:
# Shares outstanding per date, summed across the stock columns (axis=1).
df_shares_outstanding_clean.sum(axis=1).plot(title='Total Shares Outstanding across stocks')

In [100]:
# Sum of the per-ticker closing prices on each date (row-wise sum, axis=1).
df_closing_SnP500_39_years_clean.sum(axis=1).plot(title='Total Daily closing across stocks (S&P 39 years)')

In [101]:
# Sum of the per-ticker closing prices on each date (row-wise sum, axis=1).
df_closing_DOW_39_years_clean.sum(axis=1).plot(title='Total Daily closing across stocks (DOW 39 years)')

In [102]:
# Sum of the per-ticker opening prices on each date (row-wise sum, axis=1).
df_opening_SnP500_39_years_clean.sum(axis=1).plot(title='Total Daily opening across stocks (S&P 39 years)')

In [103]:
# Dataset reference: https://www.nasdaq.com/market-activity/index/spx/historical
# Per the original author's note, the WRDS data did not include these closing
# prices, so they come from this source, which only covers 10 years.
# The 'Date' column becomes a parsed date index and only 'Close/Last' is kept.
total_SnP500_closing_10_years = pd.read_csv(
    r'data/10 years total S&P closing.csv',
    parse_dates=True,
    index_col='Date',
)['Close/Last']
# Accept any datetime64 resolution: comparing against the literal 'datetime64[ns]'
# breaks if the parsed index uses a unit other than nanoseconds.
assert pd.api.types.is_datetime64_any_dtype(total_SnP500_closing_10_years.index)

In [104]:
# Plotting the daily S&P 500 index closing series loaded from the 10-year CSV:
total_SnP500_closing_10_years.plot(title='Total Daily closing of S&P500 index (10 years)')

# Saving dataframes to csv files


In [105]:
# Write each cleaned frame to its own CSV file under data/.
# The original cell wrote the shares-outstanding and WRDS-volume files twice;
# each frame is now written exactly once, with the same paths and contents.
df_daily_returns_clean.to_csv('data/df_daily_returns_clean.csv')
df_volume_clean.to_csv('data/df_volume_clean.csv')
df_shares_outstanding_clean.to_csv('data/df_shares_outstanding_clean.csv')
df_volume_clean_wrds.to_csv('data/df_volume_clean_WRDS.csv')
df_closing_DOW_39_years_clean.to_csv('data/df_closing_DOW_39_years_clean.csv')
df_closing_SnP500_39_years_clean.to_csv('data/df_closing_SnP500_39_years_clean.csv')
df_opening_SnP500_39_years_clean.to_csv('data/df_opening_SnP500_39_years_clean.csv')