In [236]:
import datetime as dt
import pandas as pd
from pandas_datareader import data as pdr
import plotly.offline as plotly
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import yfinance as yf
yf.pdr_override()
import os
plotly.init_notebook_mode(connected=True)
pd.options.plotting.backend = 'plotly'
import wrds
import numpy as np


## Specifying date range

In [237]:
# selecting dates to be 10 year window after Feng et al. 2012's paper
end = dt.datetime(2023, 1, 1)
start = dt.datetime(2013, 1, 1)
start, end

(datetime.datetime(2013, 1, 1, 0, 0), datetime.datetime(2023, 1, 1, 0, 0))

## Select stock/ticker

# Importing stock tickers from 
from https://stockmarketmba.com/stocksinthesp500.php


In [238]:
working_dir = os.getcwd()

In [239]:
os.listdir(working_dir)

['.git',
 'data',
 'DataImport-1.1.ipynb',
 'DataImport-1.2.ipynb',
 'DataImport-1.3.ipynb',
 'DataImport-1.4.ipynb',
 'DataImport-1.5.ipynb',
 'DataImport-1.6.ipynb',
 'DataImport-1.7.ipynb',
 'DataImport-1.8.ipynb',
 'model_test.py',
 'PCS_Update.ipynb',
 'README.md',
 'Real_stock_data_analysis.ipynb',
 'Stock Data analysis.ipynb',
 'Stock_Data_analysis.ipynb',
 'Stock_sharesoutstanding_WRDS_query_csv.csv',
 'stock_volume_WRDS_query.txt',
 'Stock_volume_WRDS_query_csv.csv',
 'Stock_volume_WRDS_query_csv.zip',
 'Trail_Agent_code.ipynb',
 'used_ticker_list.txt']

In [240]:
path = "data/Stocks_in_SP_500_Index.xlsx"
data = pd.read_excel(path,skiprows=1)
data.head()

Unnamed: 0,Symbol,Description,Category2,Category3,GICS Sector,Market cap,Dividend yield,Price to TTM earnings,Price to TTM sales,Price to book value,Action
0,AAPL,Apple Inc,Common stocks,Large cap,Information Technology,1988832912360,0.0073,0,0,0,Analyze
1,MSFT,Microsoft Corp,Common stocks,Large cap,Information Technology,1657655067218,0.0114,0,0,0,Analyze
2,GOOG,Alphabet Inc Class C,Common stocks,Large cap,Communication Services,1042954820000,0.0,0,0,0,Analyze
3,GOOGL,Alphabet Inc Class A,Common stocks,Large cap,Communication Services,1042954820000,0.0,0,0,0,Analyze
4,AMZN,Amazon.Com Inc.,Common stocks,Large cap,Consumer Discretionary,847961495109,0.0,0,0,0,Analyze


In [241]:
tickers = data['Symbol'].to_list()

In [242]:
tickers

['AAPL',
 'MSFT',
 'GOOG',
 'GOOGL',
 'AMZN',
 'BRK.B',
 'JNJ',
 'UNH',
 'XOM',
 'V',
 'JPM',
 'WMT',
 'PG',
 'NVDA',
 'TSLA',
 'META',
 'LLY',
 'CVX',
 'MA',
 'HD',
 'ABBV',
 'MRK',
 'PFE',
 'BAC',
 'KO',
 'PEP',
 'AVGO',
 'ORCL',
 'TMO',
 'COST',
 'ABT',
 'CSCO',
 'MCD',
 'NKE',
 'DHR',
 'TMUS',
 'VZ',
 'ACN',
 'DIS',
 'NEE',
 'WFC',
 'CMCSA',
 'PM',
 'BMY',
 'ADBE',
 'TXN',
 'SCHW',
 'LIN',
 'UPS',
 'RTX',
 'COP',
 'MS',
 'AMGN',
 'HON',
 'T',
 'NFLX',
 'CRM',
 'IBM',
 'CAT',
 'LMT',
 'UNP',
 'LOW',
 'DE',
 'QCOM',
 'BA',
 'SBUX',
 'CVS',
 'GS',
 'INTC',
 'ELV',
 'SPGI',
 'AXP',
 'GILD',
 'MDT',
 'INTU',
 'BLK',
 'PLD',
 'AMD',
 'AMT',
 'ADP',
 'ISRG',
 'SYK',
 'EL',
 'TJX',
 'CI',
 'CB',
 'C',
 'MDLZ',
 'PYPL',
 'BKNG',
 'AMAT',
 'MMC',
 'NOC',
 'ADI',
 'MO',
 'DUK',
 'GE',
 'REGN',
 'PGR',
 'SO',
 'SLB',
 'VRTX',
 'NOW',
 'EOG',
 'BDX',
 'TGT',
 'HCA',
 'ZTS',
 'MMM',
 'USB',
 'ITW',
 'GD',
 'APD',
 'MRNA',
 'CL',
 'BSX',
 'WM',
 'CSX',
 'PNC',
 'FISV',
 'ETN',
 'AON',
 'HUM',
 'E

## pandas_datareader module

In [243]:
# 1. pdr.DataReader(stocks, 'yahoo', start, end)
# 2. pdr.get_data_yahoo(stocks, start, end)

df = pdr.get_data_yahoo(tickers, start, end)

[*********************100%***********************]  503 of 503 completed

2 Failed downloads:
- BF.B: No data found for this date range, symbol may be delisted
- BRK.B: No data found for this date range, symbol may be delisted


# Making a DataFrame containing Adjusted close Daily returns and volume returns from Yahoo finance

In [244]:
df['Adj Close'].columns

Index(['A', 'AAL', 'AAP', 'AAPL', 'ABBV', 'ABC', 'ABT', 'ACGL', 'ACN', 'ADBE',
       ...
       'WYNN', 'XEL', 'XOM', 'XRAY', 'XYL', 'YUM', 'ZBH', 'ZBRA', 'ZION',
       'ZTS'],
      dtype='object', length=503)

In [245]:
#checking for missing values & printing the columns with missing values
col_with_missing_values = {}
for col in df['Adj Close'].columns:
    # counting the columns with missing values in Adj Close
    sum_of_missing_values = df['Adj Close'][col].isnull().sum()
    # also counting the columns with missing values in Volume
    sum_of_missing_values += df['Volume'][col].isnull().sum()
    if sum_of_missing_values > 0:
        col_with_missing_values[col] = sum_of_missing_values
        print(col)

ALLE
ANET
BF.B
BRK.B
CARR
CDAY
CDW
CEG
CFG
CTLT
CTVA
CZR
DOW
ETSY
FOX
FOXA
FTV
HLT
HPE
HWM
INVH
IQV
IR
KEYS
KHC
LW
MRNA
NCLH
NWS
NWSA
OGN
OTIS
PAYC
PYPL
QRVO
SEDG
SYF
VICI
WRK
ZTS


In [246]:
df_col_missing_values = pd.DataFrame(col_with_missing_values, index = [0]).T
df_col_missing_values.columns = ['Missing Values']

In [247]:
list_of_tickers_with_missing_values = df_col_missing_values.index.to_list()

In [248]:
print(f'stocks with missing values {list_of_tickers_with_missing_values}')
print(f'stocks to de removed: {len(list_of_tickers_with_missing_values)}')


stocks with missing values ['ALLE', 'ANET', 'BF.B', 'BRK.B', 'CARR', 'CDAY', 'CDW', 'CEG', 'CFG', 'CTLT', 'CTVA', 'CZR', 'DOW', 'ETSY', 'FOX', 'FOXA', 'FTV', 'HLT', 'HPE', 'HWM', 'INVH', 'IQV', 'IR', 'KEYS', 'KHC', 'LW', 'MRNA', 'NCLH', 'NWS', 'NWSA', 'OGN', 'OTIS', 'PAYC', 'PYPL', 'QRVO', 'SEDG', 'SYF', 'VICI', 'WRK', 'ZTS']
stocks to de removed: 40


In [249]:
df_adj_close_copy = df['Adj Close'].copy()
df_volume_copy = df['Volume'].copy()

In [250]:
# removing stocks with missing values
df_adj_close_copy.drop(list_of_tickers_with_missing_values, axis=1, inplace=True)
df_volume_copy.drop(list_of_tickers_with_missing_values, axis=1, inplace=True)
df_adj_close_clean = df_adj_close_copy
df_volume_clean = df_volume_copy


In [251]:
df_adj_close_clean

Unnamed: 0_level_0,A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACGL,ACN,ADBE,...,WY,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-02,27.435249,13.179526,67.488411,16.862820,23.105902,37.189949,26.401579,14.793333,57.290291,38.340000,...,20.031265,96.279892,19.881599,58.083359,37.638954,24.058147,39.864063,60.554222,40.959999,19.219213
2013-01-03,27.533506,12.877849,67.488411,16.649981,22.915110,37.112988,27.406569,14.750000,57.082905,37.750000,...,20.031265,97.315521,19.823515,57.978611,37.536766,23.979412,40.113361,61.421844,41.000000,19.245436
2013-01-04,28.077236,13.886580,68.540459,16.186197,22.625626,37.360970,27.241825,14.876667,57.398140,38.130001,...,20.196188,98.489769,19.896126,58.247044,37.861874,23.944414,40.552612,61.734901,40.669998,19.796055
2013-01-07,27.874159,13.990285,68.307716,16.090982,22.671680,37.480686,27.464237,14.730000,57.149258,37.939999,...,20.258041,98.824097,19.685547,57.572674,37.991947,23.629480,40.297375,61.886940,40.900002,19.699917
2013-01-08,27.651424,14.291960,67.190506,16.134289,22.178247,37.429379,27.472466,14.750000,57.481102,38.139999,...,20.395473,98.742569,19.721848,57.932774,37.555351,23.340780,38.605694,61.976402,40.930000,19.306614
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-23,149.007767,12.710000,143.279999,131.860001,161.564163,170.009995,107.692398,63.380001,265.006012,338.450012,...,31.490000,80.720001,70.446365,108.680000,31.701571,109.730003,128.899994,126.690002,248.220001,48.450001
2022-12-27,149.327301,12.530000,145.020004,130.029999,161.455200,169.000000,108.080643,63.619999,264.229187,335.089996,...,31.469999,84.330002,71.082001,110.190002,31.940601,110.720001,129.899994,127.279999,251.000000,48.840000
2022-12-28,147.869461,12.320000,145.300003,126.040001,160.702347,167.360001,107.343979,62.599998,262.048126,328.329987,...,30.629999,80.089996,70.570000,108.379997,30.855000,108.940002,129.309998,125.989998,246.839996,47.970001
2022-12-29,150.865005,12.700000,146.309998,129.610001,161.029236,166.050003,109.812798,63.110001,267.286682,337.579987,...,31.320000,81.260002,71.070000,109.199997,32.279999,111.639999,129.990005,127.830002,257.529999,49.080002


In [252]:
df_volume_clean

Unnamed: 0_level_0,A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACGL,ACN,ADBE,...,WY,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-02,8790205,6662000,800500,560518000,13767900,1972400,20266400,2932800,4040500,6483800,...,6595000,2487000,2702000,16143700,856600,927300,6091328,1439425,213800,2551100
2013-01-03,5751791,5398400,520600,352965200,16739300,2026700,22148200,1966200,3340700,3906000,...,3967200,1731900,2403600,13268200,525500,610800,3654574,1622250,99900,2267900
2013-01-04,6432897,12048300,615000,594333600,21372100,2461500,15820100,1591800,3145600,3809300,...,3639400,2129100,1750100,11427900,660400,563400,3782685,1226524,146000,3577700
2013-01-07,3589505,5730600,1054400,484156400,17897100,1803600,13120000,1296900,2262800,3632100,...,2460400,1501000,2856800,11799800,554100,481300,5289417,985710,90600,2286000
2013-01-08,3896925,8034400,840000,458707200,17863300,1533000,15042300,1810800,2502800,3080900,...,5202600,976300,3761500,14226400,698900,1084500,17253686,932356,112600,3758600
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-23,779400,17181200,524500,63814900,2685100,528000,2618400,890300,1213300,1629800,...,1932800,1134800,1201300,11539400,1266800,321100,700800,789800,180900,1263300
2022-12-27,879500,18878200,717100,69007800,2669200,740300,2927400,666700,1009600,1464300,...,2266000,4026500,1580600,11962100,998800,468200,1300200,755600,252500,704200
2022-12-28,784300,20470400,685500,85438400,2944500,804700,3265200,1107900,1387000,1672100,...,2244100,2131500,1513700,10702100,1430500,480400,964800,750100,241200,680300
2022-12-29,854400,19706300,719100,75703700,3112500,1480000,3047800,1063400,1516800,1793100,...,1846000,1431100,1398000,10534000,1139500,516300,875700,686600,274900,685300


In [253]:
# taking Daily returns (percent change of  Adj close of the stocks)  and storing in a new dataframe
# also dropping the first row as it will be NaN after percent change transformaion
# usually returns would be described as pct change in price at time t vs. t-1, but this transformation will not be used since we found out feng et al. 2012 moddelled daily returns as log (xt+1) - log(xt) which we will do aswell.
df_daily_returns_clean = df_adj_close_clean.apply(np.log).diff()
df_daily_returns_clean

Unnamed: 0_level_0,A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACGL,ACN,ADBE,...,WY,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-02,,,,,,,,,,,...,,,,,,,,,,
2013-01-03,0.003575,-0.023156,0.000000,-0.012702,-0.008292,-0.002072,0.037359,-0.002934,-0.003626,-0.015508,...,0.000000,0.010699,-0.002926,-0.001805,-0.002719,-0.003278,0.006234,0.014226,0.000976,0.001363
2013-01-04,0.019555,0.075414,0.015468,-0.028250,-0.012713,0.006660,-0.006029,0.008551,0.005507,0.010016,...,0.008200,0.011994,0.003656,0.004619,0.008624,-0.001461,0.010891,0.005084,-0.008081,0.028209
2013-01-07,-0.007259,0.007440,-0.003401,-0.005900,0.002033,0.003199,0.008131,-0.009908,-0.004345,-0.004995,...,0.003058,0.003389,-0.010640,-0.011645,0.003430,-0.013240,-0.006314,0.002460,0.005639,-0.004868
2013-01-08,-0.008023,0.021334,-0.016491,0.002688,-0.022005,-0.001370,0.000300,0.001357,0.005790,0.005258,...,0.006761,-0.000825,0.001842,0.006235,-0.011558,-0.012293,-0.042887,0.001445,0.000733,-0.020167
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-23,0.001475,0.011872,0.008410,-0.002802,-0.001042,0.004362,0.001388,0.008397,0.005011,0.005719,...,0.014071,-0.006421,0.012770,0.026101,0.011057,-0.000729,0.000621,0.001106,0.002864,0.003515
2022-12-27,0.002142,-0.014263,0.012071,-0.013976,-0.000675,-0.005959,0.003599,0.003779,-0.002936,-0.009977,...,-0.000635,0.043751,0.008983,0.013798,0.007512,0.008982,0.007728,0.004646,0.011137,0.008017
2022-12-28,-0.009811,-0.016902,0.001929,-0.031166,-0.004674,-0.009752,-0.006839,-0.016163,-0.008289,-0.020380,...,-0.027055,-0.051587,-0.007229,-0.016563,-0.034579,-0.016207,-0.004552,-0.010187,-0.016713,-0.017974
2022-12-29,0.020056,0.030378,0.006927,0.027931,0.002032,-0.007858,0.022739,0.008114,0.019794,0.027783,...,0.022277,0.014503,0.007060,0.007537,0.045149,0.024482,0.005245,0.014499,0.042396,0.022876


In [254]:
used_ticker_symbols = df_daily_returns_clean.columns
used_ticker_symbols

Index(['A', 'AAL', 'AAP', 'AAPL', 'ABBV', 'ABC', 'ABT', 'ACGL', 'ACN', 'ADBE',
       ...
       'WY', 'WYNN', 'XEL', 'XOM', 'XRAY', 'XYL', 'YUM', 'ZBH', 'ZBRA',
       'ZION'],
      dtype='object', length=463)

In [255]:
with open(r'C:\Users\Nitai Nijholt\OneDrive\Desktop\project computational science\PCS_Financial_Market_ABM\used_ticker_list.txt', 'w') as file:
    for item in used_ticker_symbols:
        file.write("%s\n" % item)
    print('done')

done


# loading WRDS Computat trading volume Data

In [256]:
# stock volume data row used belpw: https://www.dropbox.com/s/80r3e9ayeppz8ca/Stock_volume_WRDS_query_csv.zip?dl=0
# shares oustanding data row used belpw: https://www.dropbox.com/s/c9nf1lh3sdggsp0/Stock_sharesoutstanding_WRDS_query_csv.csv?dl=0

In [257]:
volume_stock_data = pd.read_csv(r'C:\Users\Nitai Nijholt\OneDrive\Desktop\project computational science\PCS_Financial_Market_ABM\stock_volume_WRDS_query_csv.csv', parse_dates=True, index_col=2)


Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.



In [258]:
volume_stock_data.columns

Index(['gvkey', 'iid', 'tic', 'conm', 'cshtrd'], dtype='object')

In [259]:
volume_stock_data

Unnamed: 0_level_0,gvkey,iid,tic,conm,cshtrd
datadate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2007-01-03,1045,01,AAMRQ,AMERICAN AIRLINES GROUP INC,12096600.0
2007-01-04,1045,01,AAMRQ,AMERICAN AIRLINES GROUP INC,9454800.0
2007-01-05,1045,01,AAMRQ,AMERICAN AIRLINES GROUP INC,7861900.0
2007-01-08,1045,01,AAMRQ,AMERICAN AIRLINES GROUP INC,7896300.0
2007-01-09,1045,01,AAMRQ,AMERICAN AIRLINES GROUP INC,7045300.0
...,...,...,...,...,...
2010-11-30,294524,2,LYB.B,LYONDELLBASELL INDUSTRIES NV,310980.0
2010-12-01,294524,2,LYB.B,LYONDELLBASELL INDUSTRIES NV,832088.0
2010-12-02,294524,2,LYB.B,LYONDELLBASELL INDUSTRIES NV,451115.0
2010-12-03,294524,2,LYB.B,LYONDELLBASELL INDUSTRIES NV,360461.0


In [260]:
volume_stock_data

Unnamed: 0_level_0,gvkey,iid,tic,conm,cshtrd
datadate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2007-01-03,1045,01,AAMRQ,AMERICAN AIRLINES GROUP INC,12096600.0
2007-01-04,1045,01,AAMRQ,AMERICAN AIRLINES GROUP INC,9454800.0
2007-01-05,1045,01,AAMRQ,AMERICAN AIRLINES GROUP INC,7861900.0
2007-01-08,1045,01,AAMRQ,AMERICAN AIRLINES GROUP INC,7896300.0
2007-01-09,1045,01,AAMRQ,AMERICAN AIRLINES GROUP INC,7045300.0
...,...,...,...,...,...
2010-11-30,294524,2,LYB.B,LYONDELLBASELL INDUSTRIES NV,310980.0
2010-12-01,294524,2,LYB.B,LYONDELLBASELL INDUSTRIES NV,832088.0
2010-12-02,294524,2,LYB.B,LYONDELLBASELL INDUSTRIES NV,451115.0
2010-12-03,294524,2,LYB.B,LYONDELLBASELL INDUSTRIES NV,360461.0


In [261]:
volume_stock_data = volume_stock_data.drop(columns=['gvkey', 'iid', 'conm'])

In [262]:
volume_stock_data

Unnamed: 0_level_0,tic,cshtrd
datadate,Unnamed: 1_level_1,Unnamed: 2_level_1
2007-01-03,AAMRQ,12096600.0
2007-01-04,AAMRQ,9454800.0
2007-01-05,AAMRQ,7861900.0
2007-01-08,AAMRQ,7896300.0
2007-01-09,AAMRQ,7045300.0
...,...,...
2010-11-30,LYB.B,310980.0
2010-12-01,LYB.B,832088.0
2010-12-02,LYB.B,451115.0
2010-12-03,LYB.B,360461.0


In [263]:
volume_stock_data['tic'].nunique()

970

In [264]:
volume_stock_data.tic.nunique()

970

In [265]:
volume_stock_data['Date'] = volume_stock_data.index

In [266]:
volume_stock_data

Unnamed: 0_level_0,tic,cshtrd,Date
datadate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2007-01-03,AAMRQ,12096600.0,2007-01-03
2007-01-04,AAMRQ,9454800.0,2007-01-04
2007-01-05,AAMRQ,7861900.0,2007-01-05
2007-01-08,AAMRQ,7896300.0,2007-01-08
2007-01-09,AAMRQ,7045300.0,2007-01-09
...,...,...,...
2010-11-30,LYB.B,310980.0,2010-11-30
2010-12-01,LYB.B,832088.0,2010-12-01
2010-12-02,LYB.B,451115.0,2010-12-02
2010-12-03,LYB.B,360461.0,2010-12-03


In [267]:
volume_stock_data_clean_pivot = volume_stock_data.pivot(index = 'Date', columns='tic',values = 'cshtrd')

In [268]:
volume_stock_data_clean_pivot

tic,0050B,0051B,1974B,2091B,3CCIKO,4741B,4764B,5050B,5235B,5236B,...,ZB.PF,ZBH,ZBH.WI,ZBRA,ZION,ZIONN,ZIONO,ZIONP,ZIONW,ZIONZ
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2007-01-02,,,,,,,,,,,...,,,,,,,,,,
2007-01-03,,,,,0.0,,,0.0,,,...,,1792300.0,,560760.0,693041.0,,,0.0,,
2007-01-04,,,,,0.0,,,0.0,,,...,,2053000.0,,355633.0,569895.0,,,0.0,,
2007-01-05,,,,,0.0,,,0.0,,,...,,1220400.0,,645339.0,829044.0,,,500.0,,
2007-01-08,,,,,0.0,,,0.0,,,...,,1806100.0,,318215.0,620257.0,,,12700.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-01-09,,,,,,,,,,,...,,1423625.0,,535362.0,902684.0,,7514.0,2320.0,,
2023-01-10,,,,,,,,,,,...,,1894247.0,,353624.0,878534.0,,6911.0,1305.0,,
2023-01-11,,,,,,,,,,,...,,2448268.0,,600101.0,697494.0,,3202.0,1295.0,,
2023-01-12,,,,,,,,,,,...,,1888908.0,,735454.0,1295242.0,,13750.0,4946.0,,


In [269]:
volume_stock_data_clean_pivot.shape

(4071, 970)

In [270]:
for col in volume_stock_data_clean_pivot.columns:
    print(col, volume_stock_data_clean_pivot[col].isnull().sum())

0050B 4071
0051B 4071
1974B 4071
2091B 4071
3CCIKO 2741
4741B 4055
4764B 4071
5050B 3490
5235B 4071
5236B 4071
5237B 4071
5238B 4071
6525B 4068
9297B 4071
A 34
AAL 1780
AALCP 3988
AAMRQ 2325
AAP 34
AAPL 34
AAR 2809
ABBV 1529
ABC 34
ABT 34
ACE.PC 3707
ACGL 34
ACGLN 3663
ACGLO 2707
ACGLP 2814
ACN 34
ADBE 34
ADI 34
ADM 34
ADM.PA 3314
ADP 34
ADSK 34
AEE 34
AEP 34
AEP.PA 2871
AEPPL 3322
AEPPZ 3465
AES 34
AESC 3607
AFL 34
AHC.2 4071
AIG 34
AIG.PA 3104
AIG.PA.1 3264
AIG.WS 1554
AIZ 34
AIZP 3312
AJG 34
AKAM 34
ALB 34
ALGN 34
ALK 34
ALL 34
ALL.PA 2409
ALL.PC 2799
ALL.PD 2610
ALL.PE 2652
ALL.PF 2725
ALL.PG 2863
ALL.PH 3206
ALL.PI 3273
AMAT 34
AMCO30 962
AMCR 3164
AMD 34
AME 34
AMGN 34
AMP 34
AMP.PA 2806
AMT 34
AMT.PA 3311
AMT.PB 3322
AMZN 34
ANSS 34
ANTX.2 3324
AON 34
AOS 34
APA 34
APA.PD 3310
APD 34
APH 34
APTV 1264
APTV.PA 3420
ARE 34
ARE.PB 4019
ARE.PC 2742
ARE.PD 1195
ARE.PE 2788
ARH.PA 2728
ARH.PB 2728
ARH.PC 2628
ATO 34
ATVI 416
AVB 34
AVB.PH 3621
AVGO 687
AVGOP 3311
AVY 34
AWK 362
AXP 34


In [271]:
list_of_missing = []
for col in volume_stock_data_clean_pivot.columns:
    list_of_missing.append((str(col), volume_stock_data_clean_pivot[col].isnull().sum()))

In [272]:
print(list_of_missing)

[('0050B', 4071), ('0051B', 4071), ('1974B', 4071), ('2091B', 4071), ('3CCIKO', 2741), ('4741B', 4055), ('4764B', 4071), ('5050B', 3490), ('5235B', 4071), ('5236B', 4071), ('5237B', 4071), ('5238B', 4071), ('6525B', 4068), ('9297B', 4071), ('A', 34), ('AAL', 1780), ('AALCP', 3988), ('AAMRQ', 2325), ('AAP', 34), ('AAPL', 34), ('AAR', 2809), ('ABBV', 1529), ('ABC', 34), ('ABT', 34), ('ACE.PC', 3707), ('ACGL', 34), ('ACGLN', 3663), ('ACGLO', 2707), ('ACGLP', 2814), ('ACN', 34), ('ADBE', 34), ('ADI', 34), ('ADM', 34), ('ADM.PA', 3314), ('ADP', 34), ('ADSK', 34), ('AEE', 34), ('AEP', 34), ('AEP.PA', 2871), ('AEPPL', 3322), ('AEPPZ', 3465), ('AES', 34), ('AESC', 3607), ('AFL', 34), ('AHC.2', 4071), ('AIG', 34), ('AIG.PA', 3104), ('AIG.PA.1', 3264), ('AIG.WS', 1554), ('AIZ', 34), ('AIZP', 3312), ('AJG', 34), ('AKAM', 34), ('ALB', 34), ('ALGN', 34), ('ALK', 34), ('ALL', 34), ('ALL.PA', 2409), ('ALL.PC', 2799), ('ALL.PD', 2610), ('ALL.PE', 2652), ('ALL.PF', 2725), ('ALL.PG', 2863), ('ALL.PH', 3

In [273]:
sorted(list_of_missing, key= lambda x: x[1], reverse=True)

[('0050B', 4071),
 ('0051B', 4071),
 ('1974B', 4071),
 ('2091B', 4071),
 ('4764B', 4071),
 ('5235B', 4071),
 ('5236B', 4071),
 ('5237B', 4071),
 ('5238B', 4071),
 ('9297B', 4071),
 ('AHC.2', 4071),
 ('GM.1', 4071),
 ('GMM.U', 4071),
 ('PLP.Z', 4071),
 ('PXD.', 4071),
 ('6525B', 4068),
 ('SWP', 4068),
 ('MRK.WI', 4066),
 ('ZBH.WI', 4066),
 ('DOV.WI', 4063),
 ('VNO.WI', 4063),
 ('HON.WI', 4061),
 ('PSA.PT.1', 4061),
 ('4741B', 4055),
 ('PSA.PU.1', 4039),
 ('LNC.PD', 4031),
 ('ARE.PB', 4019),
 ('BAC.PS.1', 4017),
 ('DALRQ', 3991),
 ('AALCP', 3988),
 ('NEE.PR', 3988),
 ('EPB.2', 3977),
 ('UDR.PB', 3971),
 ('KMI.1', 3969),
 ('KEY.PL', 3968),
 ('C.PF.', 3957),
 ('NEMUF', 3956),
 ('MS.PP', 3954),
 ('SBT.1', 3944),
 ('DG.1', 3943),
 ('C.PG.', 3942),
 ('EQR.PD', 3938),
 ('KEY.PH', 3934),
 ('LYB.B', 3921),
 ('NIMC', 3917),
 ('RJF.PA', 3915),
 ('RJF.PB', 3915),
 ('EMN.WS', 3912),
 ('PSA.PR', 3900),
 ('PSA.PS', 3900),
 ('C.PH.1', 3899),
 ('MS.PO', 3899),
 ('FRC.1', 3889),
 ('FRC.PA.1', 3888),
 ('F

In [274]:
list_of_missing

[('0050B', 4071),
 ('0051B', 4071),
 ('1974B', 4071),
 ('2091B', 4071),
 ('3CCIKO', 2741),
 ('4741B', 4055),
 ('4764B', 4071),
 ('5050B', 3490),
 ('5235B', 4071),
 ('5236B', 4071),
 ('5237B', 4071),
 ('5238B', 4071),
 ('6525B', 4068),
 ('9297B', 4071),
 ('A', 34),
 ('AAL', 1780),
 ('AALCP', 3988),
 ('AAMRQ', 2325),
 ('AAP', 34),
 ('AAPL', 34),
 ('AAR', 2809),
 ('ABBV', 1529),
 ('ABC', 34),
 ('ABT', 34),
 ('ACE.PC', 3707),
 ('ACGL', 34),
 ('ACGLN', 3663),
 ('ACGLO', 2707),
 ('ACGLP', 2814),
 ('ACN', 34),
 ('ADBE', 34),
 ('ADI', 34),
 ('ADM', 34),
 ('ADM.PA', 3314),
 ('ADP', 34),
 ('ADSK', 34),
 ('AEE', 34),
 ('AEP', 34),
 ('AEP.PA', 2871),
 ('AEPPL', 3322),
 ('AEPPZ', 3465),
 ('AES', 34),
 ('AESC', 3607),
 ('AFL', 34),
 ('AHC.2', 4071),
 ('AIG', 34),
 ('AIG.PA', 3104),
 ('AIG.PA.1', 3264),
 ('AIG.WS', 1554),
 ('AIZ', 34),
 ('AIZP', 3312),
 ('AJG', 34),
 ('AKAM', 34),
 ('ALB', 34),
 ('ALGN', 34),
 ('ALK', 34),
 ('ALL', 34),
 ('ALL.PA', 2409),
 ('ALL.PC', 2799),
 ('ALL.PD', 2610),
 ('ALL.

In [275]:
volume_stock_data_clean_pivot

tic,0050B,0051B,1974B,2091B,3CCIKO,4741B,4764B,5050B,5235B,5236B,...,ZB.PF,ZBH,ZBH.WI,ZBRA,ZION,ZIONN,ZIONO,ZIONP,ZIONW,ZIONZ
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2007-01-02,,,,,,,,,,,...,,,,,,,,,,
2007-01-03,,,,,0.0,,,0.0,,,...,,1792300.0,,560760.0,693041.0,,,0.0,,
2007-01-04,,,,,0.0,,,0.0,,,...,,2053000.0,,355633.0,569895.0,,,0.0,,
2007-01-05,,,,,0.0,,,0.0,,,...,,1220400.0,,645339.0,829044.0,,,500.0,,
2007-01-08,,,,,0.0,,,0.0,,,...,,1806100.0,,318215.0,620257.0,,,12700.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-01-09,,,,,,,,,,,...,,1423625.0,,535362.0,902684.0,,7514.0,2320.0,,
2023-01-10,,,,,,,,,,,...,,1894247.0,,353624.0,878534.0,,6911.0,1305.0,,
2023-01-11,,,,,,,,,,,...,,2448268.0,,600101.0,697494.0,,3202.0,1295.0,,
2023-01-12,,,,,,,,,,,...,,1888908.0,,735454.0,1295242.0,,13750.0,4946.0,,


In [276]:
volume_stock_data_clean_pivot= volume_stock_data_clean_pivot[start:end][used_ticker_symbols]

In [277]:
# we will fill the missing values with the previous day's value, otherwise we need to throw away to much data

volume_stock_data_clean_pivot.fillna(method='backfill', inplace=True)

In [278]:
volume_stock_data_clean_pivot 

tic,A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACGL,ACN,ADBE,...,WY,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-02,6290038.0,43167060.0,800421.0,19986670.0,13767660.0,1972401.0,20266410.0,977536.0,4039095.0,6483720.0,...,6594952.0,2486815.0,2702053.0,16144970.0,856539.0,927252.0,4376855.0,1397462.0,213748.0,2551059.0
2013-01-03,4114289.0,43167060.0,520539.0,12579170.0,16739210.0,2027242.0,22148090.0,655362.0,3340674.0,3905904.0,...,3967047.0,1731879.0,2403510.0,13268470.0,525445.0,610706.0,2627253.0,1574966.0,99849.0,2267804.0
2013-01-04,4601506.0,43167060.0,614933.0,21196320.0,21372200.0,2462183.0,15819140.0,530563.0,3145680.0,3809146.0,...,3624749.0,2129025.0,1743995.0,11427870.0,660389.0,563325.0,2719375.0,1189537.0,145944.0,3577660.0
2013-01-07,2567557.0,43167060.0,1051969.0,17262620.0,17896920.0,1803504.0,13121470.0,423123.0,2262426.0,3632022.0,...,2450680.0,1500928.0,2853210.0,11799780.0,554059.0,481287.0,3729703.0,954238.0,90581.0,2285917.0
2013-01-08,2787813.0,43167060.0,840003.0,16350190.0,17863000.0,1533163.0,15053860.0,580903.0,2468876.0,3080868.0,...,5203250.0,975921.0,3762904.0,14228400.0,698803.0,1084482.0,12406140.0,905965.0,112590.0,3758558.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-23,779426.0,17160810.0,524534.0,63755280.0,2684724.0,528005.0,2618369.0,890265.0,1213251.0,1627915.0,...,1932785.0,1134475.0,1201346.0,11534290.0,1264320.0,321094.0,699735.0,789770.0,180932.0,1263345.0
2022-12-27,879543.0,18836880.0,717073.0,68887690.0,2669090.0,740310.0,2927352.0,666705.0,1009563.0,1463215.0,...,2265992.0,4024281.0,1580610.0,11874640.0,998778.0,468173.0,1299183.0,742499.0,252510.0,704184.0
2022-12-28,784312.0,20388570.0,685513.0,85247210.0,2944327.0,804725.0,3264409.0,1107929.0,1386969.0,1668511.0,...,2244112.0,2131050.0,1513689.0,10675270.0,1430535.0,480378.0,964755.0,750108.0,241209.0,679957.0
2022-12-29,854026.0,19652320.0,719051.0,75569790.0,3112518.0,1478972.0,3047818.0,1063408.0,1516779.0,1789922.0,...,1845747.0,1431045.0,1398037.0,10526150.0,1139467.0,516306.0,875749.0,686569.0,274913.0,685349.0


In [279]:
df_volume_clean_wrds = volume_stock_data_clean_pivot

In [280]:
# removing rows from df_volume_clean_wrds to match df_volume_clean, so  that dataframes are same shape. using both yahoo finance and WRDS Computat volumes data to see if there is a difference in the results
mask_3 = volume_stock_data_clean_pivot.index.isin(df_daily_returns_clean.index)
df_volume_clean_wrds = df_volume_clean_wrds[mask_3]


In [281]:
# reference: Wharton Research Data Services. "WRDS" wrds.wharton.upenn.edu, accessed 2023-01-16.

# loading WRDS Computat shares outanding Data

In [282]:
# stock volume data row used belpw: https://www.dropbox.com/s/80r3e9ayeppz8ca/Stock_volume_WRDS_query_csv.zip?dl=0
# shares oustanding data row used belpw: https://www.dropbox.com/s/c9nf1lh3sdggsp0/Stock_sharesoutstanding_WRDS_query_csv.csv?dl=0

In [283]:
shares_outstanding_stock_data = pd.read_csv(r'C:\Users\Nitai Nijholt\OneDrive\Desktop\project computational science\PCS_Financial_Market_ABM\Stock_sharesoutstanding_WRDS_query_csv.csv', parse_dates=True, index_col=2)


Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.



In [284]:
shares_outstanding_stock_data.columns

Index(['gvkey', 'iid', 'tic', 'cshoc'], dtype='object')

In [285]:
shares_outstanding_stock_data

Unnamed: 0_level_0,gvkey,iid,tic,cshoc
datadate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2008-01-02,1045,01,AAMRQ,249122000.0
2008-01-03,1045,01,AAMRQ,249122000.0
2008-01-04,1045,01,AAMRQ,249122000.0
2008-01-07,1045,01,AAMRQ,249122000.0
2008-01-08,1045,01,AAMRQ,249122000.0
...,...,...,...,...
2010-11-30,294524,2,LYB.B,263902000.0
2010-12-01,294524,2,LYB.B,263902000.0
2010-12-02,294524,2,LYB.B,263902000.0
2010-12-03,294524,2,LYB.B,263902000.0


In [286]:
shares_outstanding_stock_data

Unnamed: 0_level_0,gvkey,iid,tic,cshoc
datadate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2008-01-02,1045,01,AAMRQ,249122000.0
2008-01-03,1045,01,AAMRQ,249122000.0
2008-01-04,1045,01,AAMRQ,249122000.0
2008-01-07,1045,01,AAMRQ,249122000.0
2008-01-08,1045,01,AAMRQ,249122000.0
...,...,...,...,...
2010-11-30,294524,2,LYB.B,263902000.0
2010-12-01,294524,2,LYB.B,263902000.0
2010-12-02,294524,2,LYB.B,263902000.0
2010-12-03,294524,2,LYB.B,263902000.0


In [287]:
shares_outstanding_stock_data = shares_outstanding_stock_data.drop(columns=['gvkey', 'iid'])

In [288]:
shares_outstanding_stock_data

Unnamed: 0_level_0,tic,cshoc
datadate,Unnamed: 1_level_1,Unnamed: 2_level_1
2008-01-02,AAMRQ,249122000.0
2008-01-03,AAMRQ,249122000.0
2008-01-04,AAMRQ,249122000.0
2008-01-07,AAMRQ,249122000.0
2008-01-08,AAMRQ,249122000.0
...,...,...
2010-11-30,LYB.B,263902000.0
2010-12-01,LYB.B,263902000.0
2010-12-02,LYB.B,263902000.0
2010-12-03,LYB.B,263902000.0


In [289]:
shares_outstanding_stock_data['tic'].nunique()

949

In [290]:
shares_outstanding_stock_data.tic.nunique()

949

In [291]:
shares_outstanding_stock_data[ 'Date'] = shares_outstanding_stock_data.index

In [292]:
shares_outstanding_stock_data

Unnamed: 0_level_0,tic,cshoc,Date
datadate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2008-01-02,AAMRQ,249122000.0,2008-01-02
2008-01-03,AAMRQ,249122000.0,2008-01-03
2008-01-04,AAMRQ,249122000.0,2008-01-04
2008-01-07,AAMRQ,249122000.0,2008-01-07
2008-01-08,AAMRQ,249122000.0,2008-01-08
...,...,...,...
2010-11-30,LYB.B,263902000.0,2010-11-30
2010-12-01,LYB.B,263902000.0,2010-12-01
2010-12-02,LYB.B,263902000.0,2010-12-02
2010-12-03,LYB.B,263902000.0,2010-12-03


In [293]:
shares_outstanding_stock_data_clean_pivot = shares_outstanding_stock_data.pivot(index = 'Date', columns='tic',values = 'cshoc')

In [294]:
shares_outstanding_stock_data_clean_pivot

tic,0050B,0051B,1974B,2091B,3CCIKO,4741B,4764B,5050B,5235B,5236B,...,ZB.PF,ZBH,ZBH.WI,ZBRA,ZION,ZIONN,ZIONO,ZIONP,ZIONW,ZIONZ
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2008-01-02,,,,,6361000.0,,,8255000.0,,,...,,234725000.0,,68017000.0,106980000.0,,,9600000.0,,
2008-01-03,,,,,6361000.0,,,8255000.0,,,...,,234725000.0,,68017000.0,106980000.0,,,9600000.0,,
2008-01-04,,,,,6361000.0,,,8255000.0,,,...,,234725000.0,,68017000.0,106980000.0,,,9600000.0,,
2008-01-07,,,,,6361000.0,,,8255000.0,,,...,,234725000.0,,68017000.0,106980000.0,,,9600000.0,,
2008-01-08,,,,,6361000.0,,,8255000.0,,,...,,234725000.0,,68017000.0,106980000.0,,,9600000.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-01-10,,,,,,,,,,,...,,209852000.0,,51630000.0,149618000.0,,5536000.0,2646000.0,,
2023-01-11,,,,,,,,,,,...,,209852000.0,,51630000.0,149618000.0,,5536000.0,2646000.0,,
2023-01-12,,,,,,,,,,,...,,209852000.0,,51630000.0,149618000.0,,5536000.0,2646000.0,,
2023-01-13,,,,,,,,,,,...,,209852000.0,,51630000.0,149618000.0,,5536000.0,2646000.0,,


In [295]:
shares_outstanding_stock_data_clean_pivot.shape

(3815, 949)

In [296]:
for col in volume_stock_data_clean_pivot.columns:
    print(col, volume_stock_data_clean_pivot[col].isnull().sum())

A 0
AAL 0
AAP 0
AAPL 0
ABBV 0
ABC 0
ABT 0
ACGL 0
ACN 0
ADBE 0
ADI 0
ADM 0
ADP 0
ADSK 0
AEE 0
AEP 0
AES 0
AFL 0
AIG 0
AIZ 0
AJG 0
AKAM 0
ALB 0
ALGN 0
ALK 0
ALL 0
AMAT 0
AMCR 0
AMD 0
AME 0
AMGN 0
AMP 0
AMT 0
AMZN 0
ANSS 0
AON 0
AOS 0
APA 0
APD 0
APH 0
APTV 0
ARE 0
ATO 0
ATVI 0
AVB 0
AVGO 0
AVY 0
AWK 0
AXP 0
AZO 0
BA 0
BAC 0
BALL 0
BAX 0
BBWI 0
BBY 0
BDX 0
BEN 0
BIIB 0
BIO 0
BK 0
BKNG 0
BKR 0
BLK 0
BMY 0
BR 0
BRO 0
BSX 0
BWA 0
BXP 0
C 0
CAG 0
CAH 0
CAT 0
CB 0
CBOE 0
CBRE 0
CCI 0
CCL 0
CDNS 0
CE 0
CF 0
CHD 0
CHRW 0
CHTR 0
CI 0
CINF 0
CL 0
CLX 0
CMA 0
CMCSA 0
CME 0
CMG 0
CMI 0
CMS 0
CNC 0
CNP 0
COF 0
COO 0
COP 0
COST 0
CPB 0
CPRT 0
CPT 0
CRL 0
CRM 0
CSCO 0
CSGP 0
CSX 0
CTAS 0
CTRA 0
CTSH 0
CVS 0
CVX 0
D 0
DAL 0
DD 0
DE 0
DFS 0
DG 0
DGX 0
DHI 0
DHR 0
DIS 0
DISH 0
DLR 0
DLTR 0
DOV 0
DPZ 0
DRI 0
DTE 0
DUK 0
DVA 0
DVN 0
DXC 0
DXCM 0
EA 0
EBAY 0
ECL 0
ED 0
EFX 0
EIX 0
EL 0
ELV 0
EMN 0
EMR 0
ENPH 0
EOG 0
EPAM 0
EQIX 0
EQR 0
EQT 0
ES 0
ESS 0
ETN 0
ETR 0
EVRG 0
EW 0
EXC 0
EXPD 0
EXPE 0
EXR 0
F 0
FA

In [297]:
list_of_missing = []
for col in shares_outstanding_stock_data_clean_pivot.columns:
    list_of_missing.append((str(col), shares_outstanding_stock_data_clean_pivot[col].isnull().sum()))

In [298]:
print(list_of_missing)

[('0050B', 3815), ('0051B', 3815), ('1974B', 3815), ('2091B', 3815), ('3CCIKO', 2736), ('4741B', 3799), ('4764B', 3815), ('5050B', 3485), ('5235B', 3815), ('5236B', 3815), ('5237B', 3815), ('5238B', 3815), ('6525B', 3812), ('9297B', 3815), ('A', 29), ('AAL', 1524), ('AALCP', 3734), ('AAMRQ', 2320), ('AAP', 29), ('AAPL', 29), ('AAR', 2804), ('ABBV', 1273), ('ABC', 29), ('ABT', 29), ('ACE.PC', 3702), ('ACGL', 29), ('ACGLN', 3407), ('ACGLO', 2452), ('ACGLP', 2573), ('ACN', 29), ('ADBE', 29), ('ADI', 29), ('ADM', 29), ('ADM.PA', 3058), ('ADP', 29), ('ADSK', 29), ('AEE', 29), ('AEP', 29), ('AEP.PA', 2616), ('AEPPL', 3066), ('AEPPZ', 3209), ('AES', 29), ('AESC', 3351), ('AFL', 29), ('AHC.2', 3815), ('AIG', 29), ('AIG.PA', 2848), ('AIG.PA.1', 3008), ('AIG.WS', 3815), ('AIZ', 29), ('AIZP', 3057), ('AJG', 29), ('AKAM', 29), ('ALB', 29), ('ALGN', 29), ('ALK', 29), ('ALL', 29), ('ALL.PA', 2153), ('ALL.PC', 2543), ('ALL.PD', 2354), ('ALL.PE', 2396), ('ALL.PF', 2469), ('ALL.PG', 2608), ('ALL.PH', 2

In [299]:
sorted(list_of_missing, key= lambda x: x[1], reverse=True)

[('0050B', 3815),
 ('0051B', 3815),
 ('1974B', 3815),
 ('2091B', 3815),
 ('4764B', 3815),
 ('5235B', 3815),
 ('5236B', 3815),
 ('5237B', 3815),
 ('5238B', 3815),
 ('9297B', 3815),
 ('AHC.2', 3815),
 ('AIG.WS', 3815),
 ('BAC.WA', 3815),
 ('BAC.WB', 3815),
 ('C.WA', 3815),
 ('CELG.R', 3815),
 ('CGBBW', 3815),
 ('CMA.WS', 3815),
 ('COF.WS', 3815),
 ('DOV.WI', 3815),
 ('EMN.WS', 3815),
 ('EXPEW', 3815),
 ('EXPEZ', 3815),
 ('F.WS', 3815),
 ('GM.1', 3815),
 ('GM.WA', 3815),
 ('GM.WB', 3815),
 ('GM.WC', 3815),
 ('GMM.U', 3815),
 ('HIG.WT', 3815),
 ('HON.WI', 3815),
 ('IACIW', 3815),
 ('JPM.WS', 3815),
 ('KMI.WS', 3815),
 ('LNC.WS', 3815),
 ('MRK.WI', 3815),
 ('MTB.WS', 3815),
 ('NEMUF', 3815),
 ('OXY.WS', 3815),
 ('PLP.Z', 3815),
 ('PNC.WS', 3815),
 ('PXD.', 3815),
 ('SBNYW', 3815),
 ('VNO.WI', 3815),
 ('WFC.WS', 3815),
 ('ZIONW', 3815),
 ('ZIONZ', 3815),
 ('ZBH.WI', 3814),
 ('6525B', 3812),
 ('SWP', 3811),
 ('4741B', 3799),
 ('LNC.PD', 3775),
 ('BAC.PS.1', 3762),
 ('AALCP', 3734),
 ('NEE.PR'

In [300]:
list_of_missing

[('0050B', 3815),
 ('0051B', 3815),
 ('1974B', 3815),
 ('2091B', 3815),
 ('3CCIKO', 2736),
 ('4741B', 3799),
 ('4764B', 3815),
 ('5050B', 3485),
 ('5235B', 3815),
 ('5236B', 3815),
 ('5237B', 3815),
 ('5238B', 3815),
 ('6525B', 3812),
 ('9297B', 3815),
 ('A', 29),
 ('AAL', 1524),
 ('AALCP', 3734),
 ('AAMRQ', 2320),
 ('AAP', 29),
 ('AAPL', 29),
 ('AAR', 2804),
 ('ABBV', 1273),
 ('ABC', 29),
 ('ABT', 29),
 ('ACE.PC', 3702),
 ('ACGL', 29),
 ('ACGLN', 3407),
 ('ACGLO', 2452),
 ('ACGLP', 2573),
 ('ACN', 29),
 ('ADBE', 29),
 ('ADI', 29),
 ('ADM', 29),
 ('ADM.PA', 3058),
 ('ADP', 29),
 ('ADSK', 29),
 ('AEE', 29),
 ('AEP', 29),
 ('AEP.PA', 2616),
 ('AEPPL', 3066),
 ('AEPPZ', 3209),
 ('AES', 29),
 ('AESC', 3351),
 ('AFL', 29),
 ('AHC.2', 3815),
 ('AIG', 29),
 ('AIG.PA', 2848),
 ('AIG.PA.1', 3008),
 ('AIG.WS', 3815),
 ('AIZ', 29),
 ('AIZP', 3057),
 ('AJG', 29),
 ('AKAM', 29),
 ('ALB', 29),
 ('ALGN', 29),
 ('ALK', 29),
 ('ALL', 29),
 ('ALL.PA', 2153),
 ('ALL.PC', 2543),
 ('ALL.PD', 2354),
 ('ALL.

In [301]:
shares_outstanding_stock_data_clean_pivot

tic,0050B,0051B,1974B,2091B,3CCIKO,4741B,4764B,5050B,5235B,5236B,...,ZB.PF,ZBH,ZBH.WI,ZBRA,ZION,ZIONN,ZIONO,ZIONP,ZIONW,ZIONZ
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2008-01-02,,,,,6361000.0,,,8255000.0,,,...,,234725000.0,,68017000.0,106980000.0,,,9600000.0,,
2008-01-03,,,,,6361000.0,,,8255000.0,,,...,,234725000.0,,68017000.0,106980000.0,,,9600000.0,,
2008-01-04,,,,,6361000.0,,,8255000.0,,,...,,234725000.0,,68017000.0,106980000.0,,,9600000.0,,
2008-01-07,,,,,6361000.0,,,8255000.0,,,...,,234725000.0,,68017000.0,106980000.0,,,9600000.0,,
2008-01-08,,,,,6361000.0,,,8255000.0,,,...,,234725000.0,,68017000.0,106980000.0,,,9600000.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-01-10,,,,,,,,,,,...,,209852000.0,,51630000.0,149618000.0,,5536000.0,2646000.0,,
2023-01-11,,,,,,,,,,,...,,209852000.0,,51630000.0,149618000.0,,5536000.0,2646000.0,,
2023-01-12,,,,,,,,,,,...,,209852000.0,,51630000.0,149618000.0,,5536000.0,2646000.0,,
2023-01-13,,,,,,,,,,,...,,209852000.0,,51630000.0,149618000.0,,5536000.0,2646000.0,,


In [302]:
shares_outstanding_stock_data_clean_pivot= shares_outstanding_stock_data_clean_pivot[start:end][used_ticker_symbols]

In [303]:
# we will fill the missing values with the previous day's value, otherwise we need to throw away to much data

shares_outstanding_stock_data_clean_pivot.fillna(method='backfill', inplace=True)

In [304]:
df_shares_outstanding_clean = shares_outstanding_stock_data_clean_pivot
df_shares_outstanding_clean

tic,A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACGL,ACN,ADBE,...,WY,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-02,347935000.0,219297000.0,73364000.0,9.388190e+08,1.580670e+09,235476000.0,1.580668e+09,136551000.0,644334000.0,495055000.0,...,541532000.0,100824000.0,487620000.0,4.559343e+09,141929000.0,185794000.0,451809000.0,173502000.0,50858000.0,184182000.0
2013-01-03,347935000.0,219297000.0,73364000.0,9.388190e+08,1.580670e+09,235476000.0,1.580668e+09,136551000.0,644334000.0,495055000.0,...,541532000.0,100824000.0,487620000.0,4.559343e+09,141929000.0,185794000.0,451809000.0,173502000.0,50858000.0,184182000.0
2013-01-04,347935000.0,219297000.0,73364000.0,9.388190e+08,1.580670e+09,235476000.0,1.580668e+09,136551000.0,644334000.0,495055000.0,...,541532000.0,100867000.0,487620000.0,4.559343e+09,141929000.0,185794000.0,451809000.0,173502000.0,50858000.0,184182000.0
2013-01-07,347935000.0,219297000.0,73364000.0,9.388190e+08,1.580670e+09,235476000.0,1.580668e+09,136551000.0,644334000.0,495055000.0,...,541532000.0,100867000.0,487620000.0,4.559343e+09,141929000.0,185794000.0,451809000.0,173502000.0,50858000.0,184182000.0
2013-01-08,347935000.0,219297000.0,73364000.0,9.390390e+08,1.580670e+09,235476000.0,1.580668e+09,136551000.0,644334000.0,495055000.0,...,541532000.0,100867000.0,487620000.0,4.559343e+09,141929000.0,185794000.0,451809000.0,173502000.0,50858000.0,184182000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-23,296072000.0,649901000.0,59254000.0,1.590812e+10,1.768481e+09,205669000.0,1.743574e+09,369873000.0,658389000.0,464900000.0,...,735917000.0,113314000.0,547248000.0,4.118293e+09,214912000.0,180222000.0,281688000.0,209852000.0,51630000.0,149618000.0
2022-12-27,296072000.0,649901000.0,59254000.0,1.590812e+10,1.768481e+09,205669000.0,1.743574e+09,369873000.0,658389000.0,464900000.0,...,735917000.0,113314000.0,547248000.0,4.118293e+09,214912000.0,180222000.0,281688000.0,209852000.0,51630000.0,149618000.0
2022-12-28,296072000.0,649901000.0,59254000.0,1.590812e+10,1.768481e+09,205669000.0,1.743574e+09,369873000.0,658389000.0,464900000.0,...,735917000.0,113314000.0,547248000.0,4.118293e+09,214912000.0,180222000.0,281688000.0,209852000.0,51630000.0,149618000.0
2022-12-29,296072000.0,649901000.0,59254000.0,1.590812e+10,1.768481e+09,205669000.0,1.743574e+09,369873000.0,658389000.0,464900000.0,...,735917000.0,113314000.0,547248000.0,4.118293e+09,214912000.0,180222000.0,281688000.0,209852000.0,51630000.0,149618000.0


In [305]:
df_shares_outstanding_clean

tic,A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACGL,ACN,ADBE,...,WY,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-02,347935000.0,219297000.0,73364000.0,9.388190e+08,1.580670e+09,235476000.0,1.580668e+09,136551000.0,644334000.0,495055000.0,...,541532000.0,100824000.0,487620000.0,4.559343e+09,141929000.0,185794000.0,451809000.0,173502000.0,50858000.0,184182000.0
2013-01-03,347935000.0,219297000.0,73364000.0,9.388190e+08,1.580670e+09,235476000.0,1.580668e+09,136551000.0,644334000.0,495055000.0,...,541532000.0,100824000.0,487620000.0,4.559343e+09,141929000.0,185794000.0,451809000.0,173502000.0,50858000.0,184182000.0
2013-01-04,347935000.0,219297000.0,73364000.0,9.388190e+08,1.580670e+09,235476000.0,1.580668e+09,136551000.0,644334000.0,495055000.0,...,541532000.0,100867000.0,487620000.0,4.559343e+09,141929000.0,185794000.0,451809000.0,173502000.0,50858000.0,184182000.0
2013-01-07,347935000.0,219297000.0,73364000.0,9.388190e+08,1.580670e+09,235476000.0,1.580668e+09,136551000.0,644334000.0,495055000.0,...,541532000.0,100867000.0,487620000.0,4.559343e+09,141929000.0,185794000.0,451809000.0,173502000.0,50858000.0,184182000.0
2013-01-08,347935000.0,219297000.0,73364000.0,9.390390e+08,1.580670e+09,235476000.0,1.580668e+09,136551000.0,644334000.0,495055000.0,...,541532000.0,100867000.0,487620000.0,4.559343e+09,141929000.0,185794000.0,451809000.0,173502000.0,50858000.0,184182000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-23,296072000.0,649901000.0,59254000.0,1.590812e+10,1.768481e+09,205669000.0,1.743574e+09,369873000.0,658389000.0,464900000.0,...,735917000.0,113314000.0,547248000.0,4.118293e+09,214912000.0,180222000.0,281688000.0,209852000.0,51630000.0,149618000.0
2022-12-27,296072000.0,649901000.0,59254000.0,1.590812e+10,1.768481e+09,205669000.0,1.743574e+09,369873000.0,658389000.0,464900000.0,...,735917000.0,113314000.0,547248000.0,4.118293e+09,214912000.0,180222000.0,281688000.0,209852000.0,51630000.0,149618000.0
2022-12-28,296072000.0,649901000.0,59254000.0,1.590812e+10,1.768481e+09,205669000.0,1.743574e+09,369873000.0,658389000.0,464900000.0,...,735917000.0,113314000.0,547248000.0,4.118293e+09,214912000.0,180222000.0,281688000.0,209852000.0,51630000.0,149618000.0
2022-12-29,296072000.0,649901000.0,59254000.0,1.590812e+10,1.768481e+09,205669000.0,1.743574e+09,369873000.0,658389000.0,464900000.0,...,735917000.0,113314000.0,547248000.0,4.118293e+09,214912000.0,180222000.0,281688000.0,209852000.0,51630000.0,149618000.0


In [306]:
df_volume_clean.shape

(2518, 463)

In [307]:
df_shares_outstanding_clean.shape

(2539, 463)

In [308]:
df_daily_returns_clean.shape

(2518, 463)

In [309]:
# removing rows from df_shares_outstanding_clean and df_volume_clean that are not in df_daily_returns_clean, so dataframes are same shape
mask_1 = df_shares_outstanding_clean.index.isin(df_volume_clean.index)
df_shares_outstanding_clean = df_shares_outstanding_clean[mask_1]
mask_2 = df_shares_outstanding_clean.index.isin(df_daily_returns_clean.index)
df_shares_outstanding_clean = df_shares_outstanding_clean[mask_2]
df_volume_clean = df_volume_clean[mask_2]
assert df_shares_outstanding_clean.shape == df_volume_clean.shape == df_daily_returns_clean.shape


In [310]:
df_shares_outstanding_clean

tic,A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACGL,ACN,ADBE,...,WY,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-02,347935000.0,219297000.0,73364000.0,9.388190e+08,1.580670e+09,235476000.0,1.580668e+09,136551000.0,644334000.0,495055000.0,...,541532000.0,100824000.0,487620000.0,4.559343e+09,141929000.0,185794000.0,451809000.0,173502000.0,50858000.0,184182000.0
2013-01-03,347935000.0,219297000.0,73364000.0,9.388190e+08,1.580670e+09,235476000.0,1.580668e+09,136551000.0,644334000.0,495055000.0,...,541532000.0,100824000.0,487620000.0,4.559343e+09,141929000.0,185794000.0,451809000.0,173502000.0,50858000.0,184182000.0
2013-01-04,347935000.0,219297000.0,73364000.0,9.388190e+08,1.580670e+09,235476000.0,1.580668e+09,136551000.0,644334000.0,495055000.0,...,541532000.0,100867000.0,487620000.0,4.559343e+09,141929000.0,185794000.0,451809000.0,173502000.0,50858000.0,184182000.0
2013-01-07,347935000.0,219297000.0,73364000.0,9.388190e+08,1.580670e+09,235476000.0,1.580668e+09,136551000.0,644334000.0,495055000.0,...,541532000.0,100867000.0,487620000.0,4.559343e+09,141929000.0,185794000.0,451809000.0,173502000.0,50858000.0,184182000.0
2013-01-08,347935000.0,219297000.0,73364000.0,9.390390e+08,1.580670e+09,235476000.0,1.580668e+09,136551000.0,644334000.0,495055000.0,...,541532000.0,100867000.0,487620000.0,4.559343e+09,141929000.0,185794000.0,451809000.0,173502000.0,50858000.0,184182000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-23,296072000.0,649901000.0,59254000.0,1.590812e+10,1.768481e+09,205669000.0,1.743574e+09,369873000.0,658389000.0,464900000.0,...,735917000.0,113314000.0,547248000.0,4.118293e+09,214912000.0,180222000.0,281688000.0,209852000.0,51630000.0,149618000.0
2022-12-27,296072000.0,649901000.0,59254000.0,1.590812e+10,1.768481e+09,205669000.0,1.743574e+09,369873000.0,658389000.0,464900000.0,...,735917000.0,113314000.0,547248000.0,4.118293e+09,214912000.0,180222000.0,281688000.0,209852000.0,51630000.0,149618000.0
2022-12-28,296072000.0,649901000.0,59254000.0,1.590812e+10,1.768481e+09,205669000.0,1.743574e+09,369873000.0,658389000.0,464900000.0,...,735917000.0,113314000.0,547248000.0,4.118293e+09,214912000.0,180222000.0,281688000.0,209852000.0,51630000.0,149618000.0
2022-12-29,296072000.0,649901000.0,59254000.0,1.590812e+10,1.768481e+09,205669000.0,1.743574e+09,369873000.0,658389000.0,464900000.0,...,735917000.0,113314000.0,547248000.0,4.118293e+09,214912000.0,180222000.0,281688000.0,209852000.0,51630000.0,149618000.0


In [311]:
# reference: Wharton Research Data Services. "WRDS" wrds.wharton.upenn.edu, accessed 2023-01-16.

In [312]:
# Plotting mean (adj close) daily returns, averaged per day across all stocks:
df_daily_returns_clean.mean(1).plot(title='Mean Daily Returns across all stocks')

In [313]:
# Plotting mean daily volume, averaged per day across all stocks:
df_volume_clean.mean(1).plot(title='Mean Daily Volume across all stocks')


In [314]:
# Plotting Total daily volume, summed per day across all stocks:
df_volume_clean.sum(1).plot(title='Total Daily Volume across all stocks')

In [315]:
df_shares_outstanding_clean.sum(1).plot(title='Total Shares Outstanding across all stocks')

# Getting stock volume data using WRDS plugin:

conn = wrds.Connection(wrds_username='nitainijholt')

In [316]:
# saving the dataframes to csv files
df_daily_returns_clean.to_csv('data/df_daily_returns_clean.csv')
df_volume_clean.to_csv('data/df_volume_clean.csv')
df_shares_outstanding_clean.to_csv('data/df_shares_outstanding_clean.csv')
df_volume_clean_wrds.to_csv('data/df_volume_clean_WRDS.csv')

# Code below is unused so far

# Making a DataFrame containing Daily volume

In [317]:
df['Volume'].columns

Index(['A', 'AAL', 'AAP', 'AAPL', 'ABBV', 'ABC', 'ABT', 'ACGL', 'ACN', 'ADBE',
       ...
       'WYNN', 'XEL', 'XOM', 'XRAY', 'XYL', 'YUM', 'ZBH', 'ZBRA', 'ZION',
       'ZTS'],
      dtype='object', length=503)

In [318]:
col_with_missing_values = {}
for col in df['Volume'].columns:
    sum_of_missing_values = df['Adj Close'][col].isnull().sum()
    if sum_of_missing_values > 0:
        col_with_missing_values[col] = sum_of_missing_values
        print(col)

ALLE
ANET
BF.B
BRK.B
CARR
CDAY
CDW
CEG
CFG
CTLT
CTVA
CZR
DOW
ETSY
FOX
FOXA
FTV
HLT
HPE
HWM
INVH
IQV
IR
KEYS
KHC
LW
MRNA
NCLH
NWS
NWSA
OGN
OTIS
PAYC
PYPL
QRVO
SEDG
SYF
VICI
WRK
ZTS


## Acessing attributes of dataframe

In [319]:
close = df.Close

In [320]:
close[close.index > end - dt.timedelta(days=100)].describe()

Unnamed: 0,A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACGL,ACN,ADBE,...,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION,ZTS
count,68.0,68.0,68.0,68.0,68.0,68.0,68.0,68.0,68.0,68.0,...,68.0,68.0,68.0,68.0,68.0,68.0,68.0,68.0,68.0,68.0
mean,141.275,13.461912,161.760147,143.183087,152.418089,156.387354,103.165441,54.966765,275.259706,316.742792,...,72.613823,66.590735,105.655882,30.081324,103.628971,120.13647,115.295441,259.058235,50.205147,148.118677
std,11.258239,0.815157,15.984451,6.673953,9.121142,11.898258,4.260518,6.262076,14.522118,22.157599,...,10.343927,3.823638,7.284724,1.282007,9.611404,8.740005,7.909126,13.208136,2.208463,5.278865
min,121.550003,11.86,139.380005,126.040001,134.210007,135.330002,95.059998,41.669998,250.070007,275.200012,...,54.48,57.939999,83.980003,26.83,86.199997,105.25,104.550003,226.880005,45.740002,131.139999
25%,131.987495,12.7275,146.934998,138.91,144.4175,142.935005,99.482498,48.537499,262.894997,296.777504,...,64.297499,64.247501,103.17,29.1725,93.08,110.884998,108.012503,251.347496,48.414999,145.729996
50%,144.790001,13.65,159.485001,143.805,151.805,158.555,103.449997,57.01,273.380005,326.179993,...,74.254997,67.904999,106.994999,30.295,108.605003,122.684998,113.329998,260.744995,50.375,148.575005
75%,150.880001,14.1025,174.884998,148.287498,161.615002,167.177498,106.912502,60.01,288.087509,336.522491,...,81.787498,69.674997,110.802502,30.98,111.257502,128.607498,123.559999,267.182503,51.835,151.697498
max,157.660004,14.93,190.669998,155.740005,165.990005,173.990005,111.529999,63.619999,302.829987,345.959991,...,86.669998,71.599998,114.18,32.279999,114.989998,131.220001,128.429993,288.0,55.389999,157.419998


## Creating Moving Averages

In [321]:
df['MA20'] = df['Close'].rolling(window = 20, min_periods = 1).mean()
df['MA9'] = df['Close'].rolling(window = 9, min_periods = 1).mean()

ValueError: Cannot set a DataFrame with multiple columns to the single column MA20

## Plotting data

In [None]:
## https://www.youtube.com/watch?v=yQtT_4RgT2k
# fig = make_subplots(rows = 2, cols = 1, shared_xaxes= True, vertical_spacing = 0.1, subplot_titles = ('QQQ', 'Volume'),
#                     row_width = [0.2, 0.7])
# fig.add_trace(go.Candlestick(x=df.index, open=df["Open"], high=df["High"],
#                 low=df["Low"], close=df["Close"], name="OHLC"), 
#                 row=1, col=1)

In [None]:
figure = go.Figure(
        data = [
            go.Candlestick(
            x=df.index, 
            close=df["Close"], 
            open=df["Open"], 
            high=df["High"],
            low=df["Low"],
            increasing_line_color = 'red',
            decreasing_line_color = 'red',
            )
        ]
)
figure.show()

In [None]:
figure.show()

In [None]:
close.plot()

In [None]:
# figure.add_trace()

TypeError: add_trace() missing 1 required positional argument: 'trace'

In [None]:
close['QQQ'].pct_change().plot(kind='hist')

KeyError: 'QQQ'