In [115]:
# Dependencies and notebook-wide configuration for the analysis below.
import datetime as dt
import pandas as pd
from pandas_datareader import data as pdr
import plotly.offline as plotly
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import yfinance as yf
# NOTE(review): presumably routes pandas_datareader downloads through yfinance — confirm
# this call still exists in the installed yfinance version.
yf.pdr_override()
import os
# Initialise plotly's notebook mode so figures can be shown in notebook output cells.
plotly.init_notebook_mode(connected=True)
# Use plotly as the backend for pandas' plotting methods.
pd.options.plotting.backend = 'plotly'
import wrds
import numpy as np

In [116]:
# Read the five pre-cleaned CSV tables from the local data/ directory in one pass;
# each path maps positionally onto the variable in the same slot of the tuple.
(
    df_adj_close_clean,
    df_open_clean,
    df_volume_clean,
    df_shares_outstanding_clean,
    df_volume_clean_wrds,
) = map(
    pd.read_csv,
    (
        'data/df_adj_close_clean.csv',
        'data/df_open_clean.csv',
        'data/df_volume_clean.csv',
        'data/df_shares_outstanding_clean.csv',
        'data/df_volume_clean_WRDS.csv',
    ),
)

In [117]:
# Index every table by its 'Date' column, parsed into datetimes.
for _frame in (
    df_adj_close_clean,
    df_open_clean,
    df_volume_clean,
    df_shares_outstanding_clean,
    df_volume_clean_wrds,
):
    _frame.index = pd.to_datetime(_frame['Date'])
# Do not leave the loop variable behind in the notebook namespace.
del _frame


In [118]:
# Remove the 'Date' column from each table so that only the remaining
# (per-ticker) columns are left as data.
df_adj_close_clean = df_adj_close_clean.drop('Date', axis='columns')
df_open_clean = df_open_clean.drop('Date', axis='columns')
df_volume_clean = df_volume_clean.drop('Date', axis='columns')
df_shares_outstanding_clean = df_shares_outstanding_clean.drop('Date', axis='columns')
df_volume_clean_wrds = df_volume_clean_wrds.drop('Date', axis='columns')

# Velocity calculation

We compute the velocity in a fashion similar to Feng et al.'s 2012 paper, in the following steps:
1. Sum the volume across stocks for each day.
2. Take the mean of shares outstanding across stocks for each day.
3. Sum 1. across 1-year intervals to get the total trading volume across stocks for each year.
4. Sum 2. across 1-year intervals to get the yearly sum of the daily mean shares outstanding across stocks.
5. To obtain the velocity, divide 3. by 4., i.e. divide the total yearly volume across stocks by the yearly sum of the daily mean shares outstanding across stocks.

In [119]:
# Preview the WRDS daily volume table (rows: trading dates, columns: tickers).
# The former self-assignment `df_volume_clean_wrds = df_volume_clean_wrds` was a
# no-op and has been removed.
df_volume_clean_wrds

Unnamed: 0_level_0,A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACGL,ACN,ADBE,...,WY,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-02,6290038.0,43167060.0,800421.0,19986670.0,13767660.0,1972401.0,20266410.0,977536.0,4039095.0,6483720.0,...,6594952.0,2486815.0,2702053.0,16144970.0,856539.0,927252.0,4376855.0,1397462.0,213748.0,2551059.0
2013-01-03,4114289.0,43167060.0,520539.0,12579170.0,16739210.0,2027242.0,22148090.0,655362.0,3340674.0,3905904.0,...,3967047.0,1731879.0,2403510.0,13268470.0,525445.0,610706.0,2627253.0,1574966.0,99849.0,2267804.0
2013-01-04,4601506.0,43167060.0,614933.0,21196320.0,21372200.0,2462183.0,15819140.0,530563.0,3145680.0,3809146.0,...,3624749.0,2129025.0,1743995.0,11427870.0,660389.0,563325.0,2719375.0,1189537.0,145944.0,3577660.0
2013-01-07,2567557.0,43167060.0,1051969.0,17262620.0,17896920.0,1803504.0,13121470.0,423123.0,2262426.0,3632022.0,...,2450680.0,1500928.0,2853210.0,11799780.0,554059.0,481287.0,3729703.0,954238.0,90581.0,2285917.0
2013-01-08,2787813.0,43167060.0,840003.0,16350190.0,17863000.0,1533163.0,15053860.0,580903.0,2468876.0,3080868.0,...,5203250.0,975921.0,3762904.0,14228400.0,698803.0,1084482.0,12406140.0,905965.0,112590.0,3758558.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-23,779426.0,17160810.0,524534.0,63755280.0,2684724.0,528005.0,2618369.0,890265.0,1213251.0,1627915.0,...,1932785.0,1134475.0,1201346.0,11534290.0,1264320.0,321094.0,699735.0,789770.0,180932.0,1263345.0
2022-12-27,879543.0,18836880.0,717073.0,68887690.0,2669090.0,740310.0,2927352.0,666705.0,1009563.0,1463215.0,...,2265992.0,4024281.0,1580610.0,11874640.0,998778.0,468173.0,1299183.0,742499.0,252510.0,704184.0
2022-12-28,784312.0,20388570.0,685513.0,85247210.0,2944327.0,804725.0,3264409.0,1107929.0,1386969.0,1668511.0,...,2244112.0,2131050.0,1513689.0,10675270.0,1430535.0,480378.0,964755.0,750108.0,241209.0,679957.0
2022-12-29,854026.0,19652320.0,719051.0,75569790.0,3112518.0,1478972.0,3047818.0,1063408.0,1516779.0,1789922.0,...,1845747.0,1431045.0,1398037.0,10526150.0,1139467.0,516306.0,875749.0,686569.0,274913.0,685349.0


In [120]:
# Total traded volume per day: add up the volume of every stock (column)
# on each trading date (row). The yearly aggregation happens in a later cell.
df_volume_clean_wrds_summed_across_stocks = df_volume_clean_wrds.sum(axis='columns')
df_volume_clean_wrds_summed_across_stocks

Date
2013-01-02    2.928939e+09
2013-01-03    2.675400e+09
2013-01-04    2.386758e+09
2013-01-07    2.391197e+09
2013-01-08    2.469629e+09
                  ...     
2022-12-23    1.497168e+09
2022-12-27    1.663919e+09
2022-12-28    1.655365e+09
2022-12-29    1.639356e+09
2022-12-30    1.615183e+09
Length: 2518, dtype: float64

In [121]:
# Mean shares outstanding per day: average over all stocks (columns)
# on each trading date (row). The yearly aggregation happens in a later cell.
df_shares_outstanding_clean_mean_across_stocks = df_shares_outstanding_clean.mean(axis='columns')
df_shares_outstanding_clean_mean_across_stocks

Date
2013-01-02    5.845212e+08
2013-01-03    5.845228e+08
2013-01-04    5.845230e+08
2013-01-07    5.845230e+08
2013-01-08    5.845270e+08
                  ...     
2022-12-23    6.830501e+08
2022-12-27    6.830501e+08
2022-12-28    6.830461e+08
2022-12-29    6.830609e+08
2022-12-30    6.830640e+08
Length: 2518, dtype: float64

In [122]:
# Daily velocity: the day's summed volume across stocks divided by the day's
# mean shares outstanding across stocks (the two series are aligned on date).
df_velocity_daily = df_volume_clean_wrds_summed_across_stocks.div(
    df_shares_outstanding_clean_mean_across_stocks
)
df_velocity_daily

Date
2013-01-02    5.010834
2013-01-03    4.577068
2013-01-04    4.083258
2013-01-07    4.090852
2013-01-08    4.225004
                ...   
2022-12-23    2.191887
2022-12-27    2.436013
2022-12-28    2.423505
2022-12-29    2.400015
2022-12-30    2.364615
Length: 2518, dtype: float64

In [123]:
# Aggregate both daily series into calendar-year buckets by summing the daily values.
# NOTE(review): the daily mean shares outstanding is *summed* over each year here,
# not averaged — confirm this is the intended denominator for the yearly velocity.
df_volume_across_stocks_yearly = (
    df_volume_clean_wrds_summed_across_stocks.resample(rule='Y').sum()
)
df_shares_outstanding_yearly = (
    df_shares_outstanding_clean_mean_across_stocks.resample(rule='Y').sum()
)

In [124]:
# Display the yearly sums of the daily cross-stock volume totals.
df_volume_across_stocks_yearly

Date
2013-12-31    5.729787e+11
2014-12-31    5.245225e+11
2015-12-31    5.503212e+11
2016-12-31    5.753032e+11
2017-12-31    5.098813e+11
2018-12-31    5.771541e+11
2019-12-31    4.835171e+11
2020-12-31    6.458376e+11
2021-12-31    5.184856e+11
2022-12-31    5.902643e+11
Freq: A-DEC, dtype: float64

In [125]:
# Display the yearly sums of the daily cross-stock mean shares outstanding.
df_shares_outstanding_yearly

Date
2013-12-31    1.479610e+11
2014-12-31    1.500665e+11
2015-12-31    1.523661e+11
2016-12-31    1.523036e+11
2017-12-31    1.526511e+11
2018-12-31    1.525540e+11
2019-12-31    1.514214e+11
2020-12-31    1.547089e+11
2021-12-31    1.608621e+11
2022-12-31    1.653258e+11
Freq: A-DEC, dtype: float64

In [126]:
# Yearly velocity: divide the yearly volume series by the yearly
# shares-outstanding series, matched on the year-end date.
df_velocity_yearly = df_volume_across_stocks_yearly.div(df_shares_outstanding_yearly)

In [127]:
# Display the velocity value for each year.
df_velocity_yearly

Date
2013-12-31    3.872497
2014-12-31    3.495268
2015-12-31    3.611835
2016-12-31    3.777345
2017-12-31    3.340175
2018-12-31    3.783277
2019-12-31    3.193189
2020-12-31    4.174533
2021-12-31    3.223168
2022-12-31    3.570309
Freq: A-DEC, dtype: float64

In [128]:
# Average the yearly velocities over all years; despite the `df_` prefix the
# result is a single scalar, since `df_velocity_yearly` is a one-dimensional series.
df_average_yearly_trading_velocity = df_velocity_yearly.mean()
df_average_yearly_trading_velocity

3.6041594549117897

# Make return calculation