In [1]:
import requests
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf

In [33]:
# Analysis configuration.
# codelist must be a list of ticker symbols: later cells concatenate it with
# [benchmark] and read the portfolio ticker from codelist[0].
codelist = ["AAPL"]
# Ticker used as the market benchmark; the string 'None' disables it.
benchmark = "VOO"
# Analysis window as ISO dates; later cells compare these as strings against the date columns.
start_date = "2016-03-13"
end_date = "2020-03-13"

In [None]:
# Read the Alpha Vantage API key from a local file so it is not hardcoded in the notebook.
with open('data/token.txt', 'r') as token_file:
    # Strip surrounding whitespace: a trailing newline in the file would
    # otherwise be embedded in the request URL built from this key.
    alpha_key = token_file.read().strip()

In [34]:
# Download daily closing prices for every ticker (plus the optional benchmark)
# from Alpha Vantage and join them into one frame keyed by date.

# Accept either a single ticker string or a list of tickers.
tickers = [codelist] if isinstance(codelist, str) else list(codelist)
# The string 'None' is the sentinel for "no benchmark".
has_benchmark = benchmark != 'None'
alpha_code = tickers + [benchmark] if has_benchmark else tickers

indexdata = pd.DataFrame()

for x, symbol in enumerate(alpha_code):
    alpha_request = (
        'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol='
        f'{symbol}&outputsize=full&apikey={alpha_key}'
    )
    http_response = requests.get(alpha_request, timeout=30)
    http_response.raise_for_status()
    response = http_response.json()

    # Fail with a readable message instead of a bare KeyError when the payload
    # carries no price data. Only the keys are reported, never the request URL,
    # so the API key cannot leak into the error text.
    if 'Time Series (Daily)' not in response:
        raise ValueError(
            f'No daily time series returned for {symbol!r}; '
            f'response keys: {sorted(response)}'
        )

    # The payload maps date -> fields; transpose so each date becomes a row.
    response_df = pd.DataFrame(response['Time Series (Daily)']).transpose().reset_index()
    # NOTE(review): the adjusted endpoint is queried but the raw '4. close'
    # field is used - confirm that unadjusted prices are intended.
    response_df['4. close'] = pd.to_numeric(response_df['4. close'])
    response_df = response_df[['index', '4. close']]

    # Dates are compared as strings; both bounds are exclusive.
    in_window = (response_df['index'] > start_date) & (response_df['index'] < end_date)
    response_df = response_df.loc[in_window]

    # Only the trailing entry is the benchmark, and only when one was requested.
    is_benchmark = has_benchmark and x == len(alpha_code) - 1
    response_df.columns = ['date', 'benchmark' if is_benchmark else symbol]

    if x == 0:
        indexdata = response_df
    else:
        # Inner join keeps only the dates present for every symbol so far.
        indexdata = pd.merge(indexdata, response_df, on='date', how='inner')

In [36]:
# Order rows chronologically, then compute simple daily returns:
# (price_t - price_{t-1}) / price_{t-1}. The first row has no predecessor and stays NaN.
indexdata = indexdata.sort_values('date').reset_index(drop=True)

# The first ticker stands in for the portfolio. A bare ticker string is used
# as-is, because indexing it would yield only its first character.
portfolio_column = codelist if isinstance(codelist, str) else codelist[0]

portfolio_price = indexdata[portfolio_column]
benchmark_price = indexdata['benchmark']
indexdata['portfolioreturn'] = (portfolio_price - portfolio_price.shift(1)) / portfolio_price.shift(1)
indexdata['benchmarkreturn'] = (benchmark_price - benchmark_price.shift(1)) / benchmark_price.shift(1)

In [37]:
# Beta = Cov(portfolio, benchmark) / Var(benchmark).
# One portfolio return series against one benchmark series is all that is
# needed here, since the portfolio is collapsed into a single set of returns.
return_columns = ["portfolioreturn", "benchmarkreturn"]
covariance_matrix = indexdata[return_columns].cov()
# Off-diagonal entry of the 2x2 matrix: covariance between the two series.
covariance_coefficient = covariance_matrix.loc["portfolioreturn", "benchmarkreturn"]
benchmark_variance = indexdata["benchmarkreturn"].var()
portfolio_beta = covariance_coefficient / benchmark_variance

In [38]:
# Single-factor regression of portfolio returns on benchmark returns.
model = smf.ols(
    data=indexdata,
    formula='portfolioreturn ~ benchmarkreturn',
)
results = model.fit()

In [39]:
# First table of the regression summary, loaded as a raw grid.
# Later cells treat columns 0/1 and 2/3 as two label/value pairs.
overview = pd.DataFrame(
    data=results.summary().tables[0],
)

In [40]:
# Second label/value pair (columns 2 and 3), relabelled 0/1 so it can be
# stacked underneath the first pair.
temporary_overview = overview[[2, 3]].rename(columns={2: 0, 3: 1})

In [41]:
# Stack both label/value pairs into one long two-column table, then flip it
# so labels run across the top and the values form a single row.
overview = (
    pd.concat([overview[[0, 1]], temporary_overview], ignore_index=True)
    .transpose()
    # Positions 16 and 17 are presumably blank padding from the shorter
    # right-hand pair (the rendered table shows 16 labelled fields) - TODO confirm.
    .drop(columns=[16, 17])
)
# Promote the label row to column headers and discard it from the data.
overview.columns = overview.iloc[0]
overview = overview.drop(index=0)

In [42]:
# Second table of the regression summary: one row per model term
# (coef, std err, t, P>|t|, confidence bounds), with the header still in row 0.
parameters = pd.DataFrame(
    data=results.summary().tables[1],
)

In [43]:
# Row 0 holds the column titles: remember it, drop it from the data,
# then install it as the header.
header_row = parameters.iloc[0]
parameters = parameters.drop(index=0)
parameters.columns = header_row

In [44]:
# Third table of the regression summary (residual diagnostics such as
# Omnibus, Durbin-Watson and Jarque-Bera), loaded as a raw grid.
metrics = pd.DataFrame(
    data=results.summary().tables[2],
)

In [45]:
# Second label/value pair (columns 2 and 3), relabelled 0/1 so it can be
# stacked underneath the first pair.
temporary_metrics = metrics[[2, 3]].rename(columns={2: 0, 3: 1})

In [46]:
# Stack both label/value pairs into one long two-column table, then flip it
# so labels run across the top and the values form a single row.
metrics = (
    pd.concat([metrics[[0, 1]], temporary_metrics], ignore_index=True)
    .transpose()
)

In [47]:
# Row 0 holds the diagnostic labels: remember it, drop it from the data,
# then install it as the header.
label_row = metrics.iloc[0]
metrics = metrics.drop(index=0)
metrics.columns = label_row

In [48]:
# Display the one-row table of residual diagnostics.
metrics

Unnamed: 0,Omnibus:,Prob(Omnibus):,Skew:,Kurtosis:,Durbin-Watson:,Jarque-Bera (JB):,Prob(JB):,Cond. No.
1,171.706,0.0,0.349,10.12,1.766,2145.618,0.0,106.0


In [49]:
# Display the coefficient table: one row per model term.
parameters

Unnamed: 0,Unnamed: 1,coef,std err,t,P>|t|,[0.025,0.975]
1,Intercept,0.0007,0.0,1.985,0.047,8.12e-06,0.001
2,benchmarkreturn,1.25,0.038,33.038,0.0,1.176,1.324


In [50]:
# Display the one-row table of model-level fit statistics.
overview

Unnamed: 0,Dep. Variable:,Model:,Method:,Date:,Time:,No. Observations:,Df Residuals:,Df Model:,Covariance Type:,R-squared:,Adj. R-squared:,F-statistic:,Prob (F-statistic):,Log-Likelihood:,AIC:,BIC:
1,portfolioreturn,OLS,Least Squares,"Fri, 15 May 2020",22:56:44,1006,1004,1,nonrobust,0.521,0.52,1091.0,1.34e-162,3078.5,-6153.0,-6143.0


In [240]:
# Load the daily factor file; later cells use its 'date', SMB, HML, RMW and CMA columns.
frenchfama = pd.read_csv(
    filepath_or_buffer='data/ff5factordaily.CSV',
)

In [241]:
# Keep only factor rows inside the analysis window: strictly after start_date,
# up to and including end_date, then renumber the rows from zero.
# NOTE(review): the price download filters with a strict '< end_date', so the
# two windows differ on end_date itself - confirm which bound is intended.
factor_dates = frenchfama['date']
inside_window = (factor_dates > start_date) & (factor_dates <= end_date)
frenchfama = frenchfama.loc[inside_window].reset_index(drop=True)

In [250]:
# Join returns with the factor data on the trading date. A column-wise concat
# pairs rows by position, so one missing day in either source would silently
# misalign every later row and leave two 'date' columns behind.
# NOTE(review): assumes both 'date' columns hold the same string format
# (both are compared against the same start/end strings) - confirm.
regression_data = pd.merge(indexdata, frenchfama, on='date', how='inner')
if regression_data.empty:
    raise ValueError(
        "No overlapping dates between indexdata and frenchfama; "
        "check that both 'date' columns use the same format."
    )

In [261]:
# Multi-factor regression: portfolio returns on the benchmark return plus the
# SMB, HML, RMW and CMA factor columns.
# The dependent variable is 'portfolioreturn', the return column created from
# the price data; no cell creates a column named 'ABAQreturn'.
# This rebinds `model` and `results` from the single-factor regression above.
model = smf.ols(
    formula='portfolioreturn ~ benchmarkreturn + SMB + HML + RMW + CMA',
    data=regression_data,
)
results = model.fit()