In [1]:
import numpy as np
import pandas as pd
import pandas_datareader.data as web
import matplotlib.pyplot as plt
import statsmodels.api as sm

In [2]:
# Load the raw panel from disk: return series for the six firms together with
# the factor columns, then preview the first five rows.
data = pd.read_csv(filepath_or_buffer='raw-data.csv')
data.head(n=5)

Unnamed: 0,Date,AMGN,BDX,COO,CVS,GSK,LLY,Excess Return on the Market,Small-Minus-Big Return,High-Minus-Low Return,Risk-Free Return Rate (One Month Treasury Bill Rate),Momentum Factor
0,1/31/2000,0.060354,-0.027842,0.004149,-0.122382,-0.055928,0.005639,-0.0474,0.0562,-0.0146,0.0041,0.0186
1,2/29/2000,0.070658,0.183771,-0.103306,0.001789,-0.075829,-0.107327,0.0245,0.2113,-0.0905,0.0043,0.1836
2,3/31/2000,-0.099908,-0.148226,0.187373,0.073214,0.191742,0.053628,0.052,-0.1672,0.0749,0.0047,-0.0639
3,4/28/2000,-0.087576,-0.026128,0.04466,0.159601,0.095965,0.231537,-0.064,-0.0704,0.0768,0.0046,-0.0857
4,5/31/2000,0.136161,0.139024,0.026022,0.0,-0.103483,-0.009595,-0.0442,-0.0621,0.0491,0.005,-0.0908


In [3]:
# Replace the verbose factor headers with short labels so later cells can
# refer to them compactly (e.g. data.Rf, data[['MRP','SMB','HML']]).
data = data.rename(
    columns={
        'Excess Return on the Market': 'MRP',
        'Small-Minus-Big Return': 'SMB',
        'High-Minus-Low Return': 'HML',
        'Risk-Free Return Rate (One Month Treasury Bill Rate)': 'Rf',
        'Momentum Factor': 'Mo',
    }
)
# preview the start of the first-120-row window
data.head(120).head()

Unnamed: 0,Date,AMGN,BDX,COO,CVS,GSK,LLY,MRP,SMB,HML,Rf,Mo
0,1/31/2000,0.060354,-0.027842,0.004149,-0.122382,-0.055928,0.005639,-0.0474,0.0562,-0.0146,0.0041,0.0186
1,2/29/2000,0.070658,0.183771,-0.103306,0.001789,-0.075829,-0.107327,0.0245,0.2113,-0.0905,0.0043,0.1836
2,3/31/2000,-0.099908,-0.148226,0.187373,0.073214,0.191742,0.053628,0.052,-0.1672,0.0749,0.0047,-0.0639
3,4/28/2000,-0.087576,-0.026128,0.04466,0.159601,0.095965,0.231537,-0.064,-0.0704,0.0768,0.0046,-0.0857
4,5/31/2000,0.136161,0.139024,0.026022,0.0,-0.103483,-0.009595,-0.0442,-0.0621,0.0491,0.005,-0.0908


In [4]:
# Summary statistics for the first 120 rows, the same window the firm-level
# regressions are fitted on.
data.head(120).describe()

Unnamed: 0,AMGN,BDX,COO,CVS,GSK,LLY,MRP,SMB,HML,Rf,Mo
count,120.0,120.0,120.0,120.0,120.0,120.0,120.0,120.0,120.0,120.0,120.0
mean,0.002888,0.012433,0.014206,0.008102,0.002178,0.000653,-0.001473,0.004181,0.006694,0.002267,0.000784
std,0.084117,0.068861,0.10709,0.083241,0.055688,0.084036,0.048078,0.039663,0.037384,0.001582,0.068151
min,-0.170284,-0.156459,-0.525892,-0.296904,-0.145922,-0.294729,-0.1723,-0.1672,-0.1106,0.0,-0.3439
25%,-0.050408,-0.022442,-0.059948,-0.045306,-0.034586,-0.044278,-0.02575,-0.0157,-0.010925,0.001,-0.01935
50%,0.000815,0.015033,0.016116,0.002758,0.008515,-0.003596,0.00725,0.00145,0.00305,0.0017,0.0041
75%,0.05301,0.047099,0.077662,0.065373,0.029789,0.0363,0.030625,0.024675,0.024075,0.003825,0.03215
max,0.328032,0.267139,0.237276,0.256589,0.191742,0.311987,0.1018,0.2113,0.126,0.0056,0.1836


In [5]:
# Summary statistics for the last 120 rows, the window the combined
# hedge-portfolio regressions are evaluated on.
data.tail(120).describe()

Unnamed: 0,AMGN,BDX,COO,CVS,GSK,LLY,MRP,SMB,HML,Rf,Mo
count,120.0,120.0,120.0,120.0,120.0,120.0,120.0,120.0,120.0,120.0,120.0
mean,0.01562,0.013042,0.0199,0.010704,0.006494,0.014864,0.010915,-0.000231,-0.001998,0.000427,0.002945
std,0.060951,0.048991,0.063023,0.065259,0.046805,0.0454,0.037443,0.022529,0.022803,0.000651,0.031847
min,-0.153768,-0.116858,-0.124447,-0.183042,-0.102709,-0.098027,-0.0955,-0.0491,-0.0493,0.0,-0.0865
25%,-0.018729,-0.017703,-0.022533,-0.030154,-0.025339,-0.014949,-0.009175,-0.018425,-0.018325,0.0,-0.0164
50%,0.016324,0.012613,0.019244,0.013692,0.009317,0.015992,0.01325,0.00235,-0.0035,0.0001,0.0027
75%,0.048004,0.045296,0.064655,0.058379,0.033545,0.043021,0.034,0.01165,0.010725,0.000625,0.02095
max,0.154635,0.134915,0.187265,0.16642,0.136683,0.157975,0.1135,0.0546,0.0815,0.0021,0.1029


In [6]:
# computing regression models for individual firms
#
# For each firm, regress its return in excess of Rf on a constant plus the
# MRP, SMB and HML factors, using only the first 120 rows. The fitted
# coefficients are collected into a table with one column per firm and one
# row per regressor (const, MRP, SMB, HML).
firms = ['AMGN','BDX','COO','CVS','GSK','LLY']

# The factor design matrix does not depend on the firm, so build it once
# instead of on every loop iteration.
x = sm.add_constant(data[['MRP','SMB','HML']])

# Accumulate into a separately named dict so that `firm_regressions` is only
# ever bound to the final DataFrame.
firm_params = {}
for firm in firms:
    y = data[firm] - data.Rf  # firm return in excess of the risk-free rate
    reg = sm.OLS(y[:120], x[:120]).fit()
    firm_params[firm] = reg.params

firm_regressions = pd.DataFrame(firm_params)
firm_regressions

Unnamed: 0,AMGN,BDX,COO,CVS,GSK,LLY
const,0.001217,0.009124,0.007385,0.003048,-0.000364,-0.000694
MRP,0.506159,0.367976,0.634213,0.649468,0.44135,0.584775
SMB,0.111444,0.233623,0.411576,-0.162509,-0.244368,-0.394895
HML,-0.047152,0.09075,0.562917,0.660753,0.290815,0.237939


In [7]:
# creating hedge portfolios using pair trading strategy
def _pair_hedge(scaled_firm, offset_firm):
    """Build the excess-return series of one two-firm hedge.

    The series is ``ratio * (scaled_firm - Rf) - (offset_firm - Rf)`` where
    ``ratio`` is the estimated MRP coefficient of ``offset_firm`` divided by
    that of ``scaled_firm`` (both read from ``firm_regressions``). With this
    ratio the estimated MRP loadings of the two legs cancel.

    Parameters
    ----------
    scaled_firm : str
        Column of ``data`` held with positive weight ``ratio``.
    offset_firm : str
        Column of ``data`` held with weight -1.

    Returns
    -------
    pandas.Series
        Hedge excess return for every row of ``data`` (not only the rows
        the coefficients were estimated on).
    """
    hedge_ratio = firm_regressions.loc['MRP', offset_firm] / firm_regressions.loc['MRP', scaled_firm]
    return hedge_ratio * (data[scaled_firm] - data.Rf) - (data[offset_firm] - data.Rf)


# One call per pair replaces three hand-copied formulas, so the four tickers
# in each formula can no longer be transposed by accident.
hedge_portfolio1 = _pair_hedge('COO', 'GSK')
hedge_portfolio2 = _pair_hedge('CVS', 'BDX')
hedge_portfolio3 = _pair_hedge('AMGN', 'LLY')

In [8]:
# Blend the three pair hedges into a single portfolio. The fixed weights
# 0.2 / 0.3 / 0.5 sum to one; per the original author they reflect market
# capitalization.
hedge_portfolio = (
    hedge_portfolio1 * 0.2
    + hedge_portfolio2 * 0.3
    + hedge_portfolio3 * 0.5
)

In [9]:
# In-sample check of the individual pair hedges: regress each one on a
# constant plus MRP, SMB and HML over the first 120 rows (the same window the
# firm coefficients were estimated on).
x = sm.add_constant(data[['MRP','SMB','HML']])
reg_HP1, reg_HP2, reg_HP3 = (
    sm.OLS(pair_hedge[:120], x[:120]).fit()
    for pair_hedge in (hedge_portfolio1, hedge_portfolio2, hedge_portfolio3)
)

In [10]:
# Three-factor regression of the combined six-asset hedge portfolio, fitted on
# the last 120 rows. Presumably this is a hold-out window separate from the
# first 120 rows used for estimation; confirm the file has at least 240 rows.
x = sm.add_constant(data[['MRP','SMB','HML']])
reg_HP = sm.OLS(hedge_portfolio.tail(120), x.tail(120)).fit()
print(reg_HP.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.030
Model:                            OLS   Adj. R-squared:                  0.005
Method:                 Least Squares   F-statistic:                     1.205
Date:                Tue, 05 Jan 2021   Prob (F-statistic):              0.311
Time:                        22:30:04   Log-Likelihood:                 235.21
No. Observations:                 120   AIC:                            -462.4
Df Residuals:                     116   BIC:                            -451.3
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0010      0.003     -0.288      0.7

In [11]:
# Same regression of the combined hedge portfolio over the last 120 rows, with
# the momentum factor (Mo) added to MRP, SMB and HML.
# NOTE: this rebinds `reg_HP`, replacing the three-factor fit from the
# previous cell.
x = sm.add_constant(data[['MRP','SMB','HML','Mo']])
reg_HP = sm.OLS(hedge_portfolio.tail(120), x.tail(120)).fit()
print(reg_HP.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.030
Model:                            OLS   Adj. R-squared:                 -0.003
Method:                 Least Squares   F-statistic:                    0.8975
Date:                Tue, 05 Jan 2021   Prob (F-statistic):              0.468
Time:                        22:30:04   Log-Likelihood:                 235.21
No. Observations:                 120   AIC:                            -460.4
Df Residuals:                     115   BIC:                            -446.5
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0010      0.003     -0.295      0.7