In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
%matplotlib inline

  from pandas.core import datetools


In [2]:
# Read the factor-return file and keep only the rows with Date >= 196800.
# NOTE(review): Date looks like a YYYYMM-style integer (i.e. 1968 onward) -- confirm against the CSV.
df = pd.read_csv('FF5_2by3.CSV').loc[lambda frame: frame.Date >= 196800]

# Problem 3
## 3-(a)

In [3]:
def trunc_3digs(v):
    """Format a number, or every element of an iterable, with three decimals.

    Despite the name, values are rounded by the ``.3f`` format spec rather
    than truncated.

    Parameters
    ----------
    v : scalar or iterable of scalars
        Anything exposing ``__iter__`` is formatted element by element.

    Returns
    -------
    str or numpy.ndarray
        One string for a scalar input, otherwise an array of strings.
    """
    if not hasattr(v, '__iter__'):
        return format(v, '.3f')
    return np.array([format(item, '.3f') for item in v])
    
def report_less(res, alpha=0.05):
    """Condense a fitted regression into a small table of formatted strings.

    Parameters
    ----------
    res : fitted regression results
        Must expose ``params``, ``tvalues`` and ``pvalues`` (``params`` needs
        an ``.index`` holding the coefficient names, so fit the model on a
        DataFrame design matrix) and a scalar ``rsquared_adj``.
    alpha : float, default 0.05
        Significance level; coefficients with p-value below it are starred.
        The default reproduces the original ``'p<0.05'`` row label.

    Returns
    -------
    pandas.DataFrame
        One column per coefficient and four rows: ``'est'``, ``'t-stat'``,
        ``'p<{alpha}'`` (``'(*)'`` or empty) and ``'Adj.R^2'``. Every cell is
        a string; the adjusted R^2 sits in the ``'const'`` column and the
        rest of that row is blank.
    """
    coef_names = res.params.index
    stars = np.array(['(*)' if p < alpha else '' for p in res.pvalues])
    rep = pd.DataFrame(
        np.array([trunc_3digs(res.params), trunc_3digs(res.tvalues), stars]),
        index=['est', 't-stat', f'p<{alpha}'],
        columns=coef_names,
    )
    # Append a blank row, then fill in the single adjusted-R^2 cell.
    rep.loc['Adj.R^2'] = ''
    # Without an intercept there is no 'const' column; writing to it would
    # silently add a spurious NaN-filled column, so use the first one instead.
    r2_col = 'const' if 'const' in coef_names else coef_names[0]
    rep.loc['Adj.R^2', r2_col] = trunc_3digs(res.rsquared_adj)
    return rep

In [4]:
# 3-(a): regress HML on Mkt-RF, SMB, RMW and CMA (with an intercept)
# and show the condensed report.
y = df['HML'].values
X = sm.add_constant(df[['Mkt-RF', 'SMB', 'RMW', 'CMA']])
res1 = sm.OLS(endog=y, exog=X).fit()
rep1 = report_less(res1)
rep1

Unnamed: 0,const,Mkt-RF,SMB,RMW,CMA
est,-0.042,0.018,0.011,0.129,1.025
t-stat,-0.486,0.849,0.381,3.169,22.795
p<0.05,,,,(*),(*)
Adj.R^2,0.5,,,,


## 3-(b)

In [5]:
# 3-(b): same regression as 3-(a) with MOM added to the regressors.
y = df['HML'].values
X = sm.add_constant(df[['Mkt-RF', 'SMB', 'RMW', 'CMA', 'MOM']])
res2 = sm.OLS(endog=y, exog=X).fit()
rep2 = report_less(res2)
rep2

Unnamed: 0,const,Mkt-RF,SMB,RMW,CMA,MOM
est,0.05,-0.0,0.013,0.149,1.008,-0.129
t-stat,0.587,-0.009,0.448,3.787,23.191,-6.822
p<0.05,,,,(*),(*),(*)
Adj.R^2,0.535,,,,,


## 3-(c)

In [6]:
# 3-(c): repeat both regressions with HML-DEV as the dependent variable.
# Four regressors first; the blank MOM column lets this report stack
# cleanly on top of the five-regressor report below.
y = df['HML-DEV']
X = sm.add_constant(df[['Mkt-RF', 'SMB', 'RMW', 'CMA']])
res3 = sm.OLS(endog=y, exog=X).fit()
rep3 = report_less(res3).assign(MOM='')

# Same dependent variable with MOM added to the regressors.
y = df['HML-DEV'].values
X = sm.add_constant(df[['Mkt-RF', 'SMB', 'RMW', 'CMA', 'MOM']])
res4 = sm.OLS(endog=y, exog=X).fit()
rep4 = report_less(res4)
pd.concat([rep3, rep4], axis=0)

Unnamed: 0,const,Mkt-RF,SMB,RMW,CMA,MOM
est,-0.014,0.066,0.014,-0.054,0.972,
t-stat,-0.110,2.136,0.324,-0.913,14.783,
p<0.05,,(*),,,(*),
Adj.R^2,0.279,,,,,
est,0.363,-0.008,0.02,0.028,0.900,-0.528
t-stat,4.289,-0.413,0.706,0.712,20.699,-28.009
p<0.05,(*),,,,(*),(*)
Adj.R^2,0.686,,,,,


## 3-(d)

In [7]:
# 3-(d): stack the four reports from 3-(a)-(c) into one summary table.
# The HML report without MOM is padded with a blank MOM column on a copy,
# so the frame displayed in 3-(a) is not mutated when this cell is run.
# rep3 already carries its blank MOM column from the 3-(c) cell.
tab1 = pd.concat([rep1.assign(MOM=''), rep2, rep3, rep4])
# The trailing spaces in 'HML ' and 'HML-DEV ' keep the four outer labels
# distinct; within each pair the first block is the regression without MOM
# and the second one includes it.
tab1.index = pd.MultiIndex.from_product([
    ['HML', 'HML ', 'HML-DEV', 'HML-DEV '],
    ['est','t-stat','p<0.05', 'Adj.R^2']])
tab1

Unnamed: 0,Unnamed: 1,const,Mkt-RF,SMB,RMW,CMA,MOM
HML,est,-0.042,0.018,0.011,0.129,1.025,
HML,t-stat,-0.486,0.849,0.381,3.169,22.795,
HML,p<0.05,,,,(*),(*),
HML,Adj.R^2,0.500,,,,,
HML,est,0.050,-0.000,0.013,0.149,1.008,-0.129
HML,t-stat,0.587,-0.009,0.448,3.787,23.191,-6.822
HML,p<0.05,,,,(*),(*),(*)
HML,Adj.R^2,0.535,,,,,
HML-DEV,est,-0.014,0.066,0.014,-0.054,0.972,
HML-DEV,t-stat,-0.110,2.136,0.324,-0.913,14.783,


**Comments**


- The results here are consistent with those in the article.
- For HML, we observe large and statistically significant exposures to RMW, CMA, and MOM. The negative exposure to MOM indicates that the returns of HML and MOM are negatively related.
- HML has a low, insignificant alpha when regressed on the other factors. This suggests that the HML factor is somewhat "redundant", in the sense that its risk premium can be almost fully explained by the risk premium of some return series living in the space spanned by the other factors (excluding HML).
- HML-DEV can "resurrect" HML, in the sense that it still has a large, significant alpha (0.363%, with t-statistic = 4.289) when regressed on the other factors including MOM. This indicates that HML-DEV is not "redundant" in the above sense.

## 3-(e)

In [8]:
def test_hml(prd):
    """Run the four HML / HML-DEV regressions on the sample ``prd``.

    For each dependent variable (``'HML'``, then ``'HML-DEV'``) the series is
    regressed on ``Mkt-RF, SMB, RMW, CMA`` and then on the same set plus
    ``MOM``, each time with an intercept.

    Parameters
    ----------
    prd : pandas.DataFrame
        Must contain the columns ``HML``, ``HML-DEV``, ``Mkt-RF``, ``SMB``,
        ``RMW``, ``CMA`` and ``MOM``.

    Returns
    -------
    pandas.DataFrame
        The four ``report_less`` tables stacked vertically in the order
        HML without MOM, HML with MOM, HML-DEV without MOM, HML-DEV with MOM.
        Reports fitted without MOM get a blank ``MOM`` column so that all
        blocks share the same columns.
    """
    base_factors = ['Mkt-RF', 'SMB', 'RMW', 'CMA']
    factor_sets = [base_factors, base_factors + ['MOM']]

    reports = []
    for target in ['HML', 'HML-DEV']:
        # Pass the raw values for both targets so they are handled the same way.
        y = prd[target].values
        for factors in factor_sets:
            X = sm.add_constant(prd[factors])
            rep = report_less(sm.OLS(y, X).fit())
            if 'MOM' not in factors:
                # Blank placeholder instead of NaN once the reports are stacked.
                rep['MOM'] = ''
            reports.append(rep)
    return pd.concat(reports)

### 1998 - 2018 Results

In [9]:
# Keep the rows whose Date lies in [199800, 201812] (both ends inclusive)
# and rerun the four regressions on that window.
prd1 = df[df.Date.between(199800, 201812)]
test_hml(prd1)

Unnamed: 0,const,Mkt-RF,SMB,RMW,CMA,MOM
est,-0.352,0.185,0.063,0.454,0.863,
t-stat,-2.408,4.898,1.209,7.109,12.248,
p<0.05,(*),(*),,(*),(*),
Adj.R^2,0.515,,,,,
est,-0.271,0.128,0.095,0.456,0.828,-0.128
t-stat,-1.917,3.366,1.887,7.445,12.181,-4.759
p<0.05,,(*),,(*),(*),(*)
Adj.R^2,0.554,,,,,
est,-0.377,0.359,0.002,0.322,0.863,
t-stat,-1.636,6.046,0.022,3.198,7.771,


### 1978 - 2008 Results

In [10]:
# Keep the rows whose Date lies in [197800, 200812] (both ends inclusive)
# and rerun the four regressions on that window.
prd1 = df[df.Date.between(197800, 200812)]
test_hml(prd1)

Unnamed: 0,const,Mkt-RF,SMB,RMW,CMA,MOM
est,0.023,-0.044,-0.087,0.173,0.911,
t-stat,0.208,-1.659,-2.339,3.754,16.527,
p<0.05,,,(*),(*),(*),
Adj.R^2,0.553,,,,,
est,0.118,-0.047,-0.053,0.212,0.904,-0.127
t-stat,1.113,-1.847,-1.469,4.717,17.022,-5.450
p<0.05,,,,(*),(*),(*)
Adj.R^2,0.586,,,,,
est,-0.008,-0.031,-0.192,-0.078,0.820,
t-stat,-0.051,-0.809,-3.551,-1.168,10.192,


### 1968 - 1988 Results

In [11]:
# Keep the rows whose Date lies in [196800, 198812] (both ends inclusive)
# and rerun the four regressions on that window.
prd1 = df[df.Date.between(196800, 198812)]
test_hml(prd1)

Unnamed: 0,const,Mkt-RF,SMB,RMW,CMA,MOM
est,0.236,-0.082,0.049,-0.319,0.874,
t-stat,1.982,-3.127,1.250,-3.850,11.732,
p<0.05,(*),(*),,(*),(*),
Adj.R^2,0.621,,,,,
est,0.276,-0.079,0.039,-0.293,0.877,-0.060
t-stat,2.294,-3.040,0.986,-3.521,11.841,-1.997
p<0.05,(*),(*),,(*),(*),(*)
Adj.R^2,0.626,,,,,
est,0.403,-0.112,0.124,-0.518,0.761,
t-stat,2.470,-3.139,2.324,-4.565,7.464,


**Comments**


- The results in (d) do not always hold across different sample periods.
- Over 1998 - 2018, HML-DEV does not have a significant alpha, but HML has a significant negative alpha when regressed on the other four FF factors.
- Over 1978 - 2008, the results in (d) hold in a similar way.
- Over 1968 - 1988, neither HML nor HML-DEV is redundant (both have significant positive alphas). Most pricing factors turn out to be more effective in earlier periods. Nevertheless, HML-DEV still has better (larger and more significant) alphas than HML, so it is legitimate to say that the paper's main idea still holds in this period.

## 3-(f)

In [12]:
# 3-(f), first direction: HML on the other five series plus HML-DEV.
y = df['HML'].values
X = sm.add_constant(df[['Mkt-RF', 'SMB', 'RMW', 'CMA', 'MOM','HML-DEV']])
res5 = sm.OLS(endog=y, exog=X).fit()
rep5 = report_less(res5)

# Second direction: HML-DEV on the same five series plus HML.
y = df['HML-DEV'].values
X = sm.add_constant(df[['Mkt-RF', 'SMB', 'RMW', 'CMA', 'MOM','HML']])
res6 = sm.OLS(endog=y, exog=X).fit()
rep6 = report_less(res6)

In [13]:
# Report for HML regressed on Mkt-RF, SMB, RMW, CMA, MOM and HML-DEV.
rep5

Unnamed: 0,const,Mkt-RF,SMB,RMW,CMA,MOM,HML-DEV
est,-0.231,0.006,-0.003,0.128,0.312,0.280,0.773
t-stat,-4.236,0.488,-0.155,5.096,8.657,15.407,29.913
p<0.05,(*),,,(*),(*),(*),(*)
Adj.R^2,0.813,,,,,,


In [14]:
# Report for HML-DEV regressed on Mkt-RF, SMB, RMW, CMA, MOM and HML.
rep6

Unnamed: 0,const,Mkt-RF,SMB,RMW,CMA,MOM,HML
est,0.325,-0.008,0.01,-0.087,0.120,-0.429,0.773
t-stat,6.041,-0.639,0.567,-3.450,3.171,-34.512,29.913
p<0.05,(*),,,(*),(*),(*),(*)
Adj.R^2,0.874,,,,,,


**Comments**


- HML-DEV has a positive and significant alpha, while HML has a negative and significant alpha.
- Therefore, HML-DEV is more profitable when the other factors are controlled for.