### Causal Modeling with Fama French Models

In [33]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pandas_datareader as pdr
import yfinance as yf
import warnings
warnings.filterwarnings('ignore')

factors = pdr.get_data_famafrench(
    'F-F_Research_Data_Factors_weekly', 
    start = '2000-01-01'
)[0] * 0.01

In [34]:
factors

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-07,-0.0249,-0.0026,0.0089,0.00103
2000-01-14,0.0207,0.0196,-0.0150,0.00103
2000-01-21,0.0002,0.0694,-0.0281,0.00103
2000-01-28,-0.0571,0.0017,0.0175,0.00103
2000-02-04,0.0446,0.0126,-0.0294,0.00108
...,...,...,...,...
2024-05-31,-0.0069,0.0065,0.0024,0.00110
2024-06-07,0.0091,-0.0218,-0.0201,0.00102
2024-06-14,0.0131,-0.0132,-0.0250,0.00102
2024-06-21,0.0052,-0.0072,0.0063,0.00102


In [53]:
asset = yf.download(
    'AAPL',
    start = '1999-12-31',
    end = '2024-07-01',
    progress = False,
    interval = '1d'
)

In [54]:
asset_ret = asset['Close'].resample('W-FRI').last().pct_change().dropna()

In [55]:
factors.index = asset_ret.index

In [56]:
excess_ret = asset_ret - factors['RF']

In [57]:
excess_ret

Date
2000-01-07   -0.033249
2000-01-14    0.008392
2000-01-21    0.107247
2000-01-28   -0.088060
2000-02-04    0.061651
                ...   
2024-05-31    0.010849
2024-06-07    0.023115
2024-06-14    0.078212
2024-06-21   -0.024551
2024-06-28    0.014065
Freq: W-FRI, Length: 1278, dtype: float64

Model 1 : until 2008

In [58]:
import statsmodels.api as sm

controls = factors[['Mkt-RF','HML']]
treatment = factors['SMB']
y = excess_ret

treatment_model = sm.OLS(
    treatment, sm.add_constant(controls)
).fit()

print(treatment_model.summary())

                            OLS Regression Results                            
Dep. Variable:                    SMB   R-squared:                       0.079
Model:                            OLS   Adj. R-squared:                  0.077
Method:                 Least Squares   F-statistic:                     54.41
Date:                Sat, 24 Aug 2024   Prob (F-statistic):           2.11e-23
Time:                        01:33:17   Log-Likelihood:                 3690.1
No. Observations:                1278   AIC:                            -7374.
Df Residuals:                    1275   BIC:                            -7359.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0001      0.000      0.360      0.7

In [59]:
explain_model = sm.OLS(
    y, sm.add_constant(controls)
).fit()

print(explain_model.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.320
Model:                            OLS   Adj. R-squared:                  0.319
Method:                 Least Squares   F-statistic:                     300.0
Date:                Sat, 24 Aug 2024   Prob (F-statistic):          1.63e-107
Time:                        01:33:18   Log-Likelihood:                 2235.0
No. Observations:                1278   AIC:                            -4464.
Df Residuals:                    1275   BIC:                            -4449.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0040      0.001      3.371      0.0

In [63]:
t_tilde = treatment_model.resid
y_tilde = explain_model.resid

t_tilde.name = 't_tilde'
y_tilde.name = 'y_tilde'

model = sm.OLS(
    y, sm.add_constant(t_tilde)
).fit()

print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                 -0.001
Method:                 Least Squares   F-statistic:                    0.1298
Date:                Sat, 24 Aug 2024   Prob (F-statistic):              0.719
Time:                        01:35:29   Log-Likelihood:                 1988.6
No. Observations:                1278   AIC:                            -3973.
Df Residuals:                    1276   BIC:                            -3963.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0053      0.001      3.686      0.0