In [5]:
# Custom function to transform the date in 5 factors dataframe
def to_date(row):
    year = str(row)[:4]
    month = str(row)[4:6]
    day = str(row)[6:]

    return pd.to_datetime('-'.join([year, month, day]))

In [12]:
# Import dependencies
import pandas as pd
import yfinance as yf
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns

In [31]:
# Read the Fama and French factors
factors = pd.read_csv('F-F_Research_Data_5_Factors_2x3_daily.csv')
factors['Date'] = factors.Date.apply(to_date)

In [32]:
# Get the historical data of Microsoft
msft = yf.Ticker("MSFT")
msft = msft.history(period="max")

# Reset the index (Date) for merging
spy.reset_index(inplace=True)
msft.reset_index(inplace=True)

# Calculate the return
msft['Return'] = msft['Close'].pct_change(1)

In [33]:
# Merging the dataframes and remove row that contains Nan values
df = pd.merge(factors, msft[['Date', 'Return']], on='Date')
df['Return - RF'] = df['Return'] - df['RF']
df.dropna(inplace=True)
df

Unnamed: 0,Date,Mkt-RF,SMB,HML,RMW,CMA,RF,Return,Return - RF
1,1986-03-14,1.03,-0.83,-0.21,0.11,0.31,0.03,0.035712,0.005712
2,1986-03-17,-0.75,0.00,-0.32,0.38,0.05,0.03,0.017250,-0.012750
3,1986-03-18,0.47,0.04,-0.16,-0.07,0.30,0.03,-0.025431,-0.055431
4,1986-03-19,-0.17,0.16,-0.06,0.15,0.15,0.03,-0.017391,-0.047391
5,1986-03-20,0.39,-0.11,0.02,0.12,0.12,0.03,-0.026547,-0.056547
...,...,...,...,...,...,...,...,...,...
9060,2022-02-22,-1.18,-0.37,0.12,-0.06,0.12,0.00,-0.000729,-0.000729
9061,2022-02-23,-1.96,0.07,1.25,0.00,1.07,0.00,-0.025893,-0.025893
9062,2022-02-24,1.88,0.67,-3.85,-1.74,-1.75,0.00,0.051094,0.051094
9063,2022-02-25,2.23,-0.12,1.20,0.35,-0.19,0.00,0.009233,0.009233


# 3-Factor Model

> ## r<sub>t</sub> - r<sub>t, f</sub> = α + β<sub>mkt</sub>(r<sub>t, mkt</sub> − r<sub>t, f</sub>) + β<sub>SMB</sub>r<sub>t, SMB</sub> + β<sub>HML</sub>r<sub>t,HML</sub> + ε<sub>t</sub>

where α<sub>i</sub> is the excess return
      <br>
      SMB (Small Minus Big) is the average return on three small portfolios minus the average return on three big portfolios,

> SMB =	1/3 (Small Value + Small Neutral + Small Growth) - 1/3 (Big Value + Big Neutral + Big Growth).	

HML (High Minus Low) is the average return on two value portfolios minus the average return on two growth portfolios,
 
> HML =	1/2 (Small Value + Big Value) - 1/2 (Small Growth + Big Growth).

A rate of return (RoR) is the net gain or loss of an investment over a specified time period, expressed as a percentage of the investment’s initial cost.

risk free rate is the rate of return of an investment with zero risk.

In [36]:
# Fit data using linear regression
X = df[df.Date.dt.year >= 2017][['Mkt-RF', 'SMB', 'HML']]
X = X.dropna()
y = df[df.Date.dt.year >= 2017]['Return - RF']

# Add constant term (alpha and error term)
X = sm.add_constant(X)
lr = sm.OLS(y, X)
res = lr.fit()
res.summary()

0,1,2,3
Dep. Variable:,Return - RF,R-squared:,0.766
Model:,OLS,Adj. R-squared:,0.765
Method:,Least Squares,F-statistic:,1408.0
Date:,"Sat, 16 Apr 2022",Prob (F-statistic):,0.0
Time:,17:15:55,Log-Likelihood:,4328.5
No. Observations:,1298,AIC:,-8649.0
Df Residuals:,1294,BIC:,-8628.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0034,0.000,-14.088,0.000,-0.004,-0.003
Mkt-RF,0.0124,0.000,62.721,0.000,0.012,0.013
SMB,-0.0038,0.000,-10.654,0.000,-0.005,-0.003
HML,-0.0039,0.000,-15.137,0.000,-0.004,-0.003

0,1,2,3
Omnibus:,198.454,Durbin-Watson:,1.797
Prob(Omnibus):,0.0,Jarque-Bera (JB):,945.464
Skew:,0.629,Prob(JB):,4.95e-206
Kurtosis:,6.987,Cond. No.,2.01


R<sup>2</sup> of 76.6% means 76.6% of variance for a dependent variable that's explained by an independent variable or variables.
<br>
Alpha of -0.0034 means the investment underperformed its benchmark index by -0.34%.
<br>
Beta of 0.0124 suggest that the stock has a lower than average risk.