In [1]:
# Factor Attribution (Why Returns Happened/Reason behind the returns) :

# This phase answers why retrurns come ?
# Because of market or because of stock itself ?
# Means is this a performance skill of a stock or just flow with the market ? 

In [2]:
import pandas as pd
import numpy as np
import sys
sys.path.append("..")

In [3]:
ff_factor = pd.read_csv("../data/processed/ff_factor_final.csv", header=[0, 1],index_col=0)
prices = pd.read_csv("../data/processed/prices_final.csv", header=[0,1], index_col=0)
returns = prices.pct_change().dropna()
ff_factor

Date,Mkt-RF,SMB,HML,RF
2010-01-04,1.69,0.61,1.14,0.0
2010-01-05,0.31,-0.64,1.22,0.00
2010-01-06,0.13,-0.23,0.55,0.00
2010-01-07,0.40,0.09,0.96,0.00
2010-01-08,0.33,0.36,0.02,0.00
2010-01-11,0.13,-0.13,-0.26,0.00
...,...,...,...,...
2024-12-23,0.61,-0.71,-0.20,0.02
2024-12-24,1.11,-0.09,-0.06,0.02
2024-12-26,0.01,1.04,-0.18,0.02
2024-12-27,-1.17,-0.65,0.57,0.02


In [4]:

#& statsmodels is a library used for statistical models.
import statsmodels.api as sm

In [5]:
# Risk Free Rate : We must compare returns above risk-free return
risk_free_rate = ff_factor['RF']/100

In [6]:
# Calculating stock excess returns : 

excess_returns = returns.sub(risk_free_rate.values, axis=0)
# Excess return = Asset return − Risk-free return

In [7]:
# Factor returns : This is classic Fama-French 3-factor model.
factors = ff_factor[["Mkt-RF", "SMB", "HML"]] / 100

In [8]:
factor_results = {}   # an empty dictionary 
price_returns = excess_returns.xs('Close', axis=1, level=0)
price_returns

Ticker,AAPL,AMZN,BAC,CAT,CVX,GOOGL,JNJ,JPM,MMM,MSFT,PFE,PG,WFC,WMT,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2010-01-05,0.001729,0.005900,0.032505,0.011955,0.007084,-0.004404,-0.011595,0.019370,-0.006263,0.000323,-0.014263,0.000327,0.027452,-0.009957,0.003904
2010-01-06,-0.015906,-0.018116,0.011729,0.003038,0.000125,-0.025209,0.008134,0.005494,0.014182,-0.006137,-0.003215,-0.004743,0.001425,-0.002235,0.008643
2010-01-07,-0.001849,-0.017013,0.032947,0.004038,-0.003767,-0.023280,-0.007138,0.019809,0.000717,-0.010400,-0.003764,-0.005424,0.036286,0.000560,-0.003142
2010-01-08,0.006648,0.027077,-0.008860,0.011229,0.001765,0.013331,0.003438,-0.002456,0.007046,0.006896,0.008095,-0.001322,-0.009269,-0.005038,-0.004012
2010-01-11,-0.008821,-0.024041,0.008939,0.062810,0.017743,-0.001511,0.000156,-0.003358,-0.004032,-0.012720,0.008030,-0.003971,-0.002078,0.016501,0.011220
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-23,0.002865,0.000422,-0.006539,-0.001976,0.000640,0.016622,0.005338,0.003125,-0.002520,-0.003292,0.013078,0.000098,0.002644,-0.020690,0.003862
2024-12-24,0.011278,0.017529,0.010964,0.005766,0.005885,0.007404,0.003793,0.016244,0.010499,0.009174,0.000923,0.004737,0.014685,0.025589,0.000741
2024-12-26,0.002976,-0.008932,0.003630,-0.001424,0.000773,-0.002801,-0.002051,0.003225,0.006090,-0.002977,-0.006931,0.007022,0.002175,0.000987,0.000646
2024-12-27,-0.013442,-0.014734,-0.004914,-0.006356,-0.000061,-0.014719,-0.003841,-0.008302,-0.007823,-0.017502,0.002059,-0.003902,-0.009258,-0.012378,-0.000294


In [9]:
# Deciding which statistical model we are going to use : 

# Basically stock’s excess return is explained by common risk factors, and how much is unexplained (alpha)?
# So, OLS is the Best Linear Unbiased Estimator (BLUE) under these assumptions.
# OLS provides :
# 1. Alpha intercept (add_constant()) : This is CAPM / Fama–French alpha.
# 2. Betas with economic meaning
# 3. Statistical inference :
# OLS gives you:
#    t-statistics
#    p-values
#    confidence intervals
#    R²

# Suppose we have a scattered data, and we want to draw a linear regression line relation on taht data with minimum errors : 
# So, equation of line : y = a+bx
# To minimizee errors, if we find such value of a,b having minimum errors and put it in the equation,
# then we get the equation having minimum errors and this method is called OLS method

# All these factors makes OLS an industry standard.

# Notes : 
# Alpha : An alpha of a stock is an excess return generated by the stock in comparison to the benchmark index 
# Beta : It measure the volatality of an investment instrument as compared to the benchmark index

In [10]:
price_returns = excess_returns.xs('Close', axis=1, level=0)

for stock in price_returns.columns :
    
    y = price_returns[stock].dropna()
    X = factors.loc[y.index]
    X = sm.add_constant(X)      # added alpha constant 
    model = sm.OLS(y,X).fit()   # Ordinary Least Squares (regression)
    factor_results[stock] = model

In [11]:
factor_results.keys()
factor_results['AAPL'].summary()


0,1,2,3
Dep. Variable:,AAPL,R-squared:,0.508
Model:,OLS,Adj. R-squared:,0.508
Method:,Least Squares,F-statistic:,1299.0
Date:,"Thu, 25 Dec 2025",Prob (F-statistic):,0.0
Time:,17:28:50,Log-Likelihood:,11236.0
No. Observations:,3772,AIC:,-22460.0
Df Residuals:,3768,BIC:,-22440.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0005,0.000,2.264,0.024,6.09e-05,0.001
"('Mkt-RF', '1.69')",1.1123,0.019,59.455,0.000,1.076,1.149
"('SMB', '0.61')",-0.2595,0.034,-7.687,0.000,-0.326,-0.193
"('HML', '1.14')",-0.4437,0.026,-17.274,0.000,-0.494,-0.393

0,1,2,3
Omnibus:,565.798,Durbin-Watson:,1.919
Prob(Omnibus):,0.0,Jarque-Bera (JB):,9008.176
Skew:,0.055,Prob(JB):,0.0
Kurtosis:,10.57,Cond. No.,171.0


In [12]:
# Calculating beta : 

model.params              # Gives the alpha and betas of the stock


const            -0.000047
(Mkt-RF, 1.69)    0.847294
(SMB, 0.61)      -0.112169
(HML, 1.14)       0.825064
dtype: float64

In [13]:
temp_dict = {}

for stock, model in factor_results.items() :
    temp_dict[stock] = model.params

# converting dictionary to dataframe : 

temp_df = pd.DataFrame(temp_dict)
temp_df

Unnamed: 0,AAPL,AMZN,BAC,CAT,CVX,GOOGL,JNJ,JPM,MMM,MSFT,PFE,PG,WFC,WMT,XOM
const,0.000454,0.000429,-0.000105,0.00019,-4.3e-05,0.000149,3.3e-05,0.000115,-7.4e-05,0.000203,-4.7e-05,8.9e-05,-4.9e-05,0.000257,-4.7e-05
"(Mkt-RF, 1.69)",1.112253,1.14453,1.337126,1.080107,0.974869,1.097043,0.573036,1.172155,0.843982,1.149423,0.667125,0.564957,1.169845,0.515413,0.847294
"(SMB, 0.61)",-0.259529,-0.130724,-0.019381,0.23709,-0.131406,-0.225213,-0.361237,-0.175875,-0.004427,-0.426452,-0.29734,-0.485324,-0.019104,-0.30206,-0.112169
"(HML, 1.14)",-0.443708,-0.764774,1.188732,0.639621,0.8338,-0.43334,0.06688,0.971985,0.315514,-0.466237,0.081499,0.018816,1.087457,-0.076084,0.825064


In [14]:
temp_df = temp_df.T

In [15]:
temp_df.columns

Index(['const', ('Mkt-RF', '1.69'), ('SMB', '0.61'), ('HML', '1.14')], dtype='object')

In [16]:
# Correcting the columns : 

new_col = []

for col in temp_df.columns :
    if type(col) == tuple :
        new_col.append(col[0])
    else :
        new_col.append(col)

temp_df.columns = new_col


In [17]:
temp_df

Unnamed: 0,const,Mkt-RF,SMB,HML
AAPL,0.000454,1.112253,-0.259529,-0.443708
AMZN,0.000429,1.14453,-0.130724,-0.764774
BAC,-0.000105,1.337126,-0.019381,1.188732
CAT,0.00019,1.080107,0.23709,0.639621
CVX,-4.3e-05,0.974869,-0.131406,0.8338
GOOGL,0.000149,1.097043,-0.225213,-0.43334
JNJ,3.3e-05,0.573036,-0.361237,0.06688
JPM,0.000115,1.172155,-0.175875,0.971985
MMM,-7.4e-05,0.843982,-0.004427,0.315514
MSFT,0.000203,1.149423,-0.426452,-0.466237


In [18]:
# Now, in temp_df we have a table consisting the alpha and betas of all the stocks 
# where alpha is named as const and betas are named as : (Mkt-RF, 1.69), (SMB, 0.61), (HML, 1.14)

In [19]:
# Extracting alpha and betas :

# Alpha : 

alpha = temp_df['const']
beta1 = temp_df['Mkt-RF']
beta2 = temp_df['SMB']
beta3 = temp_df['HML']


In [20]:
# Finally we have succesfully calculated alpha and betas which can be used in : 
# 1. Portfolio construction
# 2. Risk budgeting
# 3. Portfolio diversification

In [21]:
# Frzeeing of imp data : 

price_returns.to_csv("../data/processed/price_returns_final.csv", index=True)