In [21]:
import yfinance as yf
import pandas as pd
import numpy as np
import statsmodels.api as sm
from datetime import datetime

# Step 1: Fetch Data
# Analysis window and tickers; every later cell reads these names.
start_date = "2010-01-01"
end_date = "2023-12-31"
risk_free_ticker = "^IRX"  # Use the 13-week Treasury yield as a proxy for risk-free rate
market_index_ticker = "^GSPC"  # S&P 500 index
equity_tickers = ["AAPL", "MSFT", "GOOGL"]  # Replace with desired stock tickers

# Fetch data
# auto_adjust is pinned explicitly because its default differs between yfinance
# releases. With it enabled, "Close" is adjusted for splits and dividends, which
# is what the return calculations in the following cells should be based on.
risk_free_data = yf.download(
    risk_free_ticker, start=start_date, end=end_date, auto_adjust=True
)["Close"]
market_data = yf.download(
    market_index_ticker, start=start_date, end=end_date, auto_adjust=True
)["Close"]
stock_data = yf.download(
    equity_tickers, start=start_date, end=end_date, auto_adjust=True
)["Close"]

[*********************100%***********************]  1 of 1 completed


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  3 of 3 completed


In [22]:
# Step 2: Prepare Data
# Calculate daily simple returns.
# fill_method=None keeps pct_change from forward-filling missing prices (the
# deprecated implicit default), so a gap becomes NaN and is removed by dropna()
# instead of silently being counted as a 0% return.
market_returns = market_data.pct_change(fill_method=None).dropna()
stock_returns = stock_data.pct_change(fill_method=None).dropna()

# Calculate SMB and HML
# For simplicity, create synthetic SMB and HML factors. They are built only from
# the downloaded stocks and are stand-ins, not the published Fama-French series.
smb = stock_returns.mean(axis=1)  # Proxy for SMB: cross-sectional mean return
top_50 = stock_returns.quantile(0.5, axis=1)  # cross-sectional median return
hml = top_50 - smb  # Proxy for HML: median return minus mean return

In [23]:
# Display the synthetic HML proxy (row-wise median return minus mean return).
hml

Date
2010-01-05    0.001107
2010-01-06   -0.000156
2010-01-07    0.001443
2010-01-08   -0.002062
2010-01-11   -0.001137
                ...   
2023-12-22    0.001165
2023-12-26    0.001017
2023-12-27    0.001486
2023-12-28    0.000738
2023-12-29   -0.001434
Length: 3521, dtype: float64

In [24]:
# Display the synthetic SMB proxy (row-wise mean of the stock returns).
smb

Date
2010-01-05   -0.000784
2010-01-06   -0.015751
2010-01-07   -0.011843
2010-01-08    0.008959
2010-01-11   -0.007684
                ...   
2023-12-22    0.001619
2023-12-26   -0.000805
2023-12-27   -0.003061
2023-12-28    0.001488
2023-12-29   -0.002417
Length: 3521, dtype: float64

In [32]:
# convert to pandas dataframe
market_returns = pd.DataFrame(market_returns).dropna()
stock_returns = pd.DataFrame(stock_returns).dropna()

# This cell appends 'smb', 'top_50' and 'hml' to stock_returns. The factor
# proxies must therefore be computed from the equity columns only; otherwise
# re-running the cell would average the previously derived columns into the
# new factors and change the results on every execution.
equity_returns = stock_returns.loc[:, stock_returns.columns.isin(equity_tickers)]
assert not equity_returns.empty, "no equity ticker columns found in stock_returns"

# Calculate SMB
# SMB is typically calculated as the return of small-cap stocks minus large-cap stocks.
# For simplicity, we'll proxy SMB as the average return of stocks.
smb = equity_returns.mean(axis=1)  # Average return of all stocks, as a simple SMB proxy

# Calculate HML
# HML is the return of high book-to-market (value) stocks minus low book-to-market (growth) stocks.
# For simplicity, the row-wise median return stands in for "high" and the mean return for "low".
top_50 = equity_returns.quantile(0.5, axis=1)  # Median return as a simple high proxy
hml = top_50 - smb  # Proxy for HML

# The factor Series share stock_returns' index, so plain (index-aligned)
# assignment stores them row for row.
stock_returns['smb'] = smb
stock_returns['top_50'] = top_50
stock_returns['hml'] = hml

In [34]:
# Turn the quoted risk-free yield into a per-day rate: percent -> fraction
# (divide by 100), then divide by 252 (presumably trading days per year).
# The result is aligned to the market-return dates, carrying the last
# available value forward; dates with no earlier observation are dropped.
risk_free_rate = pd.DataFrame(
    risk_free_data
    .div(100)
    .div(252)
    .reindex(market_returns.index, method="ffill")
).dropna()

In [35]:
# Combine everything on the shared date index: stock returns (with factor
# columns) first, then the market return, then the risk-free rate.
# Inner joins keep only the dates present in all three inputs.
merged_df = stock_returns.join(
    market_returns.join(risk_free_rate, how="inner"),
    how="inner",
)

In [36]:
# Calculate market excess return: daily market return minus daily risk-free rate.
# The columns are looked up through the ticker constants from the configuration
# cell, so changing the benchmark or risk-free ticker there does not break this cell.
merged_df['market_excess_return'] = (
    merged_df[market_index_ticker] - merged_df[risk_free_ticker]
)

In [37]:
# Display the merged frame: stock returns, factor proxies, market return,
# risk-free rate and market excess return.
merged_df

Ticker,AAPL,GOOGL,MSFT,smb,top_50,hml,^GSPC,^IRX,market_excess_return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2010-01-05,0.001729,-0.004404,0.000323,-0.000784,0.000323,0.001107,0.003116,2.380952e-06,0.003113
2010-01-06,-0.015907,-0.025209,-0.006136,-0.015751,-0.015907,-0.000156,0.000546,1.785714e-06,0.000544
2010-01-07,-0.001849,-0.023279,-0.010400,-0.011843,-0.010400,0.001443,0.004001,1.785714e-06,0.003999
2010-01-08,0.006648,0.013331,0.006896,0.008959,0.006896,-0.002062,0.002882,1.587302e-06,0.002880
2010-01-11,-0.008821,-0.001511,-0.012720,-0.007684,-0.008821,-0.001137,0.001747,9.920635e-07,0.001746
...,...,...,...,...,...,...,...,...,...
2023-12-22,-0.005548,0.007620,0.002784,0.001619,0.002784,0.001165,0.001660,2.066667e-04,0.001453
2023-12-26,-0.002841,0.000212,0.000214,-0.000805,0.000212,0.001017,0.004232,2.064683e-04,0.004025
2023-12-27,0.000518,-0.008126,-0.001575,-0.003061,-0.001575,0.001486,0.001430,2.077381e-04,0.001223
2023-12-28,0.002226,-0.000997,0.003235,0.001488,0.002226,0.000738,0.000370,2.070635e-04,0.000163


In [39]:
# Step 3: Run Regression for Each Stock
# Fits one OLS model per ticker: the stock's excess return regressed on the
# three factor columns plus an intercept. Fitted models are stored in
# `results`, keyed by ticker.
results = {}
factors = ['smb', 'hml', 'market_excess_return']

# The regressors are the same for every stock, so the design matrix
# (factors plus a constant column) is built once rather than per iteration.
# NOTE(review): 'smb' is the mean return of these same stocks, so each stock's
# own return is part of its regressor; the fit statistics are likely
# overstated as a result -- confirm before interpreting them.
x = sm.add_constant(merged_df[factors])

for stock in equity_tickers:
    # Dependent variable: stock return in excess of the risk-free rate.
    y = merged_df[stock] - merged_df[risk_free_ticker]
    model = sm.OLS(y, x).fit()
    results[stock] = model

# Output results
for stock, model in results.items():
    print(f"Regression Results for {stock}:")
    print(model.summary())
    print("\n")

Regression Results for AAPL:
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.726
Model:                            OLS   Adj. R-squared:                  0.726
Method:                 Least Squares   F-statistic:                     3110.
Date:                Fri, 03 Jan 2025   Prob (F-statistic):               0.00
Time:                        16:48:54   Log-Likelihood:                 11477.
No. Observations:                3521   AIC:                        -2.295e+04
Df Residuals:                    3517   BIC:                        -2.292e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                           coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------
con

In [16]:
# Copy of the stock returns as a DataFrame, keeping only rows with no missing values.
all_data = pd.DataFrame(data=stock_returns).dropna(axis=0, how="any")

In [None]:
# List the column labels of all_data.
all_data.columns

In [7]:
# Calculate market excess return
# market_returns and risk_free_rate are single-column DataFrames whose column
# labels differ (the two tickers). Subtracting the frames directly aligns on
# column labels and produces only NaN, so each column is selected as a Series
# first; the subtraction then aligns on dates alone.
market_excess_return = (
    market_returns[market_index_ticker] - risk_free_rate[risk_free_ticker]
).to_frame(name="market_excess_return")

In [None]:
# Display the market excess return computed in the previous cell.
market_excess_return

In [None]:
# Recompute the per-day risk-free rate straight from the raw yield
# (percent -> fraction, then divided by 252).
# NOTE(review): this rebinds risk_free_rate without the reindex/dropna applied
# in the earlier conversion cell -- confirm this scratch cell is still wanted.
risk_free_rate = risk_free_data.div(100).div(252)


In [None]:
# Ad-hoc look at the raw frame yfinance returns for one ticker over the same window.
yf.download(tickers=['TSLA'], start=start_date, end=end_date)

In [None]:
# `IRX` is not defined in any visible cell and raises NameError on a fresh run.
# The downloaded ^IRX yield data is held in risk_free_data, so preview that.
risk_free_data.head()