In [2]:
import yfinance as yf
import pandas as pd
import numpy as np
import statsmodels.api as sm
from datetime import datetime

# Step 1: Fetch Data
start_date = "2010-01-01"
end_date = "2024-12-31"
risk_free_ticker = "^IRX"  # Use the 13-week Treasury yield as a proxy for risk-free rate
market_index_ticker = "^GSPC"  # S&P 500 index
equity_tickers = ["AAPL", "MSFT", "GOOGL"]  # Replace with desired stock tickers

# Fetch data
risk_free_data = yf.download(risk_free_ticker, start=start_date, end=end_date)["Close"]
market_data = yf.download(market_index_ticker, start=start_date, end=end_date)["Close"]
stock_data = yf.download(equity_tickers, start=start_date, end=end_date)["Close"]

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  3 of 3 completed


In [3]:
# Step 2: Prepare Data
# Calculate daily returns
market_returns = market_data.pct_change().dropna()
stock_returns = stock_data.pct_change().dropna()

# Calculate SMB and HML
# For simplicity, create synthetic SMB and HML factors
smb = stock_returns.mean(axis=1)  # Proxy for SMB
top_50 = stock_returns.quantile(0.5, axis=1)
hml = top_50 - stock_returns.mean(axis=1)  # Proxy for HML

In [4]:
hml

Date
2010-01-05    0.001107
2010-01-06   -0.000156
2010-01-07    0.001443
2010-01-08   -0.002062
2010-01-11   -0.001137
                ...   
2024-12-23   -0.002534
2024-12-24   -0.000111
2024-12-26   -0.001867
2024-12-27    0.000502
2024-12-30   -0.001777
Length: 3772, dtype: float64

In [5]:
smb

Date
2010-01-05   -0.000784
2010-01-06   -0.015751
2010-01-07   -0.011843
2010-01-08    0.008959
2010-01-11   -0.007684
                ...   
2024-12-23    0.005598
2024-12-24    0.009485
2024-12-26   -0.000734
2024-12-27   -0.015021
2024-12-30   -0.011463
Length: 3772, dtype: float64

In [6]:
# convert to pandas dataframe
market_returns = pd.DataFrame(market_returns).dropna()
stock_returns = pd.DataFrame(stock_returns).dropna()

# Calculate SMB
# SMB is typically calculated as the return of small-cap stocks minus large-cap stocks.
# For simplicity, we'll proxy SMB as the average return of stocks.
smb = stock_returns.mean(axis=1)  # Average return of all stocks, as a simple SMB proxy

# Calculate HML
# HML is the return of high book-to-market (value) stocks minus low book-to-market (growth) stocks.
# For simplicity, we'll use the top 50th percentile return as a proxy for high and average return for the mean.
top_50 = stock_returns.quantile(0.5, axis=1)  # Median return as a simple high proxy
hml = top_50 - stock_returns.mean(axis=1)  # Proxy for HML

stock_returns['smb'] = smb.values
stock_returns['top_50'] = top_50.values
stock_returns['hml'] = hml.values

In [7]:
# Convert risk-free rate from percentage to daily rate
risk_free_rate = risk_free_data / 100 / 252
risk_free_rate = risk_free_rate.reindex(market_returns.index, method="ffill")

# convert to pandas dataframe
risk_free_rate = pd.DataFrame(risk_free_rate).dropna()

In [8]:
# Merge All data together
merged_df = pd.merge(stock_returns, 
                    pd.merge(market_returns, risk_free_rate,  
                            left_index=True, right_index=True),
                    left_index=True, right_index=True)

In [9]:
# Calculate market excess return
merged_df['market_excess_return'] = merged_df['^GSPC'] - merged_df['^IRX']
merged_df['dt'] = merged_df.index

In [10]:
merged_df.dtypes

Ticker
AAPL                           float64
GOOGL                          float64
MSFT                           float64
smb                            float64
top_50                         float64
hml                            float64
^GSPC                          float64
^IRX                           float64
market_excess_return           float64
dt                      datetime64[ns]
dtype: object

In [11]:
merged_df['dt'].max()

Timestamp('2024-12-30 00:00:00')

In [12]:
# Train / Test Split

TRAIN_df = merged_df[merged_df['dt'] <= '2023-12-31']
TEST_df = merged_df[merged_df['dt'] > '2023-12-31']

print(TRAIN_df['dt'].max(), TRAIN_df['dt'].min())
print(TEST_df['dt'].max(), TEST_df['dt'].min())

2023-12-29 00:00:00 2010-01-05 00:00:00
2024-12-30 00:00:00 2024-01-02 00:00:00


In [19]:
# CAPM
factors = ['market_excess_return']
results = {}

for stock in equity_tickers:
    y = TRAIN_df[stock] - TRAIN_df['^IRX']
    x = TRAIN_df[factors]
    model = sm.OLS(y, x).fit()
    results[stock] = model

test_rf = TEST_df['^IRX'].mean()
test_market_excess_return = TEST_df['market_excess_return'].mean()

# Output results
for stock, model in results.items():
    print(f"Regression Results for {stock}:")
    # print(model.summary())
    print(model.params)
    CAPM_expected = test_rf + model.params[0] * test_market_excess_return
    stock_actual_mean = TEST_df[stock].mean()
    print(f"for {stock}, actual average return is {stock_actual_mean}, and CAPM expected return is {CAPM_expected}")
    print("\n")

Regression Results for AAPL:
market_excess_return    1.114707
dtype: float64
for AAPL, actual average return is 0.0011970940538656015, and CAPM expected return is 0.0009628192549331767


Regression Results for MSFT:
market_excess_return    1.121697
dtype: float64
for MSFT, actual average return is 0.0005874097312781221, and CAPM expected return is 0.0009676236912907859


Regression Results for GOOGL:
market_excess_return    1.08826
dtype: float64
for GOOGL, actual average return is 0.001422002213398059, and CAPM expected return is 0.0009446430097880882




  CAPM_expected = test_rf + model.params[0] * test_market_excess_return


In [20]:
# Three-factor
factors = ['market_excess_return', 'smb', 'hml']
results = {}

for stock in equity_tickers:
    y = TRAIN_df[stock] - TRAIN_df['^IRX']
    x = TRAIN_df[factors]
    model = sm.OLS(y, x).fit()
    results[stock] = model

test_rf = TEST_df['^IRX'].mean()
test_market_excess_return = TEST_df['market_excess_return'].mean()
test_smb = TEST_df['smb'].mean()
test_hml = TEST_df['hml'].mean()


# Output results
for stock, model in results.items():
    print(f"Regression Results for {stock}:")
    # print(model.summary())
    print(model.params)
    Three_model_expected = test_rf + model.params[0] * test_market_excess_return + + model.params[1] * test_smb + + model.params[2] * test_hml
    print(f"for {stock}, actual average return is {stock_actual_mean}, and CAPM expected return is {Three_model_expected}")
    print("\n")

Regression Results for AAPL:
market_excess_return   -0.002129
smb                     1.004474
hml                    -0.491987
dtype: float64
for AAPL, actual average return is 0.001422002213398059, and CAPM expected return is 0.0013350220281672937


Regression Results for MSFT:
market_excess_return    0.103845
smb                     0.921136
hml                     0.424937
dtype: float64
for MSFT, actual average return is 0.001422002213398059, and CAPM expected return is 0.0011954900265198575


Regression Results for GOOGL:
market_excess_return   -0.100825
smb                     1.073300
hml                     0.066674
dtype: float64
for GOOGL, actual average return is 0.001422002213398059, and CAPM expected return is 0.0012656365197047494




  Three_model_expected = test_rf + model.params[0] * test_market_excess_return + + model.params[1] * test_smb + + model.params[2] * test_hml


In [17]:
alpha = model.params[0]
print(alpha)

1.0882598082971544


  alpha = model.params[0]


In [None]:
# Step 3: Run Regression for Each Stock and Test

results = {}
factors = ['smb', 'hml', 'market_excess_return']
for stock in equity_tickers:
    y = merged_df[stock] - merged_df['^IRX']
    x = sm.add_constant(merged_df[factors])
    model = sm.OLS(y, x).fit()
    results[stock] = model

# Output results
for stock, model in results.items():
    print(f"Regression Results for {stock}:")
    print(model.summary())
    print("\n")

In [16]:
all_data = pd.DataFrame(stock_returns).dropna()

In [None]:
all_data.columns

In [7]:
# Calculate market excess return
market_excess_return = market_returns - risk_free_rate

In [None]:
market_excess_return

In [None]:
risk_free_rate = risk_free_data / 100 / 252


In [None]:
yf.download(['TSLA'], start=start_date, end=end_date)

In [None]:
IRX