# Factor Model Testing (Section 3)

In [14]:
import pandas as pd
import numpy as np
from IPython.display import display

In [15]:
# Workbook location and the two sheets used throughout this section.
DATA_PATH = 'factor_pricing_data_monthly.xlsx'
FACTORS_SHEET = 'factors (excess returns)'
PORTFOLIOS_SHEET = 'portfolios (excess returns)'

# Read each sheet with its 'Date' column parsed, then index and order by date.
factors, portfolios = (
    pd.read_excel(DATA_PATH, sheet_name=sheet, parse_dates=['Date'])
    .set_index('Date')
    .sort_index()
    for sheet in (FACTORS_SHEET, PORTFOLIOS_SHEET)
)

# The inner join keeps only dates present in both sheets, so the two aligned
# frames below share one identical index.
combined = factors.join(portfolios, how='inner')
factors_aligned = combined.loc[:, factors.columns]
portfolios_aligned = combined.loc[:, portfolios.columns]

factors_aligned.head()

Unnamed: 0_level_0,MKT,SMB,HML,RMW,CMA,UMD
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1980-01-31,0.055,0.0188,0.0185,-0.0184,0.0189,0.0745
1980-02-29,-0.0123,-0.0162,0.0059,-0.0095,0.0292,0.0789
1980-03-31,-0.1289,-0.0697,-0.0096,0.0182,-0.0105,-0.0958
1980-04-30,0.0396,0.0105,0.0103,-0.0218,0.0034,-0.0048
1980-05-31,0.0526,0.02,0.0038,0.0043,-0.0063,-0.0118


## 3.1
Define the factor model specifications and run time-series regressions for each of the 49 industry portfolios. Report a preview of the CAPM alphas and R-squared values.

In [16]:
# Factor columns used by each pricing specification, keyed by display name.
# Iteration order of this mapping is the order in which models are processed.
MODELS = dict([
    ('CAPM', ['MKT']),
    ('Fama-French 3F', ['MKT', 'SMB', 'HML']),
    ('Fama-French 5F', ['MKT', 'SMB', 'HML', 'RMW', 'CMA']),
    ('AQR', ['MKT', 'HML', 'RMW', 'UMD']),
])

def run_time_series(y, X):
    """Fit an OLS regression of ``y`` on an intercept plus the columns of ``X``.

    Parameters
    ----------
    y : numpy.ndarray or pandas.Series of length T
        Dependent-variable observations. Must support ``.mean()`` and
        element-wise arithmetic with a NumPy array.
    X : array-like of shape (T,) or (T, K)
        Regressor observations. A column of ones is prepended internally, so
        ``X`` itself should not contain a constant.

    Returns
    -------
    alpha : float
        Estimated intercept (first least-squares coefficient).
    betas : numpy.ndarray of shape (K,)
        Estimated slopes, in the column order of ``X``.
    r_squared : float
        ``1 - SSE / SST``. Returned as ``nan`` when ``y`` has no variation
        (SST is not positive), where R-squared is undefined; this avoids a
        division by zero that would otherwise yield ``-inf``/``nan`` with a
        runtime warning.
    resid : same container type as ``y``
        ``y`` minus the fitted values.
    """
    X_design = np.column_stack([np.ones(len(X)), X])
    coef, *_ = np.linalg.lstsq(X_design, y, rcond=None)
    resid = y - X_design @ coef
    sse = np.sum(resid**2)
    sst = np.sum((y - y.mean())**2)
    # A constant dependent variable has SST == 0; report R-squared as undefined.
    r_squared = 1 - sse / sst if sst > 0 else np.nan
    return coef[0], coef[1:], r_squared, resid

# Fit every model to every portfolio and collect the per-asset estimates.
asset_names = portfolios_aligned.columns
time_series_results = {}
for model_name, factor_cols in MODELS.items():
    regressors = factors_aligned[factor_cols].values
    # Each fit is the (alpha, betas, r_squared, resid) tuple from run_time_series.
    fits = [
        run_time_series(portfolios_aligned[name].values, regressors)
        for name in asset_names
    ]
    time_series_results[model_name] = {
        'alphas': pd.Series([fit[0] for fit in fits], index=asset_names),
        'r_squared': pd.Series([fit[2] for fit in fits], index=asset_names),
        'betas': pd.DataFrame([fit[1] for fit in fits], index=asset_names, columns=factor_cols),
        'residuals': pd.DataFrame(
            [fit[3] for fit in fits],
            index=asset_names,
            columns=portfolios_aligned.index,
        ),
    }

# Preview the CAPM intercepts and fit for the first ten portfolios.
capm_fit = time_series_results['CAPM']
capm_preview = pd.DataFrame(
    {'alpha': capm_fit['alphas'], 'R_squared': capm_fit['r_squared']}
).iloc[:10]

display(capm_preview.round(4))

Unnamed: 0,alpha,R_squared
Agric,0.002,0.3333
Food,0.0033,0.3541
Soda,0.0038,0.2449
Beer,0.0043,0.3244
Smoke,0.0072,0.1821
Toys,-0.0029,0.4963
Fun,0.0009,0.5861
Books,-0.002,0.6551
Hshld,0.0014,0.4862
Clths,-0.0004,0.5607


Among the ten industries previewed, CAPM alphas range from about −0.29% to +0.72% per month and R-squared from 0.18 to 0.66, confirming the regressions run successfully and deliver moderate explanatory power even with a single market factor.

## 3.2
For the AQR specification (MKT, HML, RMW, UMD), report alphas and R-squared for all industries and summarize their distribution.

In [17]:
aqr_fit = time_series_results['AQR']
aqr_output = pd.DataFrame(
    {'alpha': aqr_fit['alphas'], 'R_squared': aqr_fit['r_squared']}
)

# Per-industry preview first, then mean / min / max across all industries.
display(aqr_output.round(4).head(10))
summary_stats_aqr = aqr_output.agg(['mean', 'min', 'max'])
display(summary_stats_aqr.round(4))

Unnamed: 0,alpha,R_squared
Agric,0.001,0.3421
Food,0.0001,0.4551
Soda,0.0013,0.3025
Beer,0.0008,0.4148
Smoke,0.0034,0.2654
Toys,-0.0028,0.5102
Fun,0.0033,0.6072
Books,-0.0031,0.6889
Hshld,-0.0011,0.5547
Clths,-0.0019,0.619


Unnamed: 0,alpha,R_squared
mean,-0.0006,0.5719
min,-0.0043,0.0495
max,0.0057,0.8463


AQR alphas average −0.0006 (−0.06% per month) with a tight range (about −0.43% to +0.57%), and the mean R-squared is 0.57 with portfolio fits spanning roughly 0.05 to 0.85. Relative to the CAPM (average R-squared of about 0.52), adding value, profitability, and momentum raises explanatory power only modestly while keeping alphas close to zero.

## 3.3
Compute the mean absolute alpha and average R-squared across portfolios for each model.

In [18]:
# One record per model: mean absolute alpha and mean R-squared across portfolios.
summary_rows = [
    {
        'model': name,
        'MAE_alpha': fit['alphas'].abs().mean(),
        'avg_R_squared': fit['r_squared'].mean(),
    }
    for name, fit in time_series_results.items()
]
summary_df = pd.DataFrame(summary_rows).set_index('model')

display(summary_df.round(4))

Unnamed: 0_level_0,MAE_alpha,avg_R_squared
model,Unnamed: 1_level_1,Unnamed: 2_level_1
CAPM,0.0017,0.5226
Fama-French 3F,0.002,0.5679
Fama-French 5F,0.0026,0.5918
AQR,0.0021,0.5719


CAPM delivers the smallest mean absolute alpha (0.0017) but the weakest R-squared (0.523). The Fama–French 5-factor model raises R-squared to 0.592—the best in the set—while its alpha MAE climbs to 0.0026. The 3-factor and AQR models sit in between with R-squared around 0.57 and MAE near 0.0020.

## 3.4
Rank the models by average R-squared to highlight the incremental explanatory power from adding factors.

In [19]:
# Order models from best to worst average time-series fit.
r2_by_model = summary_df['avg_R_squared']
avg_r2 = r2_by_model.sort_values(ascending=False)

display(avg_r2.rename('avg_R_squared').to_frame().round(4))

Unnamed: 0_level_0,avg_R_squared
model,Unnamed: 1_level_1
Fama-French 5F,0.5918
AQR,0.5719
Fama-French 3F,0.5679
CAPM,0.5226


Average R-squared improves monotonically with richer factor sets: FF5 (0.592) edges out AQR (0.572), which slightly outperforms FF3 (0.568), while CAPM lags at 0.523. Both investment/profitability and momentum exposures add material time-series fit relative to a single market factor.

## 3.5
Run the cross-sectional pricing regression for each model, comparing estimated factor premia with the factors’ time-series means.

In [20]:
# Time-series mean of every factor, used as the benchmark for each lambda.
factor_means = factors_aligned.mean()
# Mean excess return of every portfolio. This does not depend on the model,
# so it is computed once rather than inside the loop.
avg_returns = portfolios_aligned.mean()

cs_results = {}
comparison_tables = {}
for model, cols in MODELS.items():
    betas = time_series_results[model]['betas']
    # Cross-sectional regression of mean returns on the estimated betas.
    # No constant column is added, so the fit is forced through the origin
    # and the lambdas are the only free parameters.
    lambdas, *_ = np.linalg.lstsq(betas.values, avg_returns.values, rcond=None)
    fitted = betas.values @ lambdas
    residuals = avg_returns.values - fitted
    cs_results[model] = {
        'lambdas': pd.Series(lambdas, index=cols),
        'mae_residual': np.mean(np.abs(residuals)),
        'residuals': pd.Series(residuals, index=betas.index),
    }
    # Side-by-side view: each factor's sample mean vs. its estimated premium.
    comparison_tables[model] = pd.DataFrame({
        'time_series_mean': factor_means[cols],
        'cross_section_lambda': cs_results[model]['lambdas'],
    })

for model in MODELS:
    display(comparison_tables[model].round(4))

Unnamed: 0,time_series_mean,cross_section_lambda
MKT,0.0073,0.0071


Unnamed: 0,time_series_mean,cross_section_lambda
MKT,0.0073,0.0085
SMB,0.0005,-0.0052
HML,0.0022,-0.0013


Unnamed: 0,time_series_mean,cross_section_lambda
MKT,0.0073,0.008
SMB,0.0005,-0.0045
HML,0.0022,-0.0025
RMW,0.0037,0.0024
CMA,0.0024,-0.0007


Unnamed: 0,time_series_mean,cross_section_lambda
MKT,0.0073,0.0074
HML,0.0022,-0.0032
RMW,0.0037,0.0033
UMD,0.005,0.0045


Cross-sectional prices broadly mirror the time-series means for market, profitability, and momentum, but size and value premia flip negative (−0.0045 for SMB and −0.0025 for HML in FF5). The AQR lambda on momentum (0.0045) sits slightly below its time-series mean (0.0050), so the cross-section prices momentum exposure at close to the premium the factor itself earned in this sample.

## 3.6
Report the mean absolute cross-sectional pricing errors for each model and interpret the overall pricing performance.

In [21]:
# Mean absolute cross-sectional pricing error, one entry per model.
mae_by_model = {name: fit['mae_residual'] for name, fit in cs_results.items()}
cs_mae = pd.Series(mae_by_model)

display(cs_mae.rename('mae_residual').to_frame().round(4))

Unnamed: 0,mae_residual
CAPM,0.0017
Fama-French 3F,0.0012
Fama-French 5F,0.0011
AQR,0.0014


Cross-sectional MAE falls from 0.0017 in CAPM to 0.0011 in FF5, showing that richer models trim pricing errors even if the improvements are modest. AQR (0.0014) improves on CAPM but trails both FF3 (0.0012) and FF5 (0.0011): momentum helps, but without SMB and CMA the fit does not beat the Fama–French sets. Overall, no model eliminates pricing errors; the largest reduction comes from adding size and value (0.0017 → 0.0012), with profitability and investment contributing a small further gain (0.0012 → 0.0011).