# Factor Regression Demo

This notebook demonstrates how to use the following functions from `vbase_py_utils.stats.cross_section_regression`:

- `run_cross_sectional_regression`
- `run_monthly_factor_returns`

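We simulate asset returns from known ("true") factor returns, estimate the factor returns with cross-sectional regressions, and compare the estimates with the true values, both in a table and in a plot.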

# Import modules

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from vbase_py_utils.stats.cross_section_regression import (
    run_cross_sectional_regression,
    run_monthly_factor_returns,
)

# Simulate data

In [None]:
# Seed NumPy's global generator so that Restart & Run All reproduces the same
# simulated panel. The order of the random draws below matters for that.
np.random.seed(42)

# Size of the simulated panel.
n_assets = 50
n_factors = 3
n_months = 12

# Labels for assets, factors, and monthly periods ("2023-01" ... "2023-12").
assets = [f"Asset_{i}" for i in range(n_assets)]
factors = [f"Factor_{j}" for j in range(n_factors)]
periods = [f"2023-{str(m).zfill(2)}" for m in range(1, n_months + 1)]

# Factor exposures: standard-normal draws, one row per (period, asset) pair
# and one column per factor.
panel_index = pd.MultiIndex.from_product([periods, assets], names=["period", "asset"])
exposure_draws = np.random.randn(len(panel_index), n_factors)
exposures_df = pd.DataFrame(exposure_draws, index=panel_index, columns=factors)

# "True" factor returns: normal draws scaled by 0.02, one row per period.
factor_return_draws = np.random.randn(n_months, n_factors) * 0.02
true_factor_returns = pd.DataFrame(factor_return_draws, index=periods, columns=factors)

# Asset returns for each period: exposures @ factor returns + idiosyncratic
# noise (normal draws scaled by 0.005).
returns_data = {}
for period in periods:
    period_exposures = exposures_df.loc[period].to_numpy()
    period_factor_returns = true_factor_returns.loc[period].to_numpy()
    idiosyncratic = np.random.randn(n_assets) * 0.005
    returns_data[period] = pd.Series(
        period_exposures @ period_factor_returns + idiosyncratic,
        index=assets,
    )

# Equal weights: every asset gets weight 1.0 in every period.
weights_data = {period: pd.Series(1.0, index=assets) for period in periods}

# returns_df has one row per asset and one column per period (dict keys become
# columns). weights_df is indexed by (period, asset) with a single "weight" column.
returns_df = pd.DataFrame(returns_data)
weights_df = pd.concat(weights_data, names=["period", "asset"]).to_frame("weight")

# Run `run_monthly_factor_returns`

In [None]:
# Value forwarded as `huber_t`.
# NOTE(review): presumably the tuning constant of a Huber robust regression;
# confirm against the docstring of run_monthly_factor_returns.
huber_threshold = 1.345

# Estimate factor returns from the simulated returns, exposures, and weights.
estimated_returns = run_monthly_factor_returns(
    returns_df=returns_df,
    exposures_df=exposures_df,
    weights_df=weights_df,
    huber_t=huber_threshold,
)

# Compare results

In [None]:
# Prefix the column labels so the simulated and estimated series stay
# distinguishable once they sit side by side.
true_labeled = true_factor_returns.add_prefix("True_")
estimated_labeled = estimated_returns.add_prefix("Est_")

# Column-wise concat aligns rows on index labels.
# NOTE(review): assumes estimated_returns is indexed by the same period labels
# as true_factor_returns; otherwise unmatched rows are filled with NaN. Confirm.
comparison = pd.concat([true_labeled, estimated_labeled], axis=1)

comparison.head()

# Visualize Factor 0

In [None]:
# Overlay the simulated ("true") and estimated return series for Factor_0,
# using the explicit figure/axes interface.
fig, ax = plt.subplots(figsize=(10, 4))

ax.plot(comparison.index, comparison["True_Factor_0"], marker="o", label="True Factor_0")
ax.plot(comparison.index, comparison["Est_Factor_0"], marker="x", label="Estimated Factor_0")

# Rotate the existing x tick labels so the period labels do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)

ax.set_title("True vs Estimated Factor 0")
ax.set_xlabel("Period")
ax.set_ylabel("Factor Return")
ax.grid(True)
ax.legend()

fig.tight_layout()
plt.show()