# 04 Structural Break Test


## Data Loading

In [1]:
import pandas as pd
import statsmodels.api as sm
import numpy as np

# ==========================================
# 1. Data loading and dual break-date preprocessing (key change!)
# ==========================================
df = pd.read_csv('tables/FullSample.csv')

# The first four characters of the unnamed first column are parsed as the year.
df['Year'] = df['Unnamed: 0'].str.slice(stop=4).astype(int)

# Two separate break dummies, one per test:
#   Post2010 - regulatory break (used by the volatility test)
#   Post2008 - crisis break (used by the pricing test)
for break_year in (2010, 2008):
    df[f'Post{break_year}'] = df['Year'].ge(break_year).astype(int)


## Volatility Break Test

 波动率断裂检验方程（Mechanism Break Test）：这是得到 $t = -6.06$ 的那个检验，用于证明监管抑制了波动。

 $$LevFac_t^2 = \alpha + \gamma \cdot D_{Post2010,t} + \epsilon_t$$

 变量说明：
 - $LevFac_t^2$: 标准化杠杆因子的平方（作为方差/波动率的代理变量）。
 - $D_{Post2010,t}$: 虚拟变量 (Dummy Variable)，当 $t \ge 2010$ 时取 1，否则取 0。
 - $\gamma$: 核心系数。如果 $\gamma < 0$ 且显著，说明 2010 年后的波动率显著下降。

In [2]:
# ==========================================
# 2. Core test A: volatility break (2010 break date)
# ==========================================
# Hypothesis: the regulatory era (2010 onward) suppressed volatility.
# The squared factor is regressed on an intercept plus the Post2010 dummy.
df['LevFac_Sq'] = df['LevFac'].pow(2)
y_vol = df['LevFac_Sq']
X_vol = sm.add_constant(df['Post2010'])  # note: Post2010 here, not Post2008
model_vol = sm.OLS(endog=y_vol, exog=X_vol).fit()


## Pricing Break Test

定价断裂检验方程（Pricing Break Test）：这是交互项回归（Chow Test 的现代形式），用于检验定价能力是否消失。

$$R_{i,t}^e = \alpha + \beta_{pre} \cdot LevFac_t + \delta \cdot D_{Post2008,t} + \beta_{diff} \cdot (LevFac_t \times D_{Post2008,t}) + \epsilon_{i,t}$$

变量说明：
- $R_{i,t}^e$: 资产 $i$ (如动量赢家组合) 的超额收益。
- $D_{Post2008,t}$: 虚拟变量 (Dummy Variable)，当 $t \ge 2008$ 时取 1，否则取 0（与下方代码中的 `Post2008` 一致）。
- $\beta_{pre}$: 2008 年之前的风险暴露（Pricing Beta）。理论上应显著为正。
- $\beta_{diff}$: 2008 年之后斜率的变化量（即交互项系数）。
- 检验逻辑：如果 $\beta_{diff}$ 显著为负，且 $\beta_{pre} + \beta_{diff} \approx 0$，则证明定价能力在 2008 年后被抵消归零。

In [3]:
# ==========================================
# 3. Core test B: pricing break (2008 break date)
# ==========================================
# Hypothesis: the crisis (2008 onward) broke the pricing mechanism.
# Interaction term built on the 2008 dummy; its coefficient is the slope shift.
df['LevFac_Post2008'] = df['LevFac'].mul(df['Post2008'])

pricing_regressors = ['LevFac', 'Post2008', 'LevFac_Post2008']  # note: Post2008 here
X_pricing = sm.add_constant(df[pricing_regressors])

# The regression only runs when both the 'Mom 10' and 'RF' columns exist;
# otherwise model_mom is set to None so the report can skip Panel B.
if {'RF', 'Mom 10'}.issubset(df.columns):
    df['Mom10_Ex'] = df['Mom 10'].sub(df['RF'])
    y_mom = df['Mom10_Ex']
    model_mom = sm.OLS(endog=y_mom, exog=X_pricing).fit()
else:
    model_mom = None


In [4]:
# ==========================================
# 4. Academic-style report (dual break-date specification)
# ==========================================
def print_academic_report_dual(model_vol, model_mom):
    """Print a two-panel structural-break report to stdout.

    Panel A reports the ``'Post2010'`` coefficient of ``model_vol``.
    Panel B reports the ``'LevFac'`` and ``'LevFac_Post2008'`` coefficients of
    ``model_mom`` and is skipped entirely when ``model_mom`` is ``None``.

    Parameters
    ----------
    model_vol : object
        Exposes ``params``, ``tvalues`` and ``pvalues`` lookups keyed by
        ``'Post2010'``.
    model_mom : object or None
        Exposes the same three lookups keyed by ``'LevFac'`` and
        ``'LevFac_Post2008'``; ``None`` suppresses Panel B.

    Returns
    -------
    None
        The report is written with ``print`` only.
    """
    rule = "-" * 65
    banner = "=" * 70

    def get_stars(p):
        """Return the significance marker for p-value ``p`` (strict cut-offs)."""
        if p < 0.01:
            return "***"
        elif p < 0.05:
            return "**"
        elif p < 0.1:
            return "*"
        else:
            return ""

    def header_row():
        """Column titles; widths must match ``result_row``."""
        return f"{'Variable':<20} {'Coef':<12} {'t-stat':<10} {'P-value':<10}"

    def result_row(label, coef, t_stat, p_value):
        """Format one coefficient line aligned under ``header_row``."""
        # Coefficient and stars are padded together as ONE cell. Padding them
        # separately lets a starred negative value (e.g. "-1.2652***") push the
        # t-stat and p-value columns out of line with the header.
        coef_cell = f"{coef:.4f}{get_stars(p_value)}"
        return f"{label:<20} {coef_cell:<12} {t_stat:<10.4f} {p_value:<10.4f}"

    print("\n" + banner)
    print("      STRUCTURAL BREAK TEST REPORT (DUAL DATE SPECIFICATION)")
    print(banner)

    # --- Panel A: 2010 break ---
    print("\n>>> PANEL A: MECHANISM BREAK (Volatility)")
    print("    Break Date: 2010 (Regulatory Era)")
    print(rule)
    print(header_row())
    print(rule)

    beta_vol = model_vol.params['Post2010']
    t_vol = model_vol.tvalues['Post2010']
    p_vol = model_vol.pvalues['Post2010']
    print(result_row('Post2010 Dummy', beta_vol, t_vol, p_vol))

    if p_vol < 0.01 and beta_vol < 0:
        print(f"[解读] 显著! 监管期波动率下降 (t={t_vol:.2f})。机制成立。")
    else:
        # Always print a verdict so a null result is not mistaken for a
        # missing line in the report.
        print(f"[解读] Post2010 系数未在 1% 水平上显著为负 (t={t_vol:.2f})。")

    # --- Panel B: 2008 break ---
    if model_mom is not None:
        print("\n\n>>> PANEL B: PRICING BREAK (Outcome)")
        print("    Break Date: 2008 (Crisis Onset)")
        print(rule)
        print(header_row())
        print(rule)

        b1 = model_mom.params['LevFac']
        b3 = model_mom.params['LevFac_Post2008']  # interaction on the 2008 dummy
        t_b3 = model_mom.tvalues['LevFac_Post2008']
        p_b3 = model_mom.pvalues['LevFac_Post2008']

        print(result_row('LevFac (Pre)', b1,
                         model_mom.tvalues['LevFac'], model_mom.pvalues['LevFac']))
        print(result_row('Interact (Post08)', b3, t_b3, p_b3))

        if p_b3 < 0.05 and b3 < 0:
            print(f"[解读] 显著! 2008年后定价能力失效 (t={t_b3:.2f})。结果成立。")
        elif p_b3 < 0.05:
            # Significant but with the wrong sign: do not report it as
            # "not significant".
            print(f"[解读] 交互项显著但为正 (t={t_b3:.2f})，与定价失效的方向相反。")
        else:
            print(f"[解读] 交互项不显著 (t={t_b3:.2f})。")

    print("\n" + banner)

# Run the report on the two fitted models (model_mom may be None).
print_academic_report_dual(model_vol=model_vol, model_mom=model_mom)


      STRUCTURAL BREAK TEST REPORT (DUAL DATE SPECIFICATION)

>>> PANEL A: MECHANISM BREAK (Volatility)
    Break Date: 2010 (Regulatory Era)
-----------------------------------------------------------------
Variable             Coef       t-stat     P-value   
-----------------------------------------------------------------
Post2010 Dummy       -1.2652***  -6.0556     0.0000
[解读] 显著! 监管期波动率下降 (t=-6.06)。机制成立。


>>> PANEL B: PRICING BREAK (Outcome)
    Break Date: 2008 (Crisis Onset)
-----------------------------------------------------------------
Variable             Coef       t-stat     P-value   
-----------------------------------------------------------------
LevFac (Pre)         0.0095     1.3125     0.1907
Interact (Post08)    -0.0523**   -2.1739     0.0308
[解读] 显著! 2008年后定价能力失效 (t=-2.17)。结果成立。



In [5]:
import pandas as pd
import statsmodels.api as sm
import numpy as np

# ==========================================
# 0. Helper: significance stars
# ==========================================
def get_stars(p_value):
    """Map a p-value to its significance marker.

    Returns "***" below 0.01, "**" below 0.05, "*" below 0.1, and an empty
    string otherwise. All comparisons are strict, so a value exactly on a
    cut-off falls into the weaker category.
    """
    # Ordered from strictest to loosest; the first cut-off that p_value
    # undercuts decides the marker.
    thresholds = ((0.01, "***"), (0.05, "**"), (0.1, "*"))
    for cutoff, marker in thresholds:
        if p_value < cutoff:
            return marker
    return ""

# ==========================================
# 1. Data preparation
# ==========================================
# NOTE(review): the data-loading cell at the top of the notebook reads
# 'tables/FullSample.csv'; confirm that this different path is intended.
df = pd.read_csv('FullSample.csv')
df['Year'] = df['Unnamed: 0'].str.slice(stop=4).astype(int)

# Boolean masks for the two break dates, reused for sub-samples and dummies.
is_post2010 = df['Year'] >= 2010
is_post2008 = df['Year'] >= 2008

# Sub-samples (independent copies, taken before the derived columns are added)
df_pre_vol = df[~is_post2010].copy()     # volatility, before 2010
df_post_vol = df[is_post2010].copy()     # volatility, 2010 onward

df_pre_price = df[~is_post2008].copy()   # pricing, before 2008
df_post_price = df[is_post2008].copy()   # pricing, 2008 onward

# Full-sample columns used for the difference (dummy / interaction) regressions
df['Post2010'] = is_post2010.astype(int)
df['Post2008'] = is_post2008.astype(int)
df['LevFac_Sq'] = df['LevFac'].pow(2)
df['LevFac_Post2008'] = df['LevFac'].mul(df['Post2008'])

# Excess return of the 'Mom 10' column over 'RF', added to the full sample and
# to both pricing sub-samples when the two source columns are present.
if {'Mom 10', 'RF'}.issubset(df.columns):
    for frame in (df, df_pre_price, df_post_price):
        frame['Mom10_Ex'] = frame['Mom 10'] - frame['RF']

# ==========================================
# Step 1: fill Panel A (volatility)
# ==========================================
print("\n" + "="*60)
print(">>> DATA FOR PANEL A (VOLATILITY)")
print("="*60)

# Col 1: pre-break mean.
# The squared factor is regressed on a column of ones only, so the fit has a
# single coefficient. It is read with .iloc[0]: plain [0] on the results
# relies on pandas' deprecated positional fallback for label-indexed Series.
model_pre_mean = sm.OLS(df_pre_vol['LevFac']**2, np.ones(len(df_pre_vol))).fit()
p_pre = model_pre_mean.pvalues.iloc[0]
print(f"[Col 1] Pre-2010 Constant: {model_pre_mean.params.iloc[0]:.4f}{get_stars(p_pre)} (t={model_pre_mean.tvalues.iloc[0]:.2f})")

# Col 2: post-break mean (same constant-only regression on the post sample).
model_post_mean = sm.OLS(df_post_vol['LevFac']**2, np.ones(len(df_post_vol))).fit()
p_post = model_post_mean.pvalues.iloc[0]
print(f"[Col 2] Post-2010 Constant: {model_post_mean.params.iloc[0]:.4f}{get_stars(p_post)} (t={model_post_mean.tvalues.iloc[0]:.2f})")

# Col 3: difference, i.e. the Post2010 dummy coefficient on the full sample.
model_diff = sm.OLS(df['LevFac_Sq'], sm.add_constant(df['Post2010'])).fit()
p_diff = model_diff.pvalues['Post2010']
print(f"[Col 3] Difference (Coef): {model_diff.params['Post2010']:.4f}{get_stars(p_diff)} (t={model_diff.tvalues['Post2010']:.2f})")

# ==========================================
# Step 2: fill Panel B (pricing)
# ==========================================
print("\n" + "="*60)
print(">>> DATA FOR PANEL B (PRICING)")
print("="*60)

# Panel B is only produced when the excess-return column was built earlier.
if 'Mom10_Ex' in df.columns:
    # Col 1: slope on LevFac in the pre-2008 sub-sample
    X_pre = sm.add_constant(df_pre_price['LevFac'])
    model_price_pre = sm.OLS(endog=df_pre_price['Mom10_Ex'], exog=X_pre).fit()
    p_beta_pre = model_price_pre.pvalues['LevFac']
    print(f"[Col 1] Pre-2008 Beta: {model_price_pre.params['LevFac']:.4f}{get_stars(p_beta_pre)} (t={model_price_pre.tvalues['LevFac']:.2f})")

    # Col 2: slope on LevFac in the 2008-onward sub-sample
    X_post = sm.add_constant(df_post_price['LevFac'])
    model_price_post = sm.OLS(endog=df_post_price['Mom10_Ex'], exog=X_post).fit()
    p_beta_post = model_price_post.pvalues['LevFac']
    print(f"[Col 2] Post-2008 Beta: {model_price_post.params['LevFac']:.4f}{get_stars(p_beta_post)} (t={model_price_post.tvalues['LevFac']:.2f})")

    # Col 3: difference, i.e. the interaction coefficient on the full sample
    full_regressors = ['LevFac', 'Post2008', 'LevFac_Post2008']
    X_full = sm.add_constant(df[full_regressors])
    model_price_diff = sm.OLS(endog=df['Mom10_Ex'], exog=X_full).fit()
    p_diff_interact = model_price_diff.pvalues['LevFac_Post2008']
    print(f"[Col 3] Difference (Interact): {model_price_diff.params['LevFac_Post2008']:.4f}{get_stars(p_diff_interact)} (t={model_price_diff.tvalues['LevFac_Post2008']:.2f})")

print("\n" + "="*60)
print("Note: *** p<0.01, ** p<0.05, * p<0.1")


>>> DATA FOR PANEL A (VOLATILITY)
[Col 1] Pre-2010 Constant: 1.3962*** (t=11.02)
[Col 2] Post-2010 Constant: 0.1310*** (t=12.24)
[Col 3] Difference (Coef): -1.2652*** (t=-6.06)

>>> DATA FOR PANEL B (PRICING)
[Col 1] Pre-2008 Beta: 0.0095 (t=1.29)
[Col 2] Post-2008 Beta: -0.0428* (t=-1.96)
[Col 3] Difference (Interact): -0.0523** (t=-2.17)

Note: *** p<0.01, ** p<0.05, * p<0.1


  p_pre = model_pre_mean.pvalues[0]
  print(f"[Col 1] Pre-2010 Constant: {model_pre_mean.params[0]:.4f}{get_stars(p_pre)} (t={model_pre_mean.tvalues[0]:.2f})")
  p_post = model_post_mean.pvalues[0]
  print(f"[Col 2] Post-2010 Constant: {model_post_mean.params[0]:.4f}{get_stars(p_post)} (t={model_post_mean.tvalues[0]:.2f})")
