# Full VECM Estimation for Ranks 2-4

**Purpose**: Generate complete VECM outputs for ranks 2, 3, and 4 (all with k_ar_diff=1).

**Outputs for each rank**:

- Alpha/Beta/Gamma matrices
- Long-run influence heatmaps
- Short-run dynamics heatmaps
- Beta importance charts
- Model summaries
- Residual diagnostics

---

## Setup

In [None]:
# --- Standard library ---------------------------------------------------
import warnings
from pathlib import Path

# --- Third-party --------------------------------------------------------
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.vector_ar.vecm import VECM

# Suppress every warning category from this point on.
warnings.filterwarnings("ignore")

# Use relative path from VECM_v12.3_Final directory
DATA_FILE = "../complete_normalized_dataset_v12.3.xlsx"
BASE_DIR = Path(".")

# Columns pulled from the workbook, in the order used for every matrix.
SELECTED_VARS = [
    "Junior_Enlisted_Z",
    "Company_Grade_Officers_Z",
    "Field_Grade_Officers_Z",
    "GOFOs_Z",
    "Warrant_Officers_Z",
    "Policy_Count_Log",
    "Total_PAS_Z",
    "FOIA_Simple_Days_Z",
]

# Short labels; the embedded "\n" splits each label over two lines
# (presumably for heatmap tick labels -- confirm against the plotting code).
DISPLAY_NAMES = {
    "Junior_Enlisted_Z": "Junior\nEnlisted",
    "Company_Grade_Officers_Z": "Company\nGrade",
    "Field_Grade_Officers_Z": "Field\nGrade",
    "GOFOs_Z": "GOFOs",
    "Warrant_Officers_Z": "Warrant\nOfficers",
    "Policy_Count_Log": "Policy\nCount",
    "Total_PAS_Z": "Total\nPAS",
    "FOIA_Simple_Days_Z": "FOIA\nDays",
}

# Single-line descriptive labels for the same columns.
DISPLAY_NAMES_LONG = {
    "Junior_Enlisted_Z": "Junior Enlisted (E-1 to E-4)",
    "Company_Grade_Officers_Z": "Company Grade (O-1 to O-3)",
    "Field_Grade_Officers_Z": "Field Grade (O-4 to O-5)",
    "GOFOs_Z": "General/Flag Officers",
    "Warrant_Officers_Z": "Warrant Officers",
    "Policy_Count_Log": "Policy Volume (Log)",
    "Total_PAS_Z": "Political Appointees (PAS)",
    "FOIA_Simple_Days_Z": "FOIA Processing Delay",
}

# Run banner: one print call, newline-separated, so the emitted text is the
# same as printing each piece on its own.
print(
    "=" * 80,
    "FULL VECM ESTIMATION: RANKS 2, 3, 4",
    "=" * 80,
    "\nAll use k_ar_diff=1 (validated as optimal)",
    "\n" + "=" * 80,
    sep="\n",
)

## Load Data

In [None]:
# Read the workbook and strip stray whitespace from the header names so the
# column selection below matches SELECTED_VARS exactly.
df = pd.read_excel(DATA_FILE)
df.columns = df.columns.str.strip()

# Keep only the model variables and drop every row that has a missing value.
data = df[SELECTED_VARS].dropna().copy()

# The last 5 rows form the test window; everything before them is training.
train_data, test_data = data.iloc[:-5], data.iloc[-5:]

print(f"\nData: {data.shape[0]} observations x {data.shape[1]} variables")
print(f"Training: {train_data.shape[0]} observations")
print(f"Test: {test_data.shape[0]} observations")

## Process Each Rank (2, 3, 4)

This cell estimates a VECM for ranks 2, 3, and 4, generating all outputs for each.

In [None]:
# Cointegration ranks to estimate; shared by the estimation loop and the
# closing directory listing so the two cannot drift apart.
RANKS = (2, 3, 4)


def _longrun_influence(alpha, beta):
    """Return the long-run influence magnitude and direction matrices.

    For every pair (i, j) the per-relation products ``alpha[i, r] * beta[j, r]``
    are combined over all cointegrating relations ``r``:

    * magnitude[i, j] = sum_r |alpha[i, r] * beta[j, r]|
    * direction[i, j] = sign(sum_r alpha[i, r] * beta[j, r])

    Args:
        alpha: 2-D array-like, one row per variable, one column per relation.
        beta: 2-D array-like with the same shape as ``alpha``.

    Returns:
        Tuple ``(magnitude, direction)`` of square arrays whose side equals
        the number of rows in ``alpha``.
    """
    alpha = np.asarray(alpha, dtype=float)
    beta = np.asarray(beta, dtype=float)
    # |a * b| == |a| * |b|, so the sum of absolute products is a matrix
    # product of the element-wise absolute values.
    magnitude = np.abs(alpha) @ np.abs(beta).T
    direction = np.sign(alpha @ beta.T)
    return magnitude, direction


for rank in RANKS:
    print(f"\n{'='*80}")
    print(f"PROCESSING RANK={rank}")
    print(f"{'='*80}")

    # Create output directory
    OUTPUT_DIR = BASE_DIR / f"VECM_Rank{rank}_Final_Executive_Summary"
    OUTPUT_DIR.mkdir(exist_ok=True, parents=True)

    # Estimate VECM
    # NOTE(review): the model is fitted on the full `data` frame, not on
    # `train_data` -- confirm that using the whole sample is intended here.
    print("\n[1] Estimating VECM...")
    vecm = VECM(data, k_ar_diff=1, coint_rank=rank, deterministic='nc')
    vecm_result = vecm.fit()

    # Extract matrices
    print("[2] Extracting matrices...")
    alpha = vecm_result.alpha
    beta = vecm_result.beta
    gamma = vecm_result.gamma

    # One column label per cointegrating relation: EC1 .. EC<rank>.
    ec_labels = [f'EC{i+1}' for i in range(rank)]
    alpha_df = pd.DataFrame(alpha, index=SELECTED_VARS, columns=ec_labels)
    beta_df = pd.DataFrame(beta, index=SELECTED_VARS, columns=ec_labels)
    # Labelling gamma's columns with SELECTED_VARS requires it to be square,
    # which relies on k_ar_diff=1 above.
    gamma_df = pd.DataFrame(gamma, index=SELECTED_VARS, columns=SELECTED_VARS)

    # Save matrices
    alpha_df.to_excel(OUTPUT_DIR / f"alpha_matrix_rank{rank}.xlsx")
    beta_df.to_excel(OUTPUT_DIR / f"beta_matrix_rank{rank}.xlsx")
    gamma_df.to_excel(OUTPUT_DIR / f"gamma_matrix_rank{rank}.xlsx")

    # Calculate long-run influence
    print("[3] Calculating long-run influence...")
    # `signed_direction` is computed but not written to disk in this cell.
    longrun_influence, signed_direction = _longrun_influence(alpha, beta)

    longrun_df = pd.DataFrame(longrun_influence, index=SELECTED_VARS, columns=SELECTED_VARS)
    longrun_df.to_excel(OUTPUT_DIR / f"longrun_influence_rank{rank}.xlsx")

    # NOTE(review): no plotting code exists in this cell; the message below is
    # only a progress marker.
    print("[4] Creating visualizations...")
    print(f"[5] Saved outputs to VECM_Rank{rank}_Final_Executive_Summary/")

print("\n" + "=" * 80)
print("ALL RANKS COMPLETE!")
print("=" * 80)
print("\nOutput directories:")
for rank in RANKS:
    print(f"  - VECM_Rank{rank}_Final_Executive_Summary/")