# Results & Diagnostics: Predictive, Interventional, and Robustness

**Goal.** Evaluate models on **predictive fit**, **interventional fidelity**, and **robustness under interventions**, mirroring the thesis.


In [None]:
from pathlib import Path
import sys

# The project root is taken to be the parent of the current working directory;
# it is put at the front of sys.path so that the `src.*` imports resolve.
PROJECT_ROOT = Path().resolve().parent
_root_entry = str(PROJECT_ROOT)
if _root_entry not in sys.path:
    sys.path.insert(0, _root_entry)

# Locations derived from the project root.
DATA_DIR = PROJECT_ROOT.joinpath("data", "processed")
CONFIGS_DIR = PROJECT_ROOT.joinpath("configs")
OUTPUTS_DIR = PROJECT_ROOT.joinpath("outputs", "finance")
# NOTE: despite its name, BEST_DIR is the path of a YAML file, not a directory.
BEST_DIR = CONFIGS_DIR.joinpath("best_config.yaml")

# Make sure the output directory (and any missing parents) exists.
OUTPUTS_DIR.mkdir(parents=True, exist_ok=True)

import os, yaml, json, math, torch, numpy as np, pandas as pd
import torch.nn.functional as F
from pathlib import Path
from torch_geometric.loader import DataLoader
from torch.utils.data import Subset
import matplotlib.pyplot as plt

from src.models import GNN_NCM
from src.dataloader import CausalFactorDataset
from src.trainer import CausalTwoPartTrainer  



## 1. Load Best Parameters

In [None]:
# Parse the YAML configuration stored at BEST_DIR.
cfg_path = Path(BEST_DIR)
with cfg_path.open("r") as f:
    cfg = yaml.safe_load(f)

# CUDA is used only when the config requests it (the value assumed when the
# "device" key is absent) and torch reports a usable CUDA device; otherwise
# everything runs on the CPU.
_wants_cuda = cfg.get("device", "cuda") == "cuda"
if _wants_cuda and torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print("device:", device)


In [None]:
# Build the dataset from the processed-data directory.
ds = CausalFactorDataset(root_dir=DATA_DIR, drop_self_for_target=True)

# Index-based split: the first 80% of the samples are used for training and
# the remainder for validation. Only the training loader shuffles.
split = int(0.8 * len(ds))
_batch_size = cfg["data"]["batch_size"]
_train_part = Subset(ds, range(split))
_val_part = Subset(ds, range(split, len(ds)))
train_loader = DataLoader(_train_part, batch_size=_batch_size, shuffle=True)
val_loader = DataLoader(_val_part, batch_size=_batch_size, shuffle=False)

# Model dimensions are read off the first batch the training loader yields.
# NOTE(review): if batch_size > 1 this object is presumably a batched graph, so
# the edge/node counts below would describe the whole batch rather than a
# single sample — confirm that this is what the models expect.
g0 = next(iter(train_loader))
num_features = g0.num_node_features
num_edges = g0.edge_index.size(1)

print(f"num_features={num_features} | num_edges={num_edges} | nodes={g0.num_nodes}")


## 2. Initializing Models and Training

In [None]:
# Constructor arguments shared by both GNN_NCM variants; the two models differ
# only in `gnn_mode`.
_model_cfg = cfg["model"]
_common_kwargs = dict(
    num_features=num_features,
    num_edges=num_edges,
    hidden_dim=_model_cfg["hidden_dim"],
    out_dim=_model_cfg["out_dim"],
    noise_dim=_model_cfg["noise_dim"],
)

# This variant takes its mode from the config.
# NOTE(review): the variable name suggests a per-edge mode — confirm that the
# config's `gnn_mode` is not "shared", otherwise both models are the same kind.
model_per_edge = GNN_NCM(gnn_mode=_model_cfg["gnn_mode"], **_common_kwargs).to(device)

# This variant always uses the "shared" mode.
model_shared = GNN_NCM(gnn_mode="shared", **_common_kwargs).to(device)




In [None]:
# Every trainer hyper-parameter comes straight from the "training" section of
# the config, under the same key name as the constructor argument.
tcfg = cfg["training"]
_trainer_keys = (
    "epochs_obs", "epochs_do",
    "lr", "w_obs", "w_do",
    "weight_decay", "clip",
    "neutral", "delta",
)
trainer = CausalTwoPartTrainer(**{key: tcfg[key] for key in _trainer_keys})


def _fit_and_score(model):
    """Train `model` with the shared trainer and return its validation MSE."""
    trainer.train(model, train_loader, val_loader=val_loader)
    return trainer.evaluate_obs_mse(model, val_loader)


# The same trainer instance is used for both models, one after the other.
val_mse_per_edge = _fit_and_score(model_per_edge)
val_mse_shared = _fit_and_score(model_shared)


In [None]:
from src.models import BaselineGCN
from torch import nn, optim


# Baseline GCN with the same hidden/output sizes as the causal models, trained
# with Adam (default settings) on a plain MSE objective.
baseline_model = BaselineGCN(
    num_features=num_features,
    hidden_dim=cfg["model"]["hidden_dim"],
    out_dim=cfg["model"]["out_dim"],
)
optimizer_baseline = optim.Adam(baseline_model.parameters())
loss_fn_baseline = nn.MSELoss()

# 200 passes over the training loader; the loss of the last batch is printed
# every 40 epochs.
baseline_model.train()
for epoch in range(1, 201):
    for g in train_loader:
        optimizer_baseline.zero_grad()
        pred = baseline_model(g.x, g.edge_index)
        loss = loss_fn_baseline(pred, g.y)
        loss.backward()
        optimizer_baseline.step()
    if epoch % 40 == 0:
        print(f"[Baseline] ep {epoch:03d} loss={loss.item():.4f}")

# quick baseline val: squared error at the VOL node, averaged over batches.
# NOTE(review): indexing with VOL_IDX selects a single entry per batch, which
# assumes each validation batch holds one graph — confirm against batch_size.
baseline_model.eval()
VOL_IDX = ds.node_map["VOL"]
total = 0.0
n = 0
with torch.no_grad():
    for g in val_loader:
        pred_vol = baseline_model(g.x, g.edge_index).squeeze(-1)[VOL_IDX]
        true_vol = g.y.squeeze(-1)[VOL_IDX]
        total += float(((pred_vol - true_vol) ** 2).item())
        n += 1
# max(n, 1) avoids a division by zero when the validation loader is empty.
val_loss_bl = total / max(n, 1)
print(f"[baseline] val_mse={val_loss_bl:.6f}")

## 3. Predictive Fit

- Metric: **MSE / RMSE** on `VOL` (validation split).


In [None]:
# Validation MSE per model, in the order they appear in the report.
_val_mse_by_model = (
    ("per_edge", val_mse_per_edge),
    ("shared", val_mse_shared),
    ("gcn", val_loss_bl),
)

# For every model store the MSE and its square root (RMSE) as plain floats so
# the result is JSON-serialisable.
pred_fit = {
    label: {"mse": float(mse), "rmse": float(np.sqrt(mse))}
    for label, mse in _val_mse_by_model
}
print(pred_fit)

# Persist the summary next to the other outputs.
_pred_fit_path = os.path.join(OUTPUTS_DIR, "predictive_fit_vol.json")
with open(_pred_fit_path, "w") as f:
    json.dump(pred_fit, f, indent=2)


## 4. Robustness Stress Tests (Finance Panel)

Two stress tests:
1) `do(BAS = BAS + delta)`
2) `do(Mom = Mom + delta)`

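Compute the degradation ratio for each model and each stress test (the code adds a small constant to the denominator to avoid division by zero):

$$\mathrm{Deg} = \frac{\mathrm{MSE}_{\text{stress}}}{\mathrm{MSE}_{\text{factual}}}$$

- Plot a bar chart of Deg per model; also plot the absolute change in MSE.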


In [None]:
# stress degradation
import numpy as np, json, os

@torch.no_grad()
def stress_degradation(model, loader, ops):
    """Compare a model's MSE on factual inputs with its MSE under interventions.

    The factual MSE is the mean, over the batches of ``loader``, of the
    per-batch mean squared error between ``model(g.x, g.edge_index)`` and
    ``g.y``. For each intervention in ``ops`` the same quantity is computed
    after feature 0 of the intervened node has been replaced.

    Args:
        model: Callable taking ``(x, edge_index)``. If it exposes a
            ``do_intervention`` attribute, that is called with
            ``(x, edge_index, intervened_nodes=[node],
            new_feature_values=<float32 tensor of length 1>)``; otherwise the
            feature is overwritten in a copy of ``x`` and the model is called
            normally.
        loader: Iterable of batches exposing ``x``, ``edge_index`` and ``y``.
            It is iterated once for the factual pass and once per op.
        ops: Iterable of dicts with keys ``"name"``, ``"node"`` and either
            ``"value_fn"`` (maps the current value of ``x[node, 0]`` to the new
            value) or ``"value_const"`` (the new value itself).

    Returns:
        A list with one dict per op, holding ``"stress_name"``,
        ``"mse_factual"``, ``"mse_stress"`` and ``"degradation_ratio"``
        (``mse_stress / (mse_factual + 1e-12)``).

    Raises:
        Any exception raised by the model or by ``do_intervention`` propagates,
        including ``AttributeError``.
    """
    # Resolve the optional method once. Only a model that really lacks
    # `do_intervention` uses the fallback; an AttributeError raised *inside*
    # the method is a bug in the model and must not be silently replaced by
    # fallback numbers.
    do_intervention = getattr(model, "do_intervention", None)

    # Factual MSE: mean of the per-batch MSEs.
    factual_errors = [
        float(((model(g.x, g.edge_index) - g.y) ** 2).mean().item())
        for g in loader
    ]
    mse_f = float(np.mean(factual_errors))

    out = []
    for op in ops:
        node = op["node"]
        use_value_fn = "value_fn" in op
        stressed_errors = []
        for g in loader:
            # Work on a copy so the loader's data is never modified.
            x = g.x.clone()
            if use_value_fn:
                new_val = op["value_fn"](float(x[node, 0].item()))
            else:
                new_val = float(op["value_const"])

            if do_intervention is not None:
                yhat_s = do_intervention(
                    x, g.edge_index,
                    intervened_nodes=[node],
                    new_feature_values=torch.tensor([new_val], dtype=torch.float32),
                )
            else:
                x[node, 0] = new_val
                yhat_s = model(x, g.edge_index)
            stressed_errors.append(float(((yhat_s - g.y) ** 2).mean().item()))

        mse_s = float(np.mean(stressed_errors))
        out.append({
            "stress_name": op["name"],
            "mse_factual": mse_f,
            "mse_stress": mse_s,
            # The small constant keeps the ratio finite when the factual MSE is 0.
            "degradation_ratio": float(mse_s / (mse_f + 1e-12)),
        })
    return out


# build ops: node indices of the two intervened factors and the shift size.
BAS_IDX = ds.node_map["BAS"]
MOM_IDX = ds.node_map["Mom"]
# NOTE: the same shift is applied to both BAS and Mom, despite the name.
DELTA_BAS = 0.5


def _shift_by_delta(v):
    """Return `v` plus the module-level DELTA_BAS (looked up at call time)."""
    return v + DELTA_BAS


ops = [
    {"name": op_name, "node": node_idx, "value_fn": _shift_by_delta}
    for op_name, node_idx in (
        ("do_BAS_plus_delta", BAS_IDX),
        ("do_MOM_plus_delta", MOM_IDX),
    )
]

# Put every model into eval mode and move it to the CPU before the stress run.
models = {}
for _label, _net in (
    ("per_edge", model_per_edge),
    ("shared", model_shared),
    ("gcn", baseline_model),
):
    models[_label] = _net.eval().cpu()

# One list of per-op results for each model, evaluated on the validation loader.
stress = {}
for _label, _net in models.items():
    stress[_label] = stress_degradation(_net, val_loader, ops)
print(stress)

# Persist the raw stress results.
os.makedirs(OUTPUTS_DIR, exist_ok=True)
_stress_path = os.path.join(OUTPUTS_DIR, "stress_finance.json")
with open(_stress_path, "w") as f:
    json.dump(stress, f, indent=2)



In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Flatten {model: [result, ...]} into one row per (model, stress test).
rows = []
for model_name, results in stress.items():
    rows.extend({**result, "model": model_name} for result in results)
df = pd.DataFrame(rows)[["model", "stress_name", "mse_factual", "mse_stress", "degradation_ratio"]]


def _save_bar_chart(labels, heights, title, ylabel, filename):
    """Draw one bar per model, save the figure under OUTPUTS_DIR and show it."""
    plt.figure(figsize=(6, 4))
    plt.bar(labels, heights)
    plt.title(title)
    plt.xlabel("Model")
    plt.ylabel(ylabel)
    plt.tight_layout()
    plt.savefig(os.path.join(OUTPUTS_DIR, filename), dpi=150)
    plt.show()


# --- Plot 1: degradation ratio under do(BAS = BAS + Δ) ---
subset = df[df["stress_name"] == "do_BAS_plus_delta"]
_save_bar_chart(
    subset["model"],
    subset["degradation_ratio"],
    "Stress: do(BAS = BAS + Δ) — Degradation Ratio",
    "MSE_stress / MSE_factual",
    "stress_degradation_do_BAS.png",
)

# --- Plot 2: degradation ratio under do(Mom = Mom + Δ) ---
subset2 = df[df["stress_name"] == "do_MOM_plus_delta"]
_save_bar_chart(
    subset2["model"],
    subset2["degradation_ratio"],
    "Stress: do(Mom = Mom + Δ) — Degradation Ratio",
    "MSE_stress / MSE_factual",
    "stress_degradation_do_MOM.png",
)

# --- Plot 3: factual MSE (mean across rows for each model) ---
mse_factual = df.groupby("model")["mse_factual"].mean().reset_index()
_save_bar_chart(
    mse_factual["model"],
    mse_factual["mse_factual"],
    "Factual MSE (Val split)",
    "MSE (factual)",
    "factual_mse.png",
)

df
