In [None]:
import json
from pathlib import Path
import pandas as pd

root = Path("exp2")
rows = []

# Path.rglob makes no promise about the order in which paths are produced,
# so sort them: the table displayed below then has the same row order on
# every run.
for f in sorted(root.rglob("*.json")):
    # Expected directory layout: exp2/exp2/N?/model/metrics_*.json
    model = f.parent.name        # name of the folder holding the file
    n = f.parent.parent.name     # name of the folder one level above that
    # Any file whose name does not contain "pointwise" is labelled "distribution".
    kind = "pointwise" if "pointwise" in f.name else "distribution"

    data = json.loads(f.read_text(encoding="utf-8"))
    # Each top-level key becomes one row tagged with that key as "domain";
    # its value (presumably a metric-name -> value mapping, confirm against
    # the JSON files) is flattened into additional columns.
    for domain, metrics in data.items():
        row = {
            "N": n,
            "model": model,
            "kind": kind,
            "domain": domain,
        }
        row.update(metrics)
        rows.append(row)

df = pd.DataFrame(rows)
display(df)


In [None]:
import json
from pathlib import Path
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# ==== 1) Load every JSON file under exp2 ====
root = Path("exp2")
rows = []

for json_path in root.rglob("*.json"):
    # The parent folder name is stored as the ablation name, the folder above
    # it as the N label.
    ablation_name = json_path.parent.name
    n_label = json_path.parent.parent.name
    if "pointwise" in json_path.name:
        kind = "pointwise"
    else:
        kind = "distribution"

    payload = json.loads(json_path.read_text(encoding="utf-8"))
    # One record per top-level key; the value under that key is merged into
    # the record as extra columns.
    for domain_name, domain_values in payload.items():
        record = dict(N=n_label, ablation=ablation_name, kind=kind, domain=domain_name)
        record.update(domain_values)
        rows.append(record)

df = pd.DataFrame(rows)

# ==== 2) Keep only the N1 pointwise rows and three metrics ====
df = df[(df["kind"]=="pointwise") & (df["N"]=="N1")].copy()
metrics = ["spearman_rho","js_divergence","rmse"]

# ==== 3) Baseline (edit this) ====
BASE = "Full"   # TODO: change to the folder name of your baseline run

# ==== 4) Gain relative to the baseline ====
# spearman: higher is better => gain = ablation - base
# js / rmse: lower is better  => gain = base - ablation
# In both cases a positive gain means the ablation beats the baseline.
better_high = {"spearman_rho"}
better_low = {"js_divergence","rmse"}

# Baseline rows, one set of "<metric>_base" columns per domain.
base = df[df["ablation"] == BASE][["domain"] + metrics].copy()
if base.empty:
    # Without this check the inner merge below yields an empty frame and the
    # plotting stage draws nothing, with no hint that BASE is wrong.
    raise ValueError(
        f"Baseline ablation {BASE!r} has no N1 pointwise rows; "
        f"available ablations: {sorted(df['ablation'].unique())}"
    )
base = base.rename(columns={m: f"{m}_base" for m in metrics})

# Attach the baseline values of the same domain to every row.
merged = df.merge(base, on="domain", how="inner")

for m in metrics:
    if m in better_high:
        merged[f"{m}_gain"] = merged[m] - merged[f"{m}_base"]
    else:
        merged[f"{m}_gain"] = merged[f"{m}_base"] - merged[m]
from mpl_toolkits.axes_grid1.inset_locator import inset_axes

# ==== 5) Plot: one figure per domain, one heatmap per metric in a single row
#         (every subplot keeps its own y-axis labels) ====
sns.set_theme(style="white")

for domain in sorted(merged["domain"].unique()):
    sub = merged[merged["domain"] == domain].copy()

    # squeeze=False always returns a 2-D array of axes, so the loop below
    # also works when `metrics` holds a single entry.
    fig, axes = plt.subplots(
        1, len(metrics),
        figsize=(14, 0.45*sub["ablation"].nunique() + 2.5),
        sharey=False,
        squeeze=False,
    )

    for ax, m in zip(axes[0], metrics):
        gain_col = f"{m}_gain"
        # Rows are ordered by mean gain, largest first, separately per metric.
        order = sub.groupby("ablation")[gain_col].mean().sort_values(ascending=False).index

        pivot = sub.pivot_table(
            index="ablation",
            values=gain_col,
            aggfunc="mean"
        ).reindex(order)

        # center=0 puts the neutral colour at zero gain.
        sns.heatmap(
            pivot,
            cmap="RdBu_r",
            center=0,
            annot=True,
            fmt=".3f",
            cbar=True,
            cbar_kws={"label": f"{m} gain (vs {BASE})"},
            ax=ax
        )

        ax.set_title(f"{domain} | {m}")
        ax.set_xlabel("")
        ax.set_ylabel("Ablation")
        ax.tick_params(axis="y", labelleft=True)

    fig.suptitle(f"{domain} - Ablation Gains (N1 pointwise)", y=1.02, weight="bold")
    fig.tight_layout()
    plt.show()
    # Release the figure so a long list of domains does not pile up open figures.
    plt.close(fig)


In [None]:
import json
from pathlib import Path
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Global seaborn theme for this cell: white grid, Times New Roman, and
# explicit font sizes for text, titles, axis labels and tick labels.
theme_rc = {
    "font.size": 14,
    "axes.titlesize": 16,
    "axes.labelsize": 16,
    "xtick.labelsize": 13,
    "ytick.labelsize": 16,
}
sns.set_theme(style="whitegrid", font="Times New Roman", rc=theme_rc)

# 1) Load every JSON file under exp2 into one flat table
def _iter_metric_rows(base_dir):
    """Yield one record per (JSON file, top-level key) found under base_dir.

    Each record carries the grandparent folder name as "N", the parent folder
    name as "ablation", a "kind" derived from the file name, the top-level key
    as "domain", plus whatever entries are stored under that key.
    """
    for json_file in base_dir.rglob("*.json"):
        ablation_name = json_file.parent.name
        n_label = json_file.parent.parent.name
        file_kind = "pointwise" if "pointwise" in json_file.name else "distribution"
        content = json.loads(json_file.read_text(encoding="utf-8"))
        for domain_name, values in content.items():
            record = {
                "N": n_label,
                "ablation": ablation_name,
                "kind": file_kind,
                "domain": domain_name,
            }
            record.update(values)
            yield record


root = Path("exp2")
rows = list(_iter_metric_rows(root))

df = pd.DataFrame(rows)

# 1.5) N1 pointwise rows, with mae / coverage_rate replaced by the values
#      from the N50 distribution rows of the same (domain, ablation)
sub_n1 = df[(df["N"]=="N1") & (df["kind"]=="pointwise")].copy()
sub_n50 = df[(df["N"]=="N50") & (df["kind"]=="distribution")].copy()

sub = sub_n1.copy()
# Only handle the N50 metrics that actually exist as columns in the loaded data.
n50_metrics = [m for m in ["mae", "coverage_rate"] if m in df.columns]
if n50_metrics:
    cols = ["domain", "ablation"] + n50_metrics
    # Discard a row only when *every* N50 metric is missing. A plain dropna()
    # would also throw away a valid value whenever its sibling metric is NaN.
    n50_vals = sub_n50[cols].dropna(subset=n50_metrics, how="all")
    # Remove any same-named columns from the N1 rows first so the merge
    # brings in the N50 values without _x/_y suffixes.
    sub = sub.drop(columns=n50_metrics, errors="ignore")
    sub = sub.merge(n50_vals, on=["domain", "ablation"], how="left")

# 2) Baseline
BASE = "Full"  # change to your baseline name
metrics = ["spearman_rho", "rmse", "js_divergence", "mae", "coverage_rate"]
higher_better = {"spearman_rho", "coverage_rate"}
lower_better = {"js_divergence", "rmse", "mae"}

# A metric that never occurs in the loaded data has no column in `sub`, and
# the column selection below would fail with KeyError. Add such metrics as
# all-NaN columns so they pass through as missing values instead.
missing_metrics = [metric for metric in metrics if metric not in sub.columns]
if missing_metrics:
    print("Metrics missing from the data (treated as NaN):", missing_metrics)
    sub = sub.assign(**{metric: np.nan for metric in missing_metrics})

# Baseline rows, one set of "<metric>_base" columns per domain.
base = sub[sub["ablation"] == BASE][["domain"] + metrics].rename(
    columns={metric: f"{metric}_base" for metric in metrics}
)
if base.empty:
    # Otherwise the inner merge below is empty and the failure only shows up
    # later as an unrelated KeyError when the statistics table is pivoted.
    raise ValueError(
        f"Baseline ablation {BASE!r} not found; "
        f"available ablations: {sorted(sub['ablation'].unique())}"
    )

# Attach the baseline values of the same domain, then drop the baseline rows
# themselves so only ablations are compared.
m = sub.merge(base, on="domain", how="inner")
m = m[m["ablation"] != BASE].copy()

# Orient every delta so that a positive value means "better than baseline".
for metric in metrics:
    if metric in higher_better:
        m[f"{metric}_delta"] = m[metric] - m[f"{metric}_base"]
    else:
        m[f"{metric}_delta"] = m[f"{metric}_base"] - m[metric]  # >0 is better

# 3) Share of rows (in percent) whose delta is positive, per domain and metric
eps = 1e-12  # deltas must exceed this to count as an improvement
out = []
for d in sorted(m["domain"].unique()):
    domain_rows = m[m["domain"]==d]  # filter once per domain, not once per metric
    for metric in metrics:
        v = domain_rows[f"{metric}_delta"].dropna()
        # NaN when the domain has no usable value for this metric.
        pct_good = (v > eps).mean() * 100 if len(v) else np.nan
        out.append({"domain": d, "metric": metric, "good_pct": pct_good})

stat = pd.DataFrame(out)
pivot = stat.pivot(index="domain", columns="metric", values="good_pct")
# Column order follows `metrics`, the same list the statistics were built from.
pivot = pivot[metrics]

# Fixed row order; domains other than these three are not shown.
domain_order = [d for d in ["spending", "labor", "credit"] if d in pivot.index]
pivot = pivot.reindex(domain_order)

# Display labels for the figure.
domain_label_map = {"spending": "Spending", "labor": "Labor", "credit": "Credit"}
metric_label_map = {"spearman_rho": "Spearman_rho", "js_divergence": "JS_Divergence", "rmse": "RMSE", "mae": "MAE", "coverage_rate": "Coverage_Rate"}
pivot_display = pivot.rename(index=domain_label_map, columns=metric_label_map)

# 4) Compact heatmap of the percentages (colour scale fixed to 0-100)
fig, ax = plt.subplots(figsize=(5.2, 2.8))
heatmap_options = dict(
    annot=True,
    fmt=".0f",
    vmin=0,
    vmax=100,
    cmap="Blues",
    linewidths=0.5,
    annot_kws={"size": 16, "weight": "bold"},
)
ax = sns.heatmap(pivot_display, ax=ax, **heatmap_options)

ax.set_title("Feature Removal Improvement Ratio", fontsize=16)
ax.set_xlabel("")
ax.set_ylabel("")
ax.set_xticklabels(ax.get_xticklabels(), rotation=23, ha="right")
ax.set_yticklabels(ax.get_yticklabels(), rotation=0, va="center")
fig.tight_layout()

# Save as <result dir>/<root.name>/<root.name>.pdf, creating the folder if needed.
out_dir = Path("/xxxxxxxxxxxxxx/result") / root.name
out_dir.mkdir(parents=True, exist_ok=True)
out_path = out_dir / f"{root.name}.pdf"
fig.savefig(out_path, dpi=300, bbox_inches="tight")
print("Saved:", out_path)

plt.show()
