In [1]:
import os, json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

# ----------------------------
# Config
# ----------------------------
# Input: aggregated results CSV that the report is built from.
CSV_PATH = "results/koltsov3_4different_perm1_results.csv"     # <-- change if needed
# Output: every report page is appended to this single multi-page PDF.
OUT_PDF  = "results/koltsov3_report.pdf"         # one multi-page PDF
# One heatmap page is produced per listed n; values of n absent from the data
# are skipped.
HEATMAP_NS = [20, 24, 25, 28, 30]                    # snapshots for heatmaps (edit)
# Upper bound on the number of distinct d values shown in faceted plots.
MAX_D_FACETS = 12                                # cap number of d panels in some plots

# Settings for the "diameter vs n" summary pages.
N_MIN_PLOT = 12          # start plots at this n (tune: 10/12/15)
MAX_D_PANELS = 10        # how many d panels per page
SAMPLE_K_LINES = 6       # set 0 to disable showing sample k lines


# Make sure the output directory exists before anything is written.
# os.path.dirname() returns "" for a bare filename and os.makedirs("") raises
# FileNotFoundError, so fall back to the current directory in that case.
os.makedirs(os.path.dirname(OUT_PDF) or ".", exist_ok=True)

# ----------------------------
# Load + parse
# ----------------------------
# Read the aggregated results. The code below reads these columns from it:
# k_param, d_param, perm_type, coset, diameters, last_layers, total_states
# and n_values.
df = pd.read_csv(CSV_PATH)

def parse_dict_int_keys(s):
    """Decode a JSON object string and return it as a dict with ``int`` keys.

    JSON object keys are always strings; each key is passed through ``int()``
    while the values are kept exactly as ``json.loads`` produced them.

    Args:
        s: JSON text encoding an object whose keys are integer literals.

    Returns:
        A new dict mapping ``int(key)`` to the decoded value, in the order
        the keys appear in the JSON text.

    Raises:
        ValueError: if ``s`` is not valid JSON or a key is not an integer
            literal.
    """
    decoded = json.loads(s)
    converted = {}
    for raw_key in decoded:
        converted[int(raw_key)] = decoded[raw_key]
    return converted

# Decode the JSON-encoded text columns into Python objects.
# Each (new column, source column) pair below holds a JSON object keyed by n,
# so its keys are converted to int on the way in.
_INT_KEYED_COLUMNS = (
    ("diam_dict", "diameters"),
    ("last_layer_dict", "last_layers"),
    ("states_dict", "total_states"),
)
for _parsed_name, _raw_name in _INT_KEYED_COLUMNS:
    df[_parsed_name] = df[_raw_name].apply(parse_dict_int_keys)

# n_values is decoded as-is (no key conversion).
df["n_list"] = df["n_values"].apply(json.loads)

# Long-form
# Flatten the per-row dicts into long form.
#   rows    -> one record per (parameter set, n) with its diameter
#   rows_ll -> the subset of those records that also has a last-layer size
_POINT_COLUMNS = ["k", "d", "perm_type", "coset", "n", "diameter"]

rows = []
rows_ll = []
for _, r in df.iterrows():
    # Parameters shared by every n of this CSV row.
    params = {
        "k": int(r["k_param"]),
        "d": int(r["d_param"]),
        "perm_type": int(r["perm_type"]),
        "coset": str(r["coset"]),
    }
    last_layers = r["last_layer_dict"]
    for n, diam in r["diam_dict"].items():
        point = {**params, "n": int(n), "diameter": float(diam)}
        rows.append(point)
        # Last-layer sizes may be missing for some n; keep only matched ones.
        if n in last_layers:
            rows_ll.append({**point, "last_layer_size": float(last_layers[n])})

# Pass the column names explicitly: with zero records pd.DataFrame([]) has no
# columns at all, and the derived-column code below would fail with KeyError.
long_df = pd.DataFrame(rows, columns=_POINT_COLUMNS)
ll_df = pd.DataFrame(rows_ll, columns=_POINT_COLUMNS + ["last_layer_size"])

# Derived columns used by the parity and normalized-diameter pages.
long_df["n_mod_2"] = (long_df["n"] % 2).astype(int)
long_df["diam_over_n2"] = long_df["diameter"] / (long_df["n"] ** 2)

# Helpers
def make_heatmap_data_at_n(target_n: int):
    """Build the d-by-k grid of mean diameters for a single value of n.

    Reads the module-level ``long_df``.

    Args:
        target_n: the value of ``n`` to select rows for.

    Returns:
        ``(d_values, k_values, grid)`` as NumPy arrays, where ``grid[i, j]``
        is the mean diameter for ``d_values[i]`` and ``k_values[j]`` (NaN
        where that combination has no row); both axes are sorted ascending.
        ``(None, None, None)`` if no row has ``n == target_n``.
    """
    at_n = long_df.loc[long_df["n"] == target_n]
    if at_n.empty:
        return None, None, None

    grid = (
        at_n.pivot_table(index="d", columns="k", values="diameter", aggfunc="mean")
        .sort_index(axis=0)
        .sort_index(axis=1)
    )
    d_values = grid.index.to_numpy()
    k_values = grid.columns.to_numpy()
    return d_values, k_values, grid.to_numpy()

def pick_d_values(max_count=MAX_D_FACETS):
    """Return the distinct ``d`` values of ``long_df`` in ascending order.

    Reads the module-level ``long_df``.

    Args:
        max_count: cap on how many values are returned; when there are more
            distinct values than this, the sorted list is cut with
            ``[:max_count]``.

    Returns:
        A sorted list of distinct ``d`` values, truncated as described above.
    """
    distinct_d = sorted(long_df["d"].unique())
    return distinct_d if len(distinct_d) <= max_count else distinct_d[:max_count]

# ----------------------------
# Write multi-page PDF
# ----------------------------
# Every pdf.savefig() call has to stay inside this ``with`` block: PdfPages
# finalizes and closes the file when the block exits, so pages saved afterwards
# would not end up in a valid document.
with PdfPages(OUT_PDF) as pdf:

    # ------------------------------------------------------------------
    # Page 1: Summary page (text only)
    # ------------------------------------------------------------------
    fig = plt.figure(figsize=(11, 8.5))
    fig.suptitle("Koltsov3 diameter report (from aggregated CSV)", fontsize=16)
    txt = (
        f"CSV: {CSV_PATH}\n"
        f"Parameter sets (rows): {len(df)}\n"
        f"Data points (n,diameter): {len(long_df)}\n"
        f"Unique k: {len(sorted(df['k_param'].unique()))}\n"
        f"Unique d: {len(sorted(df['d_param'].unique()))}\n"
        f"Coset(s): {', '.join(sorted(df['coset'].astype(str).unique()))}\n"
        f"perm_type(s): {', '.join(map(str, sorted(df['perm_type'].unique())))}\n"
        f"Heatmap snapshots at n: {HEATMAP_NS}\n"
    )
    fig.text(0.06, 0.75, txt, fontsize=12, family="monospace")
    plt.axis("off")
    pdf.savefig(fig, bbox_inches="tight")
    plt.close(fig)

    # ------------------------------------------------------------------
    # Pages: Heatmaps of diameter over (d, k) at each requested n
    # ------------------------------------------------------------------
    for n0 in HEATMAP_NS:
        ds, ks, Z = make_heatmap_data_at_n(n0)
        if Z is None:
            # No data recorded at this n; skip the page.
            continue

        fig, ax = plt.subplots(figsize=(11, 8.5))
        im = ax.imshow(Z, aspect="auto")
        ax.set_title(f"Diameter heatmap at n={n0}")
        ax.set_xlabel("k_param")
        ax.set_ylabel("d_param")
        ax.set_xticks(np.arange(len(ks)))
        ax.set_xticklabels(ks, rotation=90)
        ax.set_yticks(np.arange(len(ds)))
        ax.set_yticklabels(ds)
        cbar = fig.colorbar(im, ax=ax)
        cbar.set_label("Diameter")
        fig.tight_layout()
        pdf.savefig(fig)
        plt.close(fig)

    # ------------------------------------------------------------------
    # Pages: Clean, readable Diameter vs n pages (start at larger n).
    # Shows min/median/max across k as a band, optionally a few sample
    # k curves. One page per chunk of MAX_D_PANELS values of d.
    # ------------------------------------------------------------------
    ds_all = sorted(long_df["d"].unique())

    for d_chunk_start in range(0, len(ds_all), MAX_D_PANELS):
        ds_keep = ds_all[d_chunk_start:d_chunk_start + MAX_D_PANELS]

        fig, axes = plt.subplots(len(ds_keep), 1, figsize=(11, 2.2 * len(ds_keep)), sharex=True)
        if len(ds_keep) == 1:
            # plt.subplots returns a bare Axes for a single panel.
            axes = [axes]

        for ax, d0 in zip(axes, ds_keep):
            sub = long_df[(long_df["d"] == d0) & (long_df["n"] >= N_MIN_PLOT)].copy()
            sub = sub.sort_values(["n", "k"])
            ax.set_title(f"Diameter vs n summary (d={d0}) — starting at n={N_MIN_PLOT}", fontsize=10)

            if sub.empty:
                ax.text(0.5, 0.5, "No data in this n-range", ha="center", va="center")
                ax.grid(True, alpha=0.2)
                continue

            # Compute per-n stats across k
            g = sub.groupby("n")["diameter"]
            n_vals = g.mean().index.to_numpy()
            ymin = g.min().to_numpy()
            ymed = g.median().to_numpy()
            ymax = g.max().to_numpy()

            # Band + median line
            ax.fill_between(n_vals, ymin, ymax, alpha=0.2, label="min..max across k")
            ax.plot(n_vals, ymed, linewidth=2, label="median across k")

            # Optional: overlay a few sample k curves (for context)
            if SAMPLE_K_LINES > 0:
                ks = sorted(sub["k"].unique())
                # evenly sample up to SAMPLE_K_LINES k-values
                if len(ks) > SAMPLE_K_LINES:
                    idx = np.linspace(0, len(ks) - 1, SAMPLE_K_LINES).round().astype(int)
                    ks = [ks[i] for i in idx]

                for k0 in ks:
                    s2 = sub[sub["k"] == k0].sort_values("n")
                    ax.plot(s2["n"], s2["diameter"], alpha=0.35, linewidth=1)

            ax.grid(True, alpha=0.2)
            ax.legend(loc="upper left", fontsize=8, frameon=False)

        axes[-1].set_xlabel("n")
        fig.tight_layout()
        pdf.savefig(fig)
        plt.close(fig)

    # ------------------------------------------------------------------
    # Page: Parity test (n mod 2) for a few representative d's.
    # Scatter per d with parity markers. Written once, after the chunked
    # pages above (not once per chunk).
    # ------------------------------------------------------------------
    ds_keep = pick_d_values(min(MAX_D_FACETS, 8))
    if ds_keep:  # plt.subplots() rejects a zero-row grid
        fig, axes = plt.subplots(len(ds_keep), 1, figsize=(11, 1.8 * len(ds_keep)), sharex=True)
        if len(ds_keep) == 1:
            axes = [axes]

        for ax, d0 in zip(axes, ds_keep):
            sub = long_df[long_df["d"] == d0]
            ax.set_title(f"Parity check: diameter vs n (d={d0}, markers show n mod 2)", fontsize=10)

            even = sub[sub["n_mod_2"] == 0]
            odd = sub[sub["n_mod_2"] == 1]

            ax.scatter(even["n"], even["diameter"], s=10, marker="o", alpha=0.5, label="even n")
            ax.scatter(odd["n"], odd["diameter"], s=10, marker="x", alpha=0.5, label="odd n")

            ax.grid(True, alpha=0.2)

        axes[-1].set_xlabel("n")
        axes[0].legend(loc="upper left", fontsize=8)
        fig.tight_layout()
        pdf.savefig(fig)
        plt.close(fig)

    # ------------------------------------------------------------------
    # Page: Last-layer size vs diameter (bottleneck scatter)
    # ------------------------------------------------------------------
    if not ll_df.empty:
        fig, ax = plt.subplots(figsize=(11, 8.5))
        ax.scatter(ll_df["diameter"], ll_df["last_layer_size"], s=12, alpha=0.6)
        ax.set_title("Last-layer size vs diameter")
        ax.set_xlabel("Diameter")
        ax.set_ylabel("Last layer size")
        ax.grid(True, alpha=0.2)
        fig.tight_layout()
        pdf.savefig(fig)
        plt.close(fig)

    # ------------------------------------------------------------------
    # Page: Normalized diameter (diameter / n^2) vs d at max n
    # ------------------------------------------------------------------
    if not long_df.empty:  # max() of an empty column is NaN and int(NaN) raises
        n_max = int(long_df["n"].max())
        sub = long_df[long_df["n"] == n_max].copy()
        fig, ax = plt.subplots(figsize=(11, 8.5))
        # jitter x slightly to see overlaps
        x = sub["d"].to_numpy() + (np.random.rand(len(sub)) - 0.5) * 0.12
        ax.scatter(x, sub["diam_over_n2"], s=14, alpha=0.6)
        ax.set_title(f"Normalized diameter at n={n_max}: diameter / n² vs d (jittered)")
        ax.set_xlabel("d_param")
        ax.set_ylabel("diameter / n²")
        ax.grid(True, alpha=0.2)
        fig.tight_layout()
        pdf.savefig(fig)
        plt.close(fig)

print("Wrote PDF report:", OUT_PDF)


Wrote PDF report: results/koltsov3_report.pdf
