In [10]:
# Reader utility for combined models pickle created by cannon-train.py
import os
import glob
import pickle
import numpy as np
from pathlib import Path


def find_latest_combined(base_dir="/data/mustard/vmehta"):
    """Return the newest ``*_combined_models.pkl`` found anywhere under *base_dir*.

    The directory tree is searched recursively and candidates are ranked by
    filesystem modification time.

    Args:
        base_dir: Root directory of the search.

    Returns:
        The path (as a string) of the most recently modified match, or
        ``None`` when nothing matches.
    """
    pattern = os.path.join(base_dir, "**", "*_combined_models.pkl")
    candidates = glob.glob(pattern, recursive=True)
    # max() would raise on an empty sequence, so the empty case maps to None.
    return max(candidates, key=os.path.getmtime) if candidates else None


def load_combined(path=None):
    """Unpickle a combined-models file and return it together with its path.

    Args:
        path: File to read. When ``None``, the result of
            ``find_latest_combined()`` (called with its default directory)
            is used instead.

    Returns:
        A ``(path, combined)`` tuple: the path that was actually opened and
        the unpickled object.

    Raises:
        FileNotFoundError: If no path was given and the search found nothing.
    """
    target = path if path is not None else find_latest_combined()
    if target is None:
        raise FileNotFoundError("No combined model pickle found under /data/mustard/vmehta")
    # NOTE: unpickling can execute arbitrary code; only open files you trust.
    with open(target, "rb") as handle:
        payload = pickle.load(handle)
    return target, payload


def _shape_text(obj):
    """Return ``str(obj.shape)``, or ``"N/A"`` when *obj* has no usable shape."""
    shape = getattr(obj, "shape", None)
    return "N/A" if shape is None else str(shape)


def summarize_combined(combined, preview_n=3):
    """Print a short overview of a combined-models mapping.

    Every field is looked up with ``.get()``, so absent keys are reported
    (as ``None``, ``0`` or ``N/A``) instead of raising.

    Args:
        combined: Mapping read for the keys ``"filepath"``, ``"snr"``,
            ``"labels"``, ``"wavelengths"`` and ``"folds"``. Each fold is
            itself read for ``"fold"``, ``"model"``, ``"pred"`` and ``"true"``.
        preview_n: Maximum number of folds to describe individually; the
            remainder is summarized as a count.

    Returns:
        None. All output goes to stdout.
    """
    print("=== Combined Model Summary ===")
    print("filepath:", combined.get("filepath"))
    print("snr:", combined.get("snr"))

    labels = combined.get("labels")
    # A missing "labels" entry is reported with a count of 0 rather than
    # crashing on len(None).
    n_labels = len(labels) if labels is not None else 0
    print("labels (", n_labels, "):", labels)

    wl = combined.get("wavelengths")
    print("wavelengths:", type(wl), "shape=" + (str(wl.shape) if isinstance(wl, np.ndarray) else "N/A"))

    folds = combined.get("folds", [])
    print("folds:", len(folds))
    for fold in folds[:preview_n]:
        model = fold.get("model")
        # pred/true may be absent or stored as plain lists; show "N/A" then.
        pred_shape = _shape_text(fold.get("pred"))
        true_shape = _shape_text(fold.get("true"))
        print(f"  Fold {fold.get('fold')}: model={type(model).__name__}, pred={pred_shape}, true={true_shape}")
    if len(folds) > preview_n:
        print(f"  ... ({len(folds) - preview_n} more folds)")


def compute_metrics(combined):
    """Compute per-label mean absolute error across all folds.

    The ``"pred"`` and ``"true"`` entries of every fold are stacked row-wise
    and compared twice: directly (treated as log10 values), and after
    converting to linear space with ``10 ** x`` and normalizing each row to
    sum to 1.

    Args:
        combined: Mapping with a ``"folds"`` sequence; each fold must provide
            ``"pred"`` and ``"true"`` with matching shapes.

    Returns:
        A dict with ``"mae_log"`` and ``"mae_lin"``, each holding one value
        per column, or ``None`` (after printing a notice) when there are no
        folds.
    """
    folds = combined.get("folds", [])
    if not folds:
        print("No folds found in combined data.")
        return None

    # Stack all folds so that axis 0 runs over samples and axis 1 over labels.
    P = np.vstack([fold["pred"] for fold in folds])
    T = np.vstack([fold["true"] for fold in folds])

    # Log-space MAE
    mae_log = np.mean(np.abs(P - T), axis=0)

    # Convert to linear and normalize per sample. The float base matters:
    # with an integer base, integer-typed inputs raise ValueError for
    # negative exponents and can overflow for large ones.
    P_lin = 10.0 ** P
    T_lin = 10.0 ** T
    P_lin = P_lin / P_lin.sum(axis=1, keepdims=True)
    T_lin = T_lin / T_lin.sum(axis=1, keepdims=True)
    mae_lin = np.mean(np.abs(P_lin - T_lin), axis=0)

    return {
        "mae_log": mae_log,
        "mae_lin": mae_lin,
    }

# Demo driver: load the newest combined pickle, print its summary, then report
# the MAE metrics. Any failure is reported as a one-line message, not a traceback.
try:
    path, combined = load_combined()
    print(f"Loaded: {path}")
    summarize_combined(combined)
    metrics = compute_metrics(combined)
    if metrics is not None:
        print("\n=== Metrics ===")
        print(f"MAE (log): {metrics['mae_log']}")
        print(f"MAE (linear normalized): {metrics['mae_lin']}")
except Exception as e:
    print(f"Reader utility error: {e}")

Reader utility error: No combined model pickle found under /data/mustard/vmehta
