
# dFoF Batch Extraction (Suite2p → dFoF → Save)

This notebook scans your experiment directories (each containing `03_analysis/functional/suite2P/plane*`),
loads Suite2p outputs, runs **`process_suite2p_fluorescence`** from your **`dff_extraction`** repo, and saves outputs per-plane
in a new **`dFoF/`** subfolder. It also captures printed messages into per-plane log files and a session log.

A later cell merges the per-plane dFoF outputs into a single experiment-level file.
The last cell validates the merged outputs against the per-plane files.
The raster image plotted during the merge also helps to check the merged result visually; it is saved under `03_analysis/functional/plots/merged_dFoF/`.

Per-plane outputs are saved as `.npy` arrays and a compressed `.npz` bundle, along with a `metadata.json` file.


In [2]:

# ==== Configuration (edit this) ===============================================
# Every value in this cell is read by the batch-processing cell further down.
from pathlib import Path

# Base directory whose immediate sub-folders are the experiment folders
# (a Windows path is fine; keep the raw-string prefix so backslashes survive).
# Example: r"C:\Users\suribear\OneDrive - Université de Lausanne\Lab\Data\2p"
BASE_DIR = Path(r"C:\Users\suribear\OneDrive - Université de Lausanne\Lab\Data\2p")

# Optional: restrict processing to these experiment folder names.
# Leave the list empty to process every sub-folder of BASE_DIR that contains a
# 03_analysis/functional/suite2P/plane* structure.
EXPERIMENT_WHITELIST = [
    "L433_f02_Exp_1_flickering",
    "L433_f03_Exp_1_flickering",
    "L433_f04_Exp_1_flickering",
    "L433_f05_Exp_1_flickering",
    "L433_f06_Exp_1_flickering",
    "L453_f07_Exp_1_flickering",
    "L453_f08_Exp_1_flickering",
    "L453_f09_Exp_1_flickering",
    "L453_f10_Exp_1_flickering",
    "L453_f11_Exp_1_flickering",
    "L472_f01_Exp_2_rocking_1",
    "L472_f02_Exp_2_rocking_1",
    "L472_f03_Exp_2_rocking_1",
    "L472_f04_Exp_2_rocking_1",
    "L472_f05_Exp_2_rocking_1",
    "L472_f06_Exp_2_rocking_1",
]  # e.g., ["L433_f02_Exp_1_flickering", "L472_f03_Exp_2_rocking_1"]

# Imaging parameters (global defaults). They are used for every experiment that
# has no "fps" / "tau" entry in PER_EXPERIMENT below, and are converted to
# float before being passed on.
FPS_DEFAULT = 2.0          # Hz
TAU_DEFAULT = 6          # seconds

# dF/F extraction params (tune as needed). These are forwarded unchanged as
# keyword arguments to process_suite2p_fluorescence and recorded in each
# plane's metadata.json.
PERCENTILE = 8
INSTABILITY_RATIO = 0.1
MIN_WINDOW_S = 15
WINDOW_TAU_MULTIPLIER = 40
MIN_STD = 0.003  # std gate used by your function filter_inactive_rois_by_std_or_z

# Per-experiment overrides (optional). Keys must be experiment folder names;
# each value is a dict that may contain "fps" and/or "tau".
# Example:
# PER_EXPERIMENT = {
#     "L433_f02_Exp_1_flickering": {"fps": 3.0, "tau": 0.7},
#     "L472_f03_Exp_2_rocking_1": {"fps": 4.0, "tau": 0.9},
# }
PER_EXPERIMENT = {}

print("Configured. Edit the values above as needed and run the next cell to proceed.")


Configured. Edit the values above as needed and run the next cell to proceed.


In [3]:

# ==== Imports & Helper Utilities ==============================================
import sys, io, json, traceback
import numpy as np
from datetime import datetime
from pathlib import Path

# Ensure your repository (that contains dff_extraction.py) is on sys.path if needed.
# Example (uncomment & edit): sys.path.append(r"C:\Users\suribear\code\dff_extraction_repo")
try:
    from src.dff_extraction import process_suite2p_fluorescence
except Exception as e:
    print("Could not import 'process_suite2p_fluorescence' from dff_extraction. "
          "If your repo isn't on sys.path, append it below and re-run this cell.")
    # sys.path.append(r"C:\Users\suribear\code\dff_extraction_repo")
    # from dff_extraction import process_suite2p_fluorescence
    raise

class TeeWriter:
    """Write-only stream stand-in that duplicates output to several streams.

    Only ``write`` and ``flush`` are provided, which is what ``print`` needs
    when an instance is installed as ``sys.stdout``.
    """

    def __init__(self, *streams):
        # Targets are kept in the order given; each call visits them in turn.
        self.streams = streams

    def write(self, data):
        """Forward ``data`` to every target stream."""
        for target in self.streams:
            target.write(data)

    def flush(self):
        """Flush every target stream."""
        for target in self.streams:
            target.flush()

def ensure_dir(p: Path):
    """Create directory ``p`` together with any missing parents.

    An already existing directory is left as is.
    """
    p.mkdir(exist_ok=True, parents=True)

def find_experiments(base_dir: Path, whitelist=None):
    """List the experiment folders under ``base_dir`` that hold Suite2p planes.

    A sub-folder qualifies when ``03_analysis/functional/suite2P`` exists
    inside it and contains at least one directory whose name starts with
    ``plane``.  When ``whitelist`` is non-empty, only folders whose name is in
    it are considered.  The result is sorted by folder name.
    """
    def has_planes(exp: Path) -> bool:
        root = exp / "03_analysis" / "functional" / "suite2P"
        if not root.exists():
            return False
        return any(entry.is_dir() and entry.name.startswith("plane") for entry in root.iterdir())

    found = [
        entry
        for entry in base_dir.iterdir()
        if entry.is_dir()
        and not (whitelist and entry.name not in whitelist)
        and has_planes(entry)
    ]
    found.sort(key=lambda entry: entry.name)
    return found

def plane_dirs(exp_dir: Path):
    """Return the ``plane*`` directories of an experiment's Suite2p folder.

    The list is sorted by directory name as plain strings, so ``plane10``
    comes before ``plane2``.
    """
    s2p_root = exp_dir / "03_analysis" / "functional" / "suite2P"
    planes = []
    for entry in s2p_root.iterdir():
        if entry.is_dir() and entry.name.startswith("plane"):
            planes.append(entry)
    planes.sort(key=lambda entry: entry.name)
    return planes

def experiment_prefix(exp_name: str) -> str:
    """Return the first two underscore-separated fields of ``exp_name``.

    Example: ``'L433_f02_Exp_1_flickering'`` gives ``'L433_f02'``.  A name
    without any underscore is returned unchanged.
    """
    fields = exp_name.split("_")
    if len(fields) < 2:
        return exp_name
    return "_".join(fields[:2])

def save_outputs(out_dir: Path, prefix: str, dFoF: np.ndarray, filtered_roi_indices: np.ndarray, params: dict):
    """Write one plane's dF/F results into ``out_dir``.

    Files written (``out_dir`` is created if needed):
      * ``{prefix}_dFoF.npy``                  - the dF/F array
      * ``{prefix}_filtered_roi_indices.npy``  - the ROI index array
      * ``{prefix}_dFoF_outputs.npz``          - both arrays, compressed
      * ``metadata.json``                      - creation time, array shape,
        ROI count, the extraction parameters found in ``params`` and the
        names of the files above

    Parameters missing from ``params`` are stored as ``null`` in the JSON.
    """
    ensure_dir(out_dir)

    targets = {
        "dFoF_npy": out_dir / f"{prefix}_dFoF.npy",
        "filtered_roi_indices_npy": out_dir / f"{prefix}_filtered_roi_indices.npy",
        "npz_bundle": out_dir / f"{prefix}_dFoF_outputs.npz",
    }

    # Arrays: two stand-alone .npy files plus one compressed bundle.
    np.save(targets["dFoF_npy"], dFoF)
    np.save(targets["filtered_roi_indices_npy"], filtered_roi_indices)
    np.savez_compressed(targets["npz_bundle"], dFoF=dFoF, filtered_roi_indices=filtered_roi_indices)

    # Metadata: the ROI count is the second axis of a 2-D array, otherwise 0.
    roi_count = dFoF.shape[1] if dFoF.ndim == 2 else 0
    meta = {
        "created_at": datetime.now().isoformat(timespec="seconds"),
        "shape_dFoF": list(dFoF.shape),
        "n_final_rois": int(roi_count),
        "file_prefix": prefix,
        "out_folder": str(out_dir),
    }
    param_keys = (
        "fps",
        "tau",
        "percentile",
        "instability_ratio",
        "min_window_s",
        "window_tau_multiplier",
        "min_std",
    )
    for key in param_keys:
        meta[key] = params.get(key)
    meta["filenames"] = {label: path.name for label, path in targets.items()}

    with open(out_dir / "metadata.json", "w", encoding="utf-8") as handle:
        json.dump(meta, handle, indent=2)

print("Helpers loaded.")


Helpers loaded.


In [None]:

# ==== Batch Processing (no plots) =============================================
# For every selected experiment and every plane* folder in it, run
# process_suite2p_fluorescence, save the outputs into <plane>/dFoF/ and mirror
# everything printed during that plane into two log files (one next to the
# outputs, one in a per-run session_logs_<timestamp> folder under BASE_DIR).
from datetime import datetime
import pandas as pd
import numpy as np

# One timestamp per run: it names the session log folder and every log file.
session_ts = datetime.now().strftime("%Y%m%d_%H%M%S")
session_log_root = BASE_DIR / f"session_logs_{session_ts}"
ensure_dir(session_log_root)

experiments = find_experiments(BASE_DIR, whitelist=EXPERIMENT_WHITELIST)
print(f"Found {len(experiments)} experiment folder(s) in: {BASE_DIR}")
for e in experiments:
    print(" -", e.name)

# One row per successfully processed plane; becomes summary.csv at the end.
results = []

for exp_dir in experiments:
    exp_name = exp_dir.name
    exp_prefix = experiment_prefix(exp_name)
    # Per-experiment fps/tau win over the global defaults.
    overrides = PER_EXPERIMENT.get(exp_name, {})
    fps = float(overrides.get("fps", FPS_DEFAULT))
    tau = float(overrides.get("tau", TAU_DEFAULT))

    planes = plane_dirs(exp_dir)
    print(f"\n=== {exp_name}: {len(planes)} plane(s) detected ===")

    for pdir in planes:
        print(f"\nProcessing {pdir} ...")
        dfof_dir = pdir / "dFoF"   # NEW output folder name
        ensure_dir(dfof_dir)

        # Same log content is written twice: beside the plane outputs and in the session folder.
        plane_log_path = dfof_dir / f"log_{exp_name}_{pdir.name}_{session_ts}.txt"
        session_log_path = session_log_root / f"log_{exp_name}_{pdir.name}_{session_ts}.txt"

        # Tee prints to both files and the notebook output
        with open(plane_log_path, "w", encoding="utf-8") as flog, open(session_log_path, "w", encoding="utf-8") as fsess:
            tee = TeeWriter(sys.stdout, flog, fsess)
            old_stdout = sys.stdout
            try:
                sys.stdout = tee

                print(f"[{datetime.now().isoformat(timespec='seconds')}] Starting plane {pdir.name}")
                print(f"Using fps={fps}, tau={tau}")
                print("Calling process_suite2p_fluorescence...")

                dFoF, filtered_indices = process_suite2p_fluorescence(
                    f_path=pdir,
                    fps=fps,
                    tau=tau,
                    percentile=PERCENTILE,
                    instability_ratio=INSTABILITY_RATIO,
                    min_window_s=MIN_WINDOW_S,
                    window_tau_multiplier=WINDOW_TAU_MULTIPLIER,
                    min_std=MIN_STD,
                )

                # Sanity-check the return values before anything is written to disk.
                if not isinstance(dFoF, np.ndarray) or dFoF.ndim != 2:
                    raise ValueError("Expected dFoF to be a 2D numpy array (T x N).")

                if not isinstance(filtered_indices, (np.ndarray, list)):
                    raise ValueError("Expected filtered_indices to be an array-like of ROI indices.")
                filtered_indices = np.array(filtered_indices)

                print("Saving arrays and metadata...")
                params = dict(
                    fps=fps, tau=tau,
                    percentile=PERCENTILE,
                    instability_ratio=INSTABILITY_RATIO,
                    min_window_s=MIN_WINDOW_S,
                    window_tau_multiplier=WINDOW_TAU_MULTIPLIER,
                    min_std=MIN_STD,
                )
                save_outputs(dfof_dir, exp_prefix, dFoF, filtered_indices, params)

                T, N = dFoF.shape
                results.append({
                    "experiment": exp_name,
                    "plane": pdir.name,
                    "n_rois_final": int(N),
                    "T": int(T),
                    "fps": fps,
                    "tau": tau,
                    "dFoF_dir": str(dfof_dir),
                    "file_prefix": exp_prefix,
                })

                print(f"Done: {pdir.name} -> N={N} ROIs, T={T}. Outputs in: {dfof_dir}")

            except Exception:
                # Best effort: a failing plane is reported and the batch moves on.
                print("ERROR while processing", pdir)
                # print_exc() writes to sys.stderr by default, which would bypass
                # the tee; route it through stdout so the traceback also lands
                # in both log files.
                traceback.print_exc(file=sys.stdout)
            finally:
                # Always restore the real stdout, even if the plane failed.
                sys.stdout = old_stdout

# Summary table saved to the session log root
if results:
    df = pd.DataFrame(results).sort_values(["experiment", "plane"])
    df_path = session_log_root / "summary.csv"
    df.to_csv(df_path, index=False, encoding="utf-8")
    print(f"\nSession summary saved to: {df_path}")
else:
    print("\nNo results to summarize. Check your BASE_DIR or whitelist settings.")


Found 16 experiment folder(s) in: C:\Users\suribear\OneDrive - Université de Lausanne\Lab\Data\2p
 - L433_f02_Exp_1_flickering
 - L433_f03_Exp_1_flickering
 - L433_f04_Exp_1_flickering
 - L433_f05_Exp_1_flickering
 - L433_f06_Exp_1_flickering
 - L453_f07_Exp_1_flickering
 - L453_f08_Exp_1_flickering
 - L453_f09_Exp_1_flickering
 - L453_f10_Exp_1_flickering
 - L453_f11_Exp_1_flickering
 - L472_f01_Exp_2_rocking_1
 - L472_f02_Exp_2_rocking_1
 - L472_f03_Exp_2_rocking_1
 - L472_f04_Exp_2_rocking_1
 - L472_f05_Exp_2_rocking_1
 - L472_f06_Exp_2_rocking_1

=== L433_f02_Exp_1_flickering: 5 plane(s) detected ===

Processing C:\Users\suribear\OneDrive - Université de Lausanne\Lab\Data\2p\L433_f02_Exp_1_flickering\03_analysis\functional\suite2P\plane0 ...
[2025-10-27T17:33:56] Starting plane plane0
Using fps=2.0, tau=6.0
Calling process_suite2p_fluorescence...
Excluded 139 non-cell ROIs. Remaining: 449 cells.
Removed 0 dim ROIs.


In [4]:
# (Optional) Show the summary table of the most recent batch run.
# Session folders are named session_logs_<YYYYmmdd_HHMMSS>, so the last one in
# name order is the newest.
import pandas as pd
from pathlib import Path
try:
    from IPython.display import display  # works in Jupyter
except Exception:
    # Outside IPython: fall back to plain-text printing below.
    display = None

session_dirs = sorted(
    p for p in BASE_DIR.iterdir() if p.is_dir() and p.name.startswith("session_logs_")
)
if not session_dirs:
    print("No session_logs_* folder found yet.")
else:
    latest = session_dirs[-1]
    summary_csv = latest / "summary.csv"
    if not summary_csv.exists():
        print(f"No summary.csv in {latest}")
    else:
        df = pd.read_csv(summary_csv)
        if display is None:
            print(df.to_string(index=False))
        else:
            display(df)
        print(f"Displayed summary from: {summary_csv}")



Unnamed: 0,experiment,plane,n_rois_final,T,fps,tau,dFoF_dir,file_prefix
0,L433_f02_Exp_1_flickering,plane0,442,3340,2.0,6.0,C:\Users\suribear\OneDrive - Université de Lau...,L433_f02
1,L433_f02_Exp_1_flickering,plane1,435,3340,2.0,6.0,C:\Users\suribear\OneDrive - Université de Lau...,L433_f02
2,L433_f02_Exp_1_flickering,plane2,468,3340,2.0,6.0,C:\Users\suribear\OneDrive - Université de Lau...,L433_f02
3,L433_f02_Exp_1_flickering,plane3,372,3340,2.0,6.0,C:\Users\suribear\OneDrive - Université de Lau...,L433_f02
4,L433_f02_Exp_1_flickering,plane4,267,3340,2.0,6.0,C:\Users\suribear\OneDrive - Université de Lau...,L433_f02
...,...,...,...,...,...,...,...,...
74,L472_f06_Exp_2_rocking_1,plane0,242,3340,2.0,6.0,C:\Users\suribear\OneDrive - Université de Lau...,L472_f06
75,L472_f06_Exp_2_rocking_1,plane1,294,3340,2.0,6.0,C:\Users\suribear\OneDrive - Université de Lau...,L472_f06
76,L472_f06_Exp_2_rocking_1,plane2,296,3340,2.0,6.0,C:\Users\suribear\OneDrive - Université de Lau...,L472_f06
77,L472_f06_Exp_2_rocking_1,plane3,230,3340,2.0,6.0,C:\Users\suribear\OneDrive - Université de Lau...,L472_f06


Displayed summary from: C:\Users\suribear\OneDrive - Université de Lausanne\Lab\Data\2p\session_logs_20251027_173356\summary.csv


In [6]:
# ================================================================
# Merge per-plane dFoF into a single experiment-level file
# ================================================================
from pathlib import Path
import json, numpy as np, pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

# ---------------- CONFIG ----------------
# NOTE: BASE_DIR and EXPERIMENT_WHITELIST are assigned again here, replacing
# the values set in the configuration cell at the top of the notebook.
BASE_DIR = Path(r"C:\Users\suribear\OneDrive - Université de Lausanne\Lab\Data\2p")


# Example of a shorter whitelist (kept for quick test runs):
# EXPERIMENT_WHITELIST = [
#     "L433_f02_Exp_1_flickering",
#     "L453_f10_Exp_1_flickering",
#     "L472_f03_Exp_2_rocking_1"
# ]
# Experiments to merge; an empty list means every experiment folder found.
EXPERIMENT_WHITELIST = [
    "L433_f02_Exp_1_flickering",
    "L433_f03_Exp_1_flickering",
    "L433_f04_Exp_1_flickering",
    "L433_f05_Exp_1_flickering",
    "L433_f06_Exp_1_flickering",
    "L453_f07_Exp_1_flickering",
    "L453_f08_Exp_1_flickering",
    "L453_f09_Exp_1_flickering",
    "L453_f10_Exp_1_flickering",
    "L453_f11_Exp_1_flickering",
    "L472_f01_Exp_2_rocking_1",
    "L472_f02_Exp_2_rocking_1",
    "L472_f03_Exp_2_rocking_1",
    "L472_f04_Exp_2_rocking_1",
    "L472_f05_Exp_2_rocking_1",
    "L472_f06_Exp_2_rocking_1",
]
# How planes with different frame counts are brought to a common length:
# "truncate" cuts every plane to the shortest one; any other value makes the
# merge loop pad shorter planes with NaN rows up to the longest one.
ALIGN_MODE = "truncate"   # or "pad_nan"
# When True, a raster PNG plus the sort-order CSVs are written per experiment.
DO_PLOT   = True
# Percentile of the finite merged values used as the upper colour limit of the raster.
PLOT_PCTL = 99.0

# ---------------- HELPERS ----------------
def ensure_dir(p: Path):
    """Make sure directory ``p`` exists, creating missing parents as needed."""
    p.mkdir(exist_ok=True, parents=True)

def safe_to_csv(df, path: Path):
    """Write ``df`` to ``path`` as UTF-8 CSV without the index column.

    If the target cannot be written because of a ``PermissionError`` (e.g. the
    file is open in another program), the data goes to a sibling file whose
    stem carries a ``_YYYYmmdd_HHMMSS`` suffix and a warning is printed.

    Returns the path that was actually written.
    """
    try:
        df.to_csv(path, index=False, encoding="utf-8")
    except PermissionError:
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        fallback = path.with_name(path.stem + "_" + stamp + path.suffix)
        df.to_csv(fallback, index=False, encoding="utf-8")
        print(f"[warn] CSV locked: wrote to {fallback.name} instead of {path.name}")
        return fallback
    else:
        return path

def safe_savefig(fig, path: Path, **kwargs):
    """Save ``fig`` to ``path``, forwarding ``kwargs`` to ``fig.savefig``.

    If the target cannot be written because of a ``PermissionError``, the
    figure goes to a sibling file whose stem carries a ``_YYYYmmdd_HHMMSS``
    suffix and a warning is printed.

    Returns the path that was actually written.
    """
    try:
        fig.savefig(path, **kwargs)
    except PermissionError:
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        fallback = path.with_name(path.stem + "_" + stamp + path.suffix)
        fig.savefig(fallback, **kwargs)
        print(f"[warn] PNG locked: wrote to {fallback.name} instead of {path.name}")
        return fallback
    else:
        return path

def find_experiments(base_dir: Path, whitelist=None):
    """Return, sorted by name, the experiment folders under ``base_dir``.

    A folder is kept when it is a directory, passes the optional (non-empty)
    ``whitelist`` of folder names, and its ``03_analysis/functional/suite2P``
    folder exists and contains at least one ``plane*`` directory.
    """
    selected = []
    for entry in sorted(base_dir.iterdir(), key=lambda p: p.name):
        if not entry.is_dir():
            continue
        if whitelist and entry.name not in whitelist:
            continue
        s2p = entry / "03_analysis" / "functional" / "suite2P"
        if not s2p.exists():
            continue
        if any(sub.is_dir() and sub.name.startswith("plane") for sub in s2p.iterdir()):
            selected.append(entry)
    return selected

def plane_dirs(exp_dir: Path):
    """Return the experiment's Suite2p ``plane*`` directories, sorted by name.

    Names are compared as plain strings (``plane10`` sorts before ``plane2``).
    """
    s2p = exp_dir.joinpath("03_analysis", "functional", "suite2P")
    return sorted(
        (sub for sub in s2p.iterdir() if sub.is_dir() and sub.name.startswith("plane")),
        key=lambda sub: sub.name,
    )

def experiment_prefix(exp_name: str) -> str:
    """Return ``'<field0>_<field1>'`` from an underscore-separated name.

    ``'L433_f02_Exp_1_flickering'`` gives ``'L433_f02'``; a name without an
    underscore is returned as is.
    """
    head, sep, rest = exp_name.partition("_")
    if not sep:
        return exp_name
    second = rest.split("_", 1)[0]
    return f"{head}_{second}"

def read_params_from_metadata(dfof_dir: Path):
    """Read the frame rate and decay constant stored in ``metadata.json``.

    Args:
        dfof_dir: a plane's ``dFoF`` output folder.

    Returns:
        ``(fps, tauDecay)``.  ``tauDecay`` is taken from the ``"tauDecay"`` key
        and falls back to ``"tau"``.  Either element is ``None`` when its key
        is absent.  Both are ``None`` when the file is missing, cannot be
        read, is not valid JSON, or does not hold a JSON object; in the last
        three cases a warning is printed instead of failing silently.
    """
    meta_path = dfof_dir / "metadata.json"
    if not meta_path.exists():
        return None, None

    try:
        with open(meta_path, "r", encoding="utf-8") as f:
            meta = json.load(f)
    except (OSError, ValueError) as err:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        # Metadata is optional for the merge, so warn and carry on.
        print(f"[warn] could not read {meta_path}: {err}")
        return None, None

    if not isinstance(meta, dict):
        print(f"[warn] {meta_path} does not contain a JSON object; ignoring it")
        return None, None

    return meta.get("fps"), meta.get("tauDecay", meta.get("tau"))

def load_dfof_for_plane(dfof_dir: Path, prefix: str):
    """Load a plane's dF/F array from ``dfof_dir``.

    ``{prefix}_dFoF.npy`` is tried first, then ``dFoF_{prefix}.npy``.

    Returns ``(array, file_name)`` for the first file that exists, or
    ``(None, None)`` when neither is present.
    """
    for filename in (f"{prefix}_dFoF.npy", f"dFoF_{prefix}.npy"):
        candidate = dfof_dir / filename
        if candidate.exists():
            return np.load(candidate), candidate.name
    return None, None

def load_filtered_indices_for_plane(dfof_dir: Path, prefix: str, N_cols: int):
    """Load the ROI index vector that belongs to a plane's dF/F columns.

    Lookup order inside ``dfof_dir``: ``{prefix}_filtered_roi_indices.npy``,
    ``filtered_roi_indices.npy``, ``__filtered_roi_indices.npy``, then the
    first match (in name order) of ``*filtered*roi*indices*.npy``.

    Args:
        dfof_dir: a plane's ``dFoF`` output folder.
        prefix: experiment prefix used in the file names.
        N_cols: number of dF/F columns the vector must describe.

    Returns:
        ``(indices, source_name)``.  ``indices`` is an int array of length
        ``N_cols``.  If no file is found, ``indices`` is ``0..N_cols-1`` and
        ``source_name`` is ``None``.  If a file is found but is not a 1-D
        array of length ``N_cols``, ``indices`` is also ``0..N_cols-1``,
        ``source_name`` is still the file's name, and a warning is printed so
        the substitution does not go unnoticed.
    """
    named = [
        dfof_dir / f"{prefix}_filtered_roi_indices.npy",
        dfof_dir / "filtered_roi_indices.npy",
        dfof_dir / "__filtered_roi_indices.npy",
    ]
    hit = next((c for c in named if c.exists()), None)
    if hit is None:
        # Fall back to any similarly named file; name order keeps the pick deterministic.
        hit = next(iter(sorted(dfof_dir.glob("*filtered*roi*indices*.npy"))), None)
    if hit is None:
        return np.arange(N_cols, dtype=int), None

    arr = np.load(hit)
    if arr.ndim == 1 and arr.shape[0] == N_cols:
        return arr.astype(int), hit.name

    print(f"[warn] {hit}: shape {arr.shape} does not match {N_cols} dFoF column(s); "
          f"using 0..{N_cols - 1} instead")
    return np.arange(N_cols, dtype=int), hit.name

# ---------------- MAIN LOOP ----------------
# For each experiment: load every plane's dFoF array, bring all planes to a
# common number of frames, concatenate them column-wise (one column per ROI)
# and save the merged array, the merged ROI index vector, a column map and,
# optionally, a raster plot.
experiments = find_experiments(BASE_DIR, whitelist=EXPERIMENT_WHITELIST)
print(f"Found {len(experiments)} experiments to merge.\n")

for exp_dir in experiments:
    exp_name = exp_dir.name
    prefix   = experiment_prefix(exp_name)
    planes   = plane_dirs(exp_dir)
    if not planes:
        print(f"[skip] {exp_name}: no planes found.\n")
        continue

    # --- Collect per-plane arrays; planes without outputs are skipped with a warning ---
    per_plane = []
    for pdir in planes:
        dfof_dir = pdir / "dFoF"
        if not dfof_dir.exists():
            print(f"[warn] {exp_name} {pdir.name}: missing dFoF folder")
            continue

        A, src_name = load_dfof_for_plane(dfof_dir, prefix)
        if A is None:
            print(f"[warn] {exp_name} {pdir.name}: no dFoF file found")
            continue

        fps, tauDecay = read_params_from_metadata(dfof_dir)
        fi_vec, fi_src = load_filtered_indices_for_plane(dfof_dir, prefix, A.shape[1])
        per_plane.append({
            "plane": pdir.name,
            "dfof": A,
            "filtered_indices": fi_vec,
            "dfof_source": src_name,
            "fi_source": fi_src or "",
            "fps": fps,
            "tauDecay": tauDecay,
        })

    if not per_plane:
        print(f"[skip] {exp_name}: no dFoF arrays found in any plane.\n")
        continue

    # --- Align time dimension ---
    Ts = [x["dfof"].shape[0] for x in per_plane]
    if ALIGN_MODE == "truncate":
        # Cut every plane to the shortest recording.
        T_common = min(Ts)
        if len(set(Ts)) > 1:
            print(f"[info] {exp_name}: truncating all planes to T={T_common} (min over {Ts})")
        aligned = []
        for x in per_plane:
            y = x.copy()
            y["dfof"] = x["dfof"][:T_common, :]
            aligned.append(y)
        T_final = T_common
    else:
        # Any other ALIGN_MODE: pad shorter planes with NaN rows up to the longest.
        T_max = max(Ts)
        aligned = []
        for x in per_plane:
            d = x["dfof"]
            T_i, N_i = d.shape
            if T_i < T_max:
                pad = np.full((T_max - T_i, N_i), np.nan, dtype=d.dtype)
                d = np.vstack([d, pad])
            y = x.copy()
            y["dfof"] = d
            aligned.append(y)
        T_final = T_max

    # --- Concatenate across neurons ---
    # mapping_rows records, for each merged column, which plane and ROI it came from.
    merged_list = []
    mapping_rows = []
    merged_filtered_roi_idx = []
    col_offset = 0
    for item in aligned:
        d = item["dfof"]
        N = d.shape[1]
        merged_list.append(d)
        merged_filtered_roi_idx.extend(list(item["filtered_indices"]))
        for j in range(N):
            mapping_rows.append({
                "plane": item["plane"],
                "roi_index_in_plane": j,
                "filtered_roi_index": int(item["filtered_indices"][j]),
                "global_col": col_offset + j,
                "source_dfof_file": item["dfof_source"],
                "source_fi_file": item["fi_source"],
            })
        col_offset += N

    merged = np.concatenate(merged_list, axis=1)
    merged_filtered_roi_idx = np.asarray(merged_filtered_roi_idx, dtype=int)

    # --- Save outputs per experiment ---
    out_merge = exp_dir / "03_analysis" / "functional" / "suite2P" / "merged_dFoF"
    out_plots = exp_dir / "03_analysis" / "functional" / "plots" / "merged_dFoF"
    ensure_dir(out_merge); ensure_dir(out_plots)

    merged_npy = out_merge / f"{prefix}_dFoF_merged.npy"
    np.save(merged_npy, merged)

    merged_fi_npy = out_merge / f"{prefix}_dFoF_merged_filtered_roi_indices.npy"
    np.save(merged_fi_npy, merged_filtered_roi_idx)

    map_csv = out_merge / f"{prefix}_dFoF_merged_map.csv"
    # safe_to_csv may fall back to a timestamped file when the target is
    # locked; keep the real path so the final message does not point at a
    # stale map.
    map_written = safe_to_csv(pd.DataFrame(mapping_rows), map_csv)

    # --- Plot & Save Visualization ---
    if DO_PLOT:
        # Order neurons by their peak value, largest first.
        with np.errstate(invalid="ignore"):
            col_max = np.nanmax(merged, axis=0)
        order = np.argsort(-col_max)
        merged_sorted = merged[:, order]
        finite_vals = merged_sorted[np.isfinite(merged_sorted)]
        vmax = np.percentile(finite_vals, PLOT_PCTL) if finite_vals.size else None
        vmin = 0.0

        fig = plt.figure(figsize=(11, 6))
        plt.imshow(merged_sorted.T, aspect="auto", origin="lower",
                   cmap="gray_r", vmin=vmin, vmax=vmax)
        plt.xlabel("Time (frames)")
        plt.ylabel("Neuron (sorted by max ΔF/F)")
        plt.title(f"{prefix} — merged dFoF raster (sorted by max)")
        plt.tight_layout()

        plot_png = out_plots / f"{prefix}_dFoF_merged_raster_sorted_by_max.png"
        _ = safe_savefig(fig, plot_png, dpi=150, bbox_inches="tight")
        plt.close(fig)

        # Save the plotting order so raster rows can be traced back to columns.
        order_csv = out_plots / f"{prefix}_dFoF_merged_sort_index.csv"
        _ = safe_to_csv(pd.DataFrame({"plot_col_order": order}), order_csv)

        plot_map_csv = out_plots / f"{prefix}_dFoF_merged_map_sorted.csv"
        _ = safe_to_csv(pd.DataFrame([mapping_rows[i] for i in order]), plot_map_csv)

    print(f"[ok] {exp_name}: merged → {merged_npy.name}, map → {map_written.name}, "
          f"fi → {merged_fi_npy.name}, plots in {out_plots.name}\n")


Found 1 experiments to merge.

[warn] CSV locked: wrote to L433_f02_dFoF_merged_map_20251028_142035.csv instead of L433_f02_dFoF_merged_map.csv
[warn] CSV locked: wrote to L433_f02_dFoF_merged_sort_index_20251028_142037.csv instead of L433_f02_dFoF_merged_sort_index.csv
[warn] CSV locked: wrote to L433_f02_dFoF_merged_map_sorted_20251028_142037.csv instead of L433_f02_dFoF_merged_map_sorted.csv
[ok] L433_f02_Exp_1_flickering: merged → L433_f02_dFoF_merged.npy, map → L433_f02_dFoF_merged_map.csv, fi → L433_f02_dFoF_merged_filtered_roi_indices.npy, plots in merged_dFoF



In [7]:
# ==== Validate one experiment's merged dFoF ====================================
from pathlib import Path
import json, numpy as np, pandas as pd

# ---- Set these ----
# Folder that contains the experiment folders.
BASE_DIR  = Path(r"C:\Users\suribear\OneDrive - Université de Lausanne\Lab\Data\2p")
# Name of the single experiment folder whose merged output is validated.
EXP_NAME  = "L433_f03_Exp_1_flickering"     # change if you want another
# Upper bound on the number of merged columns compared against their source
# plane; if the merged array has no more columns than this, all are checked,
# otherwise a random sample (fixed seed) of this size is used.
SAMPLE_COLS = 100                           # number of merged columns to spot-check (<= N_total)

# ---- Helpers ----
def experiment_prefix(exp_name: str) -> str:
    """Keep the first two underscore-separated fields of ``exp_name``.

    A name without an underscore is returned unchanged.
    """
    pieces = exp_name.split("_")
    if len(pieces) >= 2:
        return pieces[0] + "_" + pieces[1]
    return exp_name

def plane_dirs(exp_dir: Path):
    """List the ``plane*`` directories under the experiment's Suite2p folder.

    The result is sorted by directory name (plain string order).
    """
    s2p = exp_dir / "03_analysis" / "functional" / "suite2P"
    found = [
        child
        for child in s2p.iterdir()
        if child.is_dir() and child.name.startswith("plane")
    ]
    found.sort(key=lambda child: child.name)
    return found

def find_merged_dir(exp_dir: Path, prefix: str):
    """Locate the folder that holds ``{prefix}_dFoF_merged.npy``.

    ``suite2P/merged_dFoF`` is preferred over ``plots/merged_dFoF`` (both under
    ``03_analysis/functional``).  If neither contains the file, the
    ``suite2P/merged_dFoF`` path is returned anyway.
    """
    functional = exp_dir / "03_analysis" / "functional"
    primary = functional / "suite2P" / "merged_dFoF"
    secondary = functional / "plots" / "merged_dFoF"
    wanted = f"{prefix}_dFoF_merged.npy"
    return next((d for d in (primary, secondary) if (d / wanted).exists()), primary)

def load_dfof_for_plane(dfof_dir: Path, prefix: str):
    """Return ``(array, file_name)`` for the plane's dF/F file.

    Tries ``{prefix}_dFoF.npy`` and then ``dFoF_{prefix}.npy``; yields
    ``(None, None)`` when neither exists.
    """
    options = (dfof_dir / f"{prefix}_dFoF.npy", dfof_dir / f"dFoF_{prefix}.npy")
    chosen = next((path for path in options if path.exists()), None)
    if chosen is None:
        return None, None
    return np.load(chosen), chosen.name

def load_filtered_indices_for_plane(dfof_dir: Path, prefix: str, N_cols: int):
    """Return the plane's ROI index vector as an int array of length ``N_cols``.

    The file is looked up under its common names first
    (``{prefix}_filtered_roi_indices.npy``, ``filtered_roi_indices.npy``,
    ``__filtered_roi_indices.npy``) and then via the glob
    ``*filtered*roi*indices*.npy`` (first match in name order).  If no file is
    found, or the file is not a 1-D array of length ``N_cols``, the identity
    vector ``0..N_cols-1`` is returned instead.
    """
    common_names = (
        f"{prefix}_filtered_roi_indices.npy",
        "filtered_roi_indices.npy",
        "__filtered_roi_indices.npy",
    )
    matches = [dfof_dir / name for name in common_names if (dfof_dir / name).exists()]
    if not matches:
        # any *filtered*roi*indices*.npy
        matches = sorted(dfof_dir.glob("*filtered*roi*indices*.npy"))

    if matches:
        loaded = np.load(matches[0])
        if loaded.ndim == 1 and loaded.shape[0] == N_cols:
            return loaded.astype(int)

    # fallback: identity vector [0..N-1]
    return np.arange(N_cols, dtype=int)

def approx_equal(a, b, rtol=1e-6, atol=1e-8):
    """Compare two arrays with ``np.allclose``, treating NaN as equal to NaN.

    Positions where both inputs are NaN are left out of the comparison; a NaN
    on one side only still counts as a difference.
    """
    both_nan = np.isnan(a) & np.isnan(b)
    keep = np.logical_not(both_nan)
    lhs = a[keep]
    rhs = b[keep]
    return np.allclose(lhs, rhs, rtol=rtol, atol=atol)

# ---- Locate experiment & merged outputs ----
exp_dir = BASE_DIR / EXP_NAME
prefix  = experiment_prefix(EXP_NAME)
planes  = plane_dirs(exp_dir)
if not planes:
    raise RuntimeError("No plane* dirs found.")

merged_dir = find_merged_dir(exp_dir, prefix)
merged      = np.load(merged_dir / f"{prefix}_dFoF_merged.npy")                         # (T_merged, N_total)
merged_roi  = np.load(merged_dir / f"{prefix}_dFoF_merged_filtered_roi_indices.npy")    # (N_total,)
map_df      = pd.read_csv(merged_dir / f"{prefix}_dFoF_merged_map.csv")                 # global_col, plane, roi_index_in_plane, filtered_roi_index, ...

# The three merged artefacts must describe the same number of columns.
T_merged, N_total = merged.shape
assert merged_roi.shape == (N_total,), f"ROI index vector length {merged_roi.shape[0]} != N_total {N_total}"
assert map_df.shape[0] == N_total, f"Map rows {map_df.shape[0]} != N_total {N_total}"

print(f"[info] merged shape = {merged.shape}, map rows = {len(map_df)}")

# ---- Gather per-plane raw dFoF + filtered indices ----
per_plane = []
Ts = []
for pdir in planes:
    dfof_dir = pdir / "dFoF"
    A, src = load_dfof_for_plane(dfof_dir, prefix)
    if A is None:
        print(f"[warn] {pdir.name}: missing dFoF, skipping for validation")
        continue
    Ts.append(A.shape[0])
    fi = load_filtered_indices_for_plane(dfof_dir, prefix, A.shape[1])
    per_plane.append({"plane": pdir.name, "A": A, "fi": fi})

if not per_plane:
    raise RuntimeError("No usable dFoF arrays found to validate against.")

# ---- Detect alignment mode ----
# Inferred from the merged frame count: equal to the shortest plane means
# truncation, equal to the longest means NaN padding.  When all planes have
# the same length both hold and "truncate" is reported.
T_min, T_max = min(Ts), max(Ts)
if T_merged == T_min:
    align_mode = "truncate"
elif T_merged == T_max:
    align_mode = "pad_nan"
else:
    align_mode = f"custom ({T_merged=} vs min={T_min}, max={T_max})"
print(f"[info] detected align mode: {align_mode}")

# ---- Rebuild the expected merged ROI index vector from planes and compare ----
expected_roi = []
for item in per_plane:
    expected_roi.extend(list(item["fi"]))
expected_roi = np.asarray(expected_roi, dtype=int)

# A length difference means the planes found now are not the ones that were
# merged, so the run must not end in PASS even if the overlapping part agrees.
lengths_match = expected_roi.shape[0] == N_total
if not lengths_match:
    print(f"[warn] expected ROI vector length {expected_roi.shape[0]} != N_total {N_total} "
          f"(did some planes miss dFoF during validation?)")

# Map can come from fewer planes if some were missing; compare overlapping length
L = min(expected_roi.shape[0], merged_roi.shape[0])
roi_match = np.array_equal(expected_roi[:L], merged_roi[:L])
print(f"[check] merged_filtered_roi_indices match expected (first {L}): {roi_match}")

# Also check the CSV's 'filtered_roi_index' equals merged_roi
csv_match = np.array_equal(map_df["filtered_roi_index"].values[:L], merged_roi[:L])
print(f"[check] map CSV filtered_roi_index matches merged vector (first {L}): {csv_match}")

# ---- Spot-check that merged columns equal the correct plane ROI columns ----
# Choose up to SAMPLE_COLS valid columns (fixed seed so reruns check the same ones)
rng = np.random.default_rng(0)
sample_cols = np.arange(N_total) if N_total <= SAMPLE_COLS else rng.choice(N_total, SAMPLE_COLS, replace=False)

planes_by_name = {x["plane"]: x for x in per_plane}

checked_count = 0
skipped_count = 0
fail_count = 0
for k in sample_cols:
    row = map_df.iloc[k]
    plane_name = row["plane"]
    roi_in_plane = int(row["roi_index_in_plane"])
    # find plane array
    match = planes_by_name.get(plane_name)
    if match is None:
        print(f"[warn] plane {plane_name} not found in validation set; skipping col {k}")
        skipped_count += 1
        continue
    A = match["A"]
    col_src = A[:, roi_in_plane]
    # align time
    if align_mode == "truncate":
        col_src = col_src[:T_merged]
    elif align_mode.startswith("pad_nan"):
        if len(col_src) < T_merged:
            pad = np.full(T_merged - len(col_src), np.nan, dtype=col_src.dtype)
            col_src = np.concatenate([col_src, pad], axis=0)
    # compare; a length difference (possible in the "custom" mode) is a
    # mismatch rather than an error that aborts the whole check
    checked_count += 1
    merged_col = merged[:, k]
    ok = col_src.shape == merged_col.shape and approx_equal(merged_col, col_src)
    if not ok:
        fail_count += 1
        if fail_count <= 10:
            print(f"[mismatch] col {k}: plane={plane_name}, roi={roi_in_plane}")

print(f"[summary] columns checked: {checked_count}, skipped: {skipped_count}, mismatches: {fail_count}")
all_ok = roi_match and csv_match and lengths_match and fail_count == 0 and skipped_count == 0
print("PASS" if all_ok else "CHECK WARNINGS ABOVE")



[info] merged shape = (3340, 1533), map rows = 1533
[info] detected align mode: truncate
[check] merged_filtered_roi_indices match expected (first 1533): True
[check] map CSV filtered_roi_index matches merged vector (first 1533): True
[summary] columns checked: 100, mismatches: 0
PASS
