# Preprocessing of revision experiments

This notebook contains the preprocessing steps for the revision experiments. It lists
the expected sessions and checks that they exist on flexilims.

In [None]:
# Expected sessions grouped by mouse: {mouse: {session date code: protocol}}.
# The date code is the part of the session name that follows "_S".
# Listing order is preserved in `sessions` below.
protocols_by_mouse = {
    "PZAG17.3a": {
        "20250402": "motor",
        "20250319": "multidepth",
        "20250306": "spheretube_5",
        "20250305": "spheretube_4",
        "20250303": "spheretube_3",
        "20250228": "spheretube_2",
        "20250227": "spheretube_1",
    },
    "PZAG16.3c": {
        "20250401": "motor",
        "20250317": "multidepth",
        "20250313": "spheretube_5",
        "20250310": "spheretube_4",
        "20250221": "spheretube_3",
        "20250220": "spheretube_2",
        "20250219": "spheretube_1",
    },
    "PZAG16.3b": {
        "20250401": "motor",
        "20250317": "multidepth",
        "20250313": "spheretube_5",
        "20250310": "spheretube_4",
        "20250226": "spheretube_3",
        "20250225": "spheretube_2",
        "20250224": "spheretube_1",
    },
    "PZAH17.1e": {
        "20250403": "motor",
        "20250318": "multidepth",
        "20250313": "multidepth",
        "20250311": "spheretube_5",
        "20250307": "spheretube_4",
        "20250306": "spheretube_3",
        "20250305": "spheretube_2",
        "20250304": "spheretube_1",
    },
}

# Flatten to {"<mouse>_S<date code>": protocol}, the mapping used by the later cells.
sessions = {
    f"{mouse}_S{date_code}": expected_protocol
    for mouse, dated_protocols in protocols_by_mouse.items()
    for date_code, expected_protocol in dated_protocols.items()
}

print(f"{len(sessions)} sessions to analyze")

In [None]:
import flexiznam as flz

project = "colasa_3d-vision_revisions"
# When True, all session entities are fetched once and filtered locally by name,
# instead of issuing one `get_entity` query per session.
TEMPORARY_FIX = True
flm_sess = flz.get_flexilims_session(project_id=project)
if TEMPORARY_FIX:
    all_sessions = flz.get_entities(datatype="session", flexilims_session=flm_sess)

# Maps session name -> [flexilims session entry, expected protocol]
valid_sessions = {}
for session, protocol in sessions.items():
    if not TEMPORARY_FIX:
        sess = flz.get_entity(name=session, project_id=project, datatype="session")
    else:
        matches = all_sessions[all_sessions["name"] == session]
        if len(matches) == 0:
            sess = None
        else:
            # A session name must identify a single entry
            assert len(matches) == 1
            sess = matches.iloc[0]
    if sess is not None:
        valid_sessions[session] = [sess, protocol]
    else:
        print(f"Session {session} doesn't exist")
print(f"{len(valid_sessions)}/{len(sessions)} valid sessions to analyze")

In [None]:
# Check that every valid session contains the recordings its protocol requires.
# Sessions without any recording are only reported; a missing protocol stops the cell.
for session_name, (session, protocol) in valid_sessions.items():
    # Recordings that are children of this session on flexilims
    recordings = flz.get_children(
        session.id, children_datatype="recording", flexilims_session=flm_sess
    )
    if len(recordings) == 0:
        print(f"No recordings for session {session_name}")
        continue

    recorded_protocols = recordings.protocol.values
    # Required for every session, whatever its expected protocol
    assert (
        "SpheresPermTubeReward" in recorded_protocols
    ), f"Session {session_name} doesn't have the Sphere protocol"
    # Motor and multidepth sessions need one additional recording
    if protocol == "multidepth":
        assert (
            "SpheresPermTubeReward_multidepth" in recorded_protocols
        ), f"Session {session_name} doesn't have the multidepth protocol"
    elif protocol == "motor":
        assert (
            "SpheresTubeMotor" in recorded_protocols
        ), f"Session {session_name} doesn't have the motor protocol"

In [None]:
# There should be a suite2p dataset for each session
bad_sessions = []
for session_name, (session, protocol) in valid_sessions.items():
    # Look for a dataset of type "suite2p_rois" whose origin is this session
    suite2p_dataset = flz.get_entity(
        datatype="dataset",
        origin_id=session.id,
        query_key="dataset_type",
        query_value="suite2p_rois",
        project_id=project,
    )
    if suite2p_dataset is not None:
        continue
    print(f"Session {session_name} doesn't have a suite2p dataset")
    bad_sessions.append(session_name)

print(f"{len(bad_sessions)} sessions don't have a suite2p dataset")
# Drop them from valid_sessions (in place); `bad_sessions` keeps their names.
# NOTE: re-running this cell alone yields an empty `bad_sessions`, because the
# sessions found the first time have already been removed from `valid_sessions`.
for session_name in bad_sessions:
    valid_sessions.pop(session_name)
print(f"{len(valid_sessions)} sessions to analyze after removing bad sessions")

In [None]:
# Cell to re-run suite2p

# If you want to run this cell, you'll need extra dependencies of 2p-preprocess:
# pip install suite2p jax optax --no-deps
# And twop_preprocess itself
# Note that if use_slurm is true, they just need to be imported successfully, another job
# will actually run the code

from pathlib import Path
from twop_preprocess.calcium import extract_session

# One sub-folder per session is created under this directory and handed to
# `extract_session` as `slurm_folder`.
slurm_folder = Path.home() / "slurm_logs" / project

# Options forwarded unchanged to `extract_session`
conflicts = "overwrite"
run_split = False
run_suite2p = True
run_dff = True
# Manual switch: set to False to skip job submission when running the notebook
if True:
    for session_name in bad_sessions:
        # Session names are "<mouse>_S<date code>"
        mouse_name = session_name.split("_")[0]
        mouse = flz.get_entity(
            name=mouse_name, datatype="mouse", flexilims_session=flm_sess
        )
        if mouse is None:
            raise ValueError(f"Mouse {mouse_name} not found on flexilims")
        # Two characters following "GCaMP" in the genotype, e.g. "6f" or "6s".
        # `partition` yields "" when "GCaMP" is absent, so that case reaches the
        # ValueError below instead of failing with an IndexError.
        gcamp = mouse.genotype_text.partition("GCaMP")[2][:2]
        # Detection thresholds depend on the indicator variant
        if gcamp == "6f":
            flow_threshold = 2
            cellprob_threshold = 0
        elif gcamp == "6s":
            flow_threshold = 4
            cellprob_threshold = -3
        else:
            raise ValueError(
                f"Unknown Gcamp version: {gcamp} in mouse {mouse_name} with"
                f" genotype: {mouse.genotype_text}"
            )
        ops = {
            "tau": 0.7,
            "ast_neuropil": False,
            "delete_bin": False,
            "roidetect": True,
            "flow_threshold": flow_threshold,
            "cellprob_threshold": cellprob_threshold,
        }
        # delete None values
        ops = {k: v for k, v in ops.items() if v is not None}
        (slurm_folder / session_name).mkdir(exist_ok=True, parents=True)
        # Pass the name of the session being iterated, not the `session` variable
        # left over from the loops of earlier cells.
        extract_session(
            project,
            session_name,
            conflicts=conflicts,
            run_split=run_split,
            run_suite2p=run_suite2p,
            run_dff=run_dff,
            ops=ops,
            use_slurm=True,
            slurm_folder=slurm_folder / session_name,
        )
        # NOTE(review): only the first session of `bad_sessions` is submitted;
        # remove this `break` to process all of them.
        break

In [None]:
from cottage_analysis.pipelines import pipeline_utils

# Sessions that must not be submitted again because a job is already running
running = [
    "PZAG16.3b_S20250317",
    "PZAG16.3b_S20250401",
    "PZAG16.3c_S20250401",
]
# Manual switch: set to True to submit the analysis pipeline for the valid sessions
if False:
    for session_name in valid_sessions:
        if session_name not in running:
            print(f"Submitting session {session_name} to the pipeline")
            pipeline_utils.sbatch_session(
                project=project,
                session_name=session_name,
                pipeline_filename="run_analysis_pipeline.sh",
                conflicts="skip",
                photodiode_protocol=5,
            )
        else:
            print(f"Session {session_name} is already running")

In [None]:
# Inspect the flexilims entry of a single session.
# Raises KeyError if this session is not (or no longer) in `valid_sessions`,
# e.g. after it was removed for lacking a suite2p dataset.
bad = "PZAH17.1e_S20250318"

# Each value of `valid_sessions` is [flexilims session entry, expected protocol]
sess, protocol = valid_sessions[bad]
# Last expression of the cell: displays the session entry
sess

In [None]:
import tifffile

# Load the dataset to inspect, identified by its flexilims id
dataset_id = "67ed63e5b99b5006b4e789b5"
ds = flz.Dataset.from_flexilims(id=dataset_id, flexilims_session=flm_sess)
ds

In [None]:
# Read the first tif file listed in the dataset attributes and show its shape
first_tif = ds.path_full / ds.extra_attributes["tif_files"][0]
img = tifffile.imread(first_tif)
img.shape

In [None]:
# WARNING: destructive and unguarded. This runs on every "Run All" and
# presumably removes the dataset entry loaded above from flexilims (TODO confirm).
flm_sess.delete(ds.id)