## This is code to look at raw traces and help diagnose problems. 

This notebook works with the `2p-preprocess` environment.

In [None]:
import numpy as np
import pickle
import os
import matplotlib.pyplot as plt
from  pathlib import Path
from tqdm import tqdm
import twop_preprocess.calcium as twop
from tifffile import TiffFile
from sklearn import mixture
import flexiznam as flz
from flexiznam.schema import Dataset


In [None]:
# Mouse, session and recording under inspection.
MOUSE = "BRAC9057.4j"
SESSION = "S20240517"
# Alternative recording: "R170256_KellerTube"
RECORDING = "R152318_SpheresPermTubeReward"

# BASE holds the suite2p outputs read below; DATA holds the raw tiffs.
BASE = Path(
    "/camp/lab/znamenskiyp/home/shared/projects/depth_mismatch_seq"
)
DATA = Path(
    "/camp/lab/znamenskiyp/data/instruments/raw_data/projects/depth_mismatch_seq/"
)



In [None]:
# Identifier passed to flexilims as `project_id`, and the session entity name.
project = "663214d08993fd0b6e6b5f1d"
session_name = f"{MOUSE}_{SESSION}"

flexilims_session = flz.get_flexilims_session(project_id=project)

exp_session = flz.get_entity(
    datatype="session", name=session_name, flexilims_session=flexilims_session
)
# Fail with a readable message instead of an opaque error on `exp_session['id']`.
if exp_session is None:
    raise ValueError(f"Session {session_name} not found in project {project}")

si_datasets = flz.get_datasets(
    exp_session['id'],
    dataset_type='suite2p_rois',
    flexilims_session=flexilims_session
)
# Same guard for the dataset list: indexing an empty result would raise a bare
# IndexError that says nothing about which session is missing its ROIs.
if len(si_datasets) == 0:
    raise ValueError(f"No suite2p_rois dataset found for session {session_name}")

# Only the first suite2p_rois dataset of the session is used in this notebook.
suite2p_dataset = si_datasets[0]

In [None]:
base_path = BASE / MOUSE / SESSION / "suite2p_rois_0"
base_path

In [None]:
# One suite2p output folder per imaging plane (plane0 ... plane5).
plane_list = [base_path / f'plane{plane}' for plane in range(6)]

In [None]:
def load_npy_files(directory):
    """Load every ``.npy`` file located directly inside ``directory``.

    Args:
        directory: path of the folder to scan (sub-folders are not visited).

    Returns:
        dict: maps each file name, stripped of its extension, to whatever
        ``np.load`` returns for that file.
    """
    # NOTE(review): allow_pickle=True unpickles arbitrary objects; only use
    # this on trusted data.
    return {
        os.path.splitext(entry)[0]: np.load(
            os.path.join(directory, entry), allow_pickle=True
        )
        for entry in os.listdir(directory)
        if entry.endswith('.npy')
    }

In [None]:
# Load all .npy outputs of every plane; trace_list[p] is the dict for plane p.
trace_list = []
for plane_dir in tqdm(plane_list):
    trace_list.append(load_npy_files(plane_dir))

In [None]:
# Index into trace_list (i.e. the imaging plane) inspected by the cells below.
idx =  2
# List which arrays were loaded for that plane.
trace_list[idx].keys()

In [None]:
trace_list[idx]["F"].shape

## Plotting F

In [None]:
# Stack the first 20 ROI traces of the selected plane, shifting each one up by
# `top` so they do not overlap; the horizontal line marks each trace's zero.
subject = "F"
top = 3000

fig, ax = plt.subplots(figsize=(30, 30), facecolor='w')

traces = trace_list[idx][subject]
for roi in tqdm(range(20)):
    shift = top * roi
    ax.plot(traces[roi, :] + shift)
    ax.axhline(shift)

fig.suptitle(f"{subject} for {MOUSE}, {SESSION}, plane {idx}", size = 50)

In [None]:
# Zoom on frames 5000-6000 of ROI 2 in the raw fluorescence.
fig, ax = plt.subplots(figsize=(30, 10), facecolor='w')
zoom_window = slice(5000, 6000)
ax.plot(trace_list[idx]["F"][2, zoom_window])

## Finding out F0

In [None]:
trace_list[idx]["f0_ast"].shape

In [None]:
plt.hist(trace_list[idx]["f0_ast"])

In [None]:


# One small histogram of the "f0_ast" values per plane.
for plane_traces in tqdm(trace_list):
    fig, ax = plt.subplots(figsize=(5, 2), facecolor='w')
    ax.hist(plane_traces["f0_ast"])

##  After neuropil correction

In [None]:
# Stack the first 20 "Fast" traces of the selected plane, shifting each one up
# by `top`; the horizontal line marks each trace's zero.
subject = "Fast"
top = 3000

fig, ax = plt.subplots(figsize=(30, 30), facecolor='w')

traces = trace_list[idx][subject]
for roi in tqdm(range(20)):
    shift = top * roi
    ax.plot(traces[roi, :] + shift)
    ax.axhline(shift)

# Vertical marker at frame 15857.
ax.axvline(15857)

fig.suptitle(f"{subject} for {MOUSE}, {SESSION}, plane {idx}", size = 50)

## And now plotting dff

In [None]:
# Stack the first 20 "dff_ast" traces of the selected plane, shifting each one
# up by `top`; the horizontal line marks each trace's zero.
subject = "dff_ast"
top = 6

fig, ax = plt.subplots(figsize=(30, 30), facecolor='w')

traces = trace_list[idx][subject]
for roi in tqdm(range(20)):
    shift = top * roi
    ax.plot(traces[roi, :] + shift)
    ax.axhline(shift)

# Vertical marker at frame 15857.
ax.axvline(15857)

fig.suptitle(f"{subject} for {MOUSE}, {SESSION}, plane {idx}", size = 50)

In [None]:
# Stack the first 20 "Fneu" traces of the selected plane, shifting each one up
# by `top`; the horizontal line marks each trace's zero.
subject = "Fneu"
top = 1000

fig, ax = plt.subplots(figsize=(30, 30), facecolor='w')

traces = trace_list[idx][subject]
for roi in tqdm(range(20)):
    shift = top * roi
    ax.plot(traces[roi, :] + shift)
    ax.axhline(shift)

# Vertical marker at frame 15857.
ax.axvline(15857)

fig.suptitle(f"{subject} for {MOUSE}, {SESSION}, plane {idx}", size = 50)

In [None]:
trace_list[idx].keys()

In [None]:
ops = trace_list[idx]['ops'].item()

In [None]:
ops["correct_offset"]

## Calculating  $dF/F$ only for one recording  
  
We first split F and Fneu by recording, then re-calculate dF/F and use that for a new raster.

In [None]:
RECORDING = "R154643_SpheresPermTubeReward"

In [None]:
base_path = BASE / MOUSE / SESSION / RECORDING / "suite2p_traces_0"
base_path

In [None]:
data_path = DATA / MOUSE / SESSION / RECORDING

In [None]:
# Per-plane folders of the single-recording suite2p traces (plane0 ... plane5).
plane_list = [base_path / f'plane{plane}' for plane in range(6)]

# rec_trace_list[p] is the dict of .npy arrays for plane p of this recording.
rec_trace_list = []
for plane_dir in tqdm(plane_list):
    rec_trace_list.append(load_npy_files(plane_dir))


In [None]:
rec_trace_list[idx]["F"].shape

### Estimate offset

In [None]:
n_components = 3

# Find the first tiff at the path. Path.glob yields entries in arbitrary,
# filesystem-dependent order, so sort to make "first" deterministic.
tiffs = sorted(Path(data_path).glob("*.tif"))
if len(tiffs) == 0:
    raise ValueError(f"No tiffs found at {data_path}")
tiff = tiffs[0]
# Load only the first frame of that tiff.
with TiffFile(tiff) as tif:
    frame = tif.asarray(key=0)
# Fit a Gaussian mixture to the pixel values of that frame; the sorted
# component means are the candidate offsets (lowest first).
gmm = mixture.GaussianMixture(n_components=n_components, random_state=42).fit(
    frame.reshape(-1, 1)
)
gmm_means = np.sort(gmm.means_[:, 0])

In [None]:
# Plot the GMM components
# Evaluate the fitted mixture on 1000 evenly spaced values spanning the pixel
# range of the frame. ndarray.min()/max() are vectorised; the builtin
# min()/max() would iterate over every pixel in Python. The reshape keeps `x`
# as the (n_samples, 1) column that the GMM methods expect.
x = np.linspace(frame.min(), frame.max(), 1000).reshape(-1, 1)
logprob = gmm.score_samples(x)
pdf = np.exp(logprob)
print(pdf.shape)
probs = gmm.predict_proba(x)
print(probs.shape)
# Per-component density: posterior responsibility times the total density.
pdf_individual = probs * pdf[:, np.newaxis]

plt.plot(x, pdf, '-k', label='Total')
plt.plot(x, pdf_individual, '--', label='Components')
plt.legend(loc='best')

plt.title(f'Gaussian Mixture Model Fit, {RECORDING}')
plt.xlabel('Data')
plt.ylabel('Density')
plt.show()

In [None]:
# Histogram of the first-frame pixel values with the sorted GMM means overlaid.
plt.hist(frame.reshape(-1, 1), bins = 200)
colors = ["red", "green", "brown"]
# Do not reuse `idx` as the loop variable: it is the plane index used by the
# rest of the notebook and would be silently overwritten here.
for component, mean in enumerate(gmm_means):
    plt.axvline(
        mean,
        color=colors[component % len(colors)],  # cycle if there are more components than colors
        alpha=0.2,
        label=f"component_{component}",
    )
plt.title(f"Histogram of values in first frame, n = {len(frame.reshape(-1, 1))}, {RECORDING}")
plt.legend()


In [None]:
# Recordings of this session to compare, in the order used by the cells below.
recordings = [
    "R154643_SpheresPermTubeReward",
    "R164953_KellerTube",
    "R171153_KellerTube_playback",
    "R172517_KellerTube",
]

Let's check how much this varies from recording to recording.

In [None]:
n_components = 3

# means[r, c] is the c-th smallest GMM component mean of recording r.
means = np.zeros((len(recordings), n_components))


for i, recording in enumerate(recordings):
    rec_data_path = DATA / MOUSE / SESSION / recording

    # Find the first tiff at the path. Path.glob yields entries in arbitrary,
    # filesystem-dependent order, so sort to make "first" deterministic.
    tiffs = sorted(Path(rec_data_path).glob("*.tif"))
    if len(tiffs) == 0:
        raise ValueError(f"No tiffs found at {rec_data_path}")
    tiff = tiffs[0]
    # Load only the first frame of that tiff.
    with TiffFile(tiff) as tif:
        frame = tif.asarray(key=0)
    # Fit the mixture to the pixel values and keep the sorted component means.
    gmm = mixture.GaussianMixture(n_components=n_components, random_state=42).fit(
        frame.reshape(-1, 1)
    )
    gmm_means = np.sort(gmm.means_[:, 0])

    means[i, :] = gmm_means

In [None]:
means

##  Offset correction

Okay, we offset-correct the traces to eliminate possible issues step by step. Because we have four offsets (one per recording), we split the concatenated trace into four segments.

In [None]:
# Number of frames of each recording, read from the per-recording suite2p
# traces of plane `idx` (`idx` must still hold the plane index chosen earlier).
# Stored as plain ints so they can be used directly as slice bounds.
rec_lengths = [0] * len(recordings)
# enumerate() has no length, so give tqdm the total explicitly.
for index, recording in tqdm(enumerate(recordings), total=len(recordings)):
    rec_base_path = BASE / MOUSE / SESSION / recording / "suite2p_traces_0"

    plane_list = [rec_base_path / f'plane{plane}' for plane in range(6)]

    rec_trace_list = []
    for plane_path in tqdm(plane_list):
        rec_trace_list.append(load_npy_files(plane_path))

    # F is indexed [roi, frame] elsewhere in this notebook, so axis 1 is time.
    rec_lengths[index] = int(rec_trace_list[idx]["F"].shape[1])


In [None]:
# Build half-open [start, end) frame intervals for the recordings inside the
# concatenated trace. They are used below as `F[:, start:end]`, so each
# recording must start exactly where the previous one ends; starting at
# `end + 1` would drop one frame per boundary and push the last interval past
# the end of the trace.
_rec_lengths = np.asarray(rec_lengths, dtype=int)
_rec_ends = np.cumsum(_rec_lengths)
_rec_starts = _rec_ends - _rec_lengths
# Column 0 is the first frame, column 1 the (exclusive) last frame.
rec_intervals = np.column_stack((_rec_starts, _rec_ends)).astype(int)

rec_intervals

In [None]:
# Subtract from F, recording by recording, the lowest GMM mean of that
# recording. Frames outside every interval stay at zero.
raw_F = trace_list[idx]["F"]
Foffset = np.zeros_like(raw_F)
for (start, end), offset in zip(rec_intervals, means[:, 0]):
    Foffset[:, start:end] = raw_F[:, start:end] - offset

In [None]:
# Same offset subtraction for the neuropil trace. Frames outside every
# interval stay at zero.
raw_Fneu = trace_list[idx]["Fneu"]
Fneuoffset = np.zeros_like(raw_Fneu)
for (start, end), offset in zip(rec_intervals, means[:, 0]):
    Fneuoffset[:, start:end] = raw_Fneu[:, start:end] - offset

In [None]:
# Stack the first 20 "Fast" traces loaded from disk for the selected plane,
# shifting each one up by `top`; the horizontal line marks each trace's zero.
subject = "Fast"
top = 3000

fig, ax = plt.subplots(figsize=(30, 30), facecolor='w')

traces = trace_list[idx][subject]
for roi in tqdm(range(20)):
    shift = top * roi
    ax.plot(traces[roi, :] + shift)
    ax.axhline(shift)

# Vertical marker at frame 15857.
ax.axvline(15857)

fig.suptitle(f"{subject} for {MOUSE}, {SESSION}, plane {idx}", size = 50)

In [None]:
# Stack the first 20 offset-corrected traces, shifting each one up by `top`;
# the horizontal line marks each trace's zero.
subject = "Foffset"
top = 3000

fig, ax = plt.subplots(figsize=(30, 30), facecolor='w')

for roi in tqdm(range(20)):
    shift = top * roi
    ax.plot(Foffset[roi, :] + shift)
    ax.axhline(shift)

# Vertical marker at frame 15857.
ax.axvline(15857)

fig.suptitle(f"{subject} for {MOUSE}, {SESSION}, plane {idx}", size = 50)

In [None]:
plt.plot(Foffset[4, :])

##  Detrending. 

In [None]:
fs =  suite2p_dataset.extra_attributes["fs"] 

In [None]:
def detrend(F, first_frames, last_frames, ops, fs):
    """
    Detrend the concatenated fluorescence trace for each recording.

    For every recording a rolling-percentile baseline is computed per ROI. The
    per-ROI median of the first recording's baseline is the reference level:
    each recording is corrected by how its own baseline deviates from that
    reference, either by subtraction or by division.

    Note: F is modified in place (and also returned). Pass a copy if the
    original array must be preserved.

    Args:
        F (numpy.ndarray): shape nrois x time, raw fluorescence trace for all rois extracted from suite2p
        first_frames (numpy.ndarray): shape nrecordings, first frame of each recording
        last_frames (numpy.ndarray): shape nrecordings, last frame of each recording
            (used as an exclusive slice bound)
        ops (dict): dictionary of suite2p settings; reads "detrend_win",
            "detrend_pctl" and "detrend_method"
        fs (float): sampling rate used to convert ops["detrend_win"] into a
            number of frames (presumably Hz with the window in seconds - confirm)

    Returns:
        F (numpy.ndarray): shape nrois x time, detrended fluorescence trace for all rois extracted from suite2p

    """
    win_frames = int(ops["detrend_win"] * fs)
    percentile = ops["detrend_pctl"]
    subtract = ops["detrend_method"] == "subtract"
    # The rolling baseline is shorter than the trace; this assumes
    # rolling_percentile returns len(trace) - win_frames + 1 samples (TODO confirm).
    # Pad the missing win_frames - 1 samples back with edge values. For even
    # windows this is the original (w // 2, w // 2 - 1); for odd windows the
    # original padding was one sample short and the row assignment below failed.
    pad_width = (win_frames // 2, (win_frames - 1) // 2)

    first_recording_baseline = None
    for i, (start, end) in enumerate(zip(first_frames, last_frames)):
        rec_rolling_baseline = np.zeros_like(F[:, start:end])
        for j in range(F.shape[0]):
            rec_rolling_baseline[j, :] = np.pad(
                twop.rolling_percentile(F[j, start:end], win_frames, percentile),
                pad_width,
                mode='edge',
            )

        if i == 0:
            # Reference level: per-ROI median baseline of the first recording,
            # kept as a column so it broadcasts over time.
            first_recording_baseline = np.median(
                rec_rolling_baseline, axis=1
            ).reshape(-1, 1)
        if subtract:
            F[:, start:end] -= rec_rolling_baseline - first_recording_baseline
        else:
            F[:, start:end] /= rec_rolling_baseline / first_recording_baseline
    return F

In [None]:
# detrend modifies its input in place, so run it on copies to keep the
# offset-corrected arrays intact.
first_frames, last_frames = rec_intervals[:, 0], rec_intervals[:, 1]

F_throwaway = Foffset.copy()
Fdet = detrend(F_throwaway, first_frames, last_frames, ops, fs)

Fneu_throwaway = Fneuoffset.copy()
Fneudet = detrend(Fneu_throwaway, first_frames, last_frames, ops, fs)

In [None]:
# Stack the first 20 detrended traces, shifting each one up by `top`; the
# horizontal line marks each trace's zero.
subject = "Fdet"
top = 3000

fig, ax = plt.subplots(figsize=(30, 30), facecolor='w')

for roi in tqdm(range(20)):
    shift = top * roi
    ax.plot(Fdet[roi, :] + shift)
    ax.axhline(shift)

# Vertical marker at frame 15857.
ax.axvline(15857)

fig.suptitle(f"{subject} for {MOUSE}, {SESSION}, plane {idx}", size = 50)

In [None]:
plt.plot(Fdet[4, :])

My duuuuuude

In [None]:
# Rolling percentile of ROI 4 over the whole offset-corrected trace, using the
# same window and percentile settings as the detrending step.
detrend_pctl = ops["detrend_pctl"]
win_frames = int(ops["detrend_win"] * fs)
roll = twop.rolling_percentile(Foffset[4, :], win_frames, detrend_pctl)

subject = f"Percentile {detrend_pctl} "

plt.plot(roll)
# Vertical marker at frame 15857.
plt.axvline(15857, color="red", alpha=0.5)

plt.suptitle(f"{subject} for {MOUSE}, {SESSION}, plane {idx}")


In [None]:
# Overlay the distribution of raw F values (all ROIs pooled) for the first two
# recordings only.
reclist = list(np.zeros_like(rec_intervals[:, 0]))
raw_F = trace_list[idx]["F"]
for index, interval in enumerate(rec_intervals[:2]):
    print(interval)
    start, end = interval
    reclist[index] = raw_F[:, start:end].reshape(-1)
    plt.hist(
        reclist[index],
        bins=200,
        density=True,
        alpha=0.2,
        label=f"Recording {index}",
    )

plt.legend()

#rec1 = trace_list[idx]["F"][:, ]

In [None]:
# NOTE: conflicts into overwrite, throw gpu thing with run to no