# Mismatch detection
>Antonio Colás Nieto, @acolasn, anconi.1999@gmail.com

We will run standard synchronization code and attempt to find and analyse mismatch responses.  

Taken from `demo_analysis.ipynb`, and hoping that I need to change as little as possible. 

In [None]:
%reload_ext autoreload
%autoreload 2

import functools

print = functools.partial(print, flush=True)

import os
import random
import numpy as np
import pandas as pd
from scipy.stats import zscore, mannwhitneyu
import matplotlib.pyplot as plt
from pathlib import Path
import pickle
from cottage_analysis.analysis import (
    mismatch)
import flexiznam as flz
from tqdm import tqdm




In [None]:
mismatch.analyse_session("BRAC9057.4j_S20240514")

In [None]:
PROJECT = "663214d08993fd0b6e6b5f1d"
PROTOCOL = "KellerTube"
MESSAGES = "harpmessage.bin"

In [None]:
# Example session
mouse = "BRAC9057.4j"
session = "BRAC9057.4j_S20240516"

flexilims_session = flz.get_flexilims_session(project_id=PROJECT)
# all_protocol_recording_entries = generate_filepaths.get_all_recording_entries(project=project,
#                                                                               mouse=mouse,
#                                                                               session=session,
#                                                                               protocol=protocol,
#                                                                               flexilims_session=flexilims_session)

# # DO NOT RUN THIS FUNCTION (TAKES 2hrs ish): to find monitor frames from photodiode signal
# find_monitor_frames(project=project,
#                     mouse=mouse,
#                     session=session,
#                     protocol=protocol,
#                     all_protocol_recording_entries=None,
#                     irecording=None,
#                     flexilims_session=None)


exp_session = flz.get_entity(
    datatype="session", name=session, flexilims_session=flexilims_session
)

si_datasets = flz.get_datasets(
    exp_session['id'],
    dataset_type='suite2p_rois',
    flexilims_session=flexilims_session
)

suite2p_dataset = si_datasets[0]

In [None]:
recordings = flz.get_entities(
    datatype="recording",
    origin_id=exp_session['id'],
    query_key="recording_type",
    query_value="two_photon",
    flexilims_session=flexilims_session,
)

In [None]:
recordings.name[1]

In [None]:
which_recording= 3

recname = recordings.name[which_recording]

recording = flz.get_entity(
    datatype="recording",
    name=recname,
    flexilims_session=flexilims_session,
)

In [None]:
recording.name

## Synchronization

Ideally, this would work right off the bat.

In [None]:
# Generate synchronisation dataframes
#
# NOTE: this rebinds `recordings`, which until now held the table returned by
# flz.get_entities; from here on it is whatever sync_all_recordings returns.
# The protocol name comes from the PROTOCOL constant defined with the other
# notebook constants, so it cannot drift from the value used elsewhere.
vs_df_all, imaging_df_all, recordings = mismatch.sync_all_recordings(
    session_name=session,
    flexilims_session=flexilims_session,
    project=PROJECT,
    filter_datasets={"anatomical_only": 3},
    recording_type="two_photon",
    protocol_base=PROTOCOL,
    photodiode_protocol=5,
    return_volumes=True,
)

# Analysis: have a working pipeline

Assuming there is only one  depth recording

In [None]:
closed_loop = imaging_df_all[which_recording-1]

In [None]:
#plt.plot(imaging_closed_loop_all["mouse_z"]/imaging_closed_loop_all["mismatch_mouse_z"])
plt.plot(closed_loop["mouse_z"])


## Finding mismatches

In [None]:
closed_loop["mousez_dif"]  = np.zeros(len(closed_loop["mouse_z"]))
closed_loop.loc[1:, 'mousez_dif'] = np.diff(closed_loop['mouse_z'])


In [None]:
closed_loop["mismz_dif"]  = np.zeros(len(closed_loop["mouse_z"]))
closed_loop.loc[1:, 'mismz_dif'] = np.diff(closed_loop['mismatch_mouse_z'])

In [None]:
closed_loop["mism_ratio"] = closed_loop["mousez_dif"]/closed_loop["mismz_dif"]

In [None]:
plt.plot(closed_loop["mism_ratio"][720:740])

In [None]:
closed_loop["mismatch"] = ((closed_loop["mism_ratio"] > 1.2) | (closed_loop["mism_ratio"] < -1000)).astype(int)

#To catch the fact that it's -inf sometimes during a mismatch

In [None]:
up= 0
down =  -1

plt.plot(closed_loop["mism_ratio"][up:down])
plt.plot(closed_loop["mismatch"][up:down])

closed_loop["mism_ratio"][up:down]

In [None]:
# NOTE(review): the result of `drop` is neither assigned nor applied in place,
# so this cell only displays a copy without the two helper columns;
# `closed_loop` itself still contains "mismz_dif" and "mousez_dif".
# Confirm whether a display-only call is what is wanted here.
closed_loop.drop(columns = {"mismz_dif", "mousez_dif"})

## Defining mismatch window for raster

By design, 5 frames before and 10 after the mismatch onset frame. The length is 15 frames. 

In [None]:
# Column that will flag every frame belonging to a mismatch window (0 = outside)
closed_loop['range_indicator'] = 0

# Frame-to-frame difference of the binary "mismatch" column: +1 marks the frame
# where a mismatch begins, -1 the frame after it ends. Row 0 stays at 0.
closed_loop["start_mismatch"]=np.zeros(len(closed_loop["mismatch"]))

# NOTE(review): `.loc[1:]` is label-based, so this assumes the index is the
# default 0..n-1 range — confirm.
closed_loop.loc[1:,"start_mismatch"] = np.diff(closed_loop["mismatch"])

# Index labels of the mismatch onset frames (where 'start_mismatch' is 1)
indices = closed_loop.index[closed_loop['start_mismatch'] == 1].tolist()
print(indices)

# Flag the frames from 5 before to 10 after each onset, clipped to the frame
# range of the recording. `.loc[start:end]` includes both end points.
for idx in indices:
    start = max(idx - 5, 0)
    end = min(idx + 10, len(closed_loop['mismatch']) - 1)
    print((start,  end))
    closed_loop.loc[start:end, 'range_indicator'] = 1

For every neuron, we want to find a point in time where the mismatch begins and calculate the average of its responses to a mismatch

In [None]:
# Create the initial DataFrame with range_indicator
neurons_closed_loop = pd.DataFrame({"range_indicator": closed_loop["range_indicator"].copy()})

# Extract the number of neurons
neurons = closed_loop["closed_loopfs"][0].shape[1]

# Create a DataFrame for the neurons data
neuron_data = pd.DataFrame(
    {f"neuron{neuron}": [closed_loop["closed_loopfs"][i][0][neuron] for i in range(len(closed_loop["mismatch"]))]
     for neuron in range(neurons)}
)

# Concatenate the range_indicator and neuron data
neurons_closed_loop = pd.concat([neurons_closed_loop, neuron_data], axis=1)

In [None]:
# The per-neuron frame built in the previous cell is called
# `neurons_closed_loop`, but the rest of this section refers to it as
# `neurons_df`. Bind that name here (same object, no copy) so the notebook runs
# top to bottom without a NameError.
neurons_df = neurons_closed_loop

# +1 wherever `range_indicator` rises from 0 to 1, i.e. at the first frame of
# each mismatch window. Row 0 has no predecessor and stays at 0.
neurons_df["start_mismatch"] = np.zeros(len(neurons_df["range_indicator"]))
neurons_df.loc[1:, "start_mismatch"] = np.diff(neurons_df["range_indicator"])

n_mismatches = len(neurons_df["start_mismatch"][neurons_df["start_mismatch"] == 1])

print(n_mismatches)

# One (n_mismatches, 15) array per neuron: rows are mismatch events, columns
# are the 15 frames of the window. Filled in by the following cell.
mismatches_per_neuron = [np.zeros((n_mismatches, 15)) for _ in range(neurons)]



In [None]:


# Walk through the frames once, tracking whether we are inside a run of
# range_indicator == 1, and copy every completed run into the per-neuron arrays.
in_interval = False
start_idx = None

# idx_mismatch counts runs as they open; -1 means "none seen yet"
idx_mismatch = -1
for idx, row in neurons_df.iterrows():
    if row['range_indicator'] == 1 and not in_interval:
        # First frame of a new run
        start_idx = idx
        in_interval = True
        idx_mismatch += 1
        print(f"This is mismatch {idx_mismatch}")
    elif row['range_indicator'] == 0 and in_interval:
        # First frame after the run; the run itself ended on the previous frame
        end_idx = idx-1
        for neuron in range(neurons):
            # NOTE(review): the slice must have exactly as many frames as the
            # destination row has columns, otherwise this assignment raises.
            # Runs clipped at the start of the recording, or merged because two
            # windows overlap, will have a different length — confirm.
            mismatches_per_neuron[neuron][idx_mismatch, :] = neurons_df[f"neuron{neuron}"][start_idx:end_idx]
        in_interval = False
        print(f"start and end idx: {(start_idx, end_idx)}")
# NOTE(review): a run that is still open on the last frame is never copied,
# because copying only happens when a following 0 is encountered.

In [None]:
mismatch_raster = np.zeros((neurons, 15))

for i in range(neurons):
    mismatch_raster[i, :] = np.mean(mismatches_per_neuron[i], axis = 0)

#print(mismatch_raster.shape)

# Score a single raster row by how much its activity changes around frame 5
def calculate_difference(row):
    """Return the sum of `row[6:11]` minus the sum of `row[0:5]`."""
    return np.sum(row[6:11]) - np.sum(row[:5])

# Score every neuron (row) of the raster with calculate_difference
differences = np.apply_along_axis(calculate_difference, 1, mismatch_raster)
#print(differences[0:10])

# Row order that puts the largest differences first
sorted_indices = np.argsort(-differences)
#print(sorted_indices[0:10])

# Re-order the raster accordingly
sorted_mismatch_raster = mismatch_raster[sorted_indices]

# Range of (sorted) neurons to display. These now drive the slice below;
# previously the slice was hard-coded to [0:100] and ignored them.
start = 0
end = 100

fig = plt.figure(figsize=(30,10),facecolor='w')
ax = fig.add_subplot(111)
im = ax.imshow(sorted_mismatch_raster[start:end])

ax.set_title(f"Raster plot of first {end} neurons aligned to mismatch")
ax.set_xlabel("Frames")
ax.set_ylabel("Neurons")
fig.colorbar(im, label  =  "dff")
# Grey line at frame 5 of the window (5 frames are taken before each onset)
ax.axvline(5, color = "grey")

In [None]:
plt.plot(differences[sorted_indices])


In [None]:
pop_response  = np.mean(sorted_mismatch_raster[0:100], axis = 0)
plt.plot(pop_response)

## Now sorting a la Keller

Evaluate significant mismatch modulation by producing a series of random events and looking at the differential modulation from mismatches.

In [None]:
def generate_random_events(n_frames, n_events = 100):
    """Draw `n_events` random frame numbers between 0 and `n_frames - 1`.

    Frames are drawn independently with `random.randint`, so the same frame
    can appear more than once in the returned list.
    """
    last_frame = n_frames - 1
    events = []
    for _ in range(n_events):
        events.append(random.randint(0, last_frame))
    return events

In [None]:
n_frames = len(closed_loop)
events = generate_random_events(n_frames)

In [None]:
closed_loop["randevents"] = 0
closed_loop.loc[events, "randevents"] = 1
plt.plot(closed_loop["randevents"])

In [None]:
rand_rec, indices = mismatch.create_mismatch_window(closed_loop, window_start = 0, window_end = 5, event  = "randevents")

In [None]:
neurons, rand_neurodf = mismatch.build_neurons_df(rand_rec)

In [None]:
rand_misperneuron = mismatch.build_mismatches_per_neuron_list(neurons, rand_neurodf, window_start = 0, window_end = 5, indices = indices)

In [None]:
rand_raster = mismatch.raster(neurons, rand_misperneuron, window_start = 0, window_end= 5)

In [None]:
rand_avg = np.mean(rand_raster, axis = 1)
mismatch_avg = np.mean(mismatch_raster[:, 8:13], axis = 1)
modulation_raster = mismatch_avg-rand_avg

In [None]:
sorted_indices = np.argsort(-modulation_raster)
#print(sorted_indices[0:10])

plt.plot(modulation_raster[sorted_indices])

In [None]:


# Sort the array based on the calculated differences
sorted_mismatch_raster = mismatch_raster[sorted_indices]

# Define the range you want to cap
vmin = -0.5
vmax = 1

start = 0
end = 100
fig = plt.figure(figsize=(30, 10), facecolor='w')
ax = fig.add_subplot(111)
im = ax.imshow(sorted_mismatch_raster, cmap='coolwarm', vmin=vmin, vmax=vmax, aspect =  "auto")


ax.set_title(f"Raster plot of neurons aligned to mismatch")
ax.set_xlabel("Frames")
ax.set_ylabel("Neurons")
fig.colorbar(im, label="dff")
ax.axvline(5, color="grey")
plt.show()

In [None]:
pop_response  = np.mean(sorted_mismatch_raster, axis = 0)
plt.plot(pop_response)

In [None]:
# Whole pipeline in one cell: window the mismatches, build the per-neuron
# responses, build the random-event baseline, sort by modulation and plot.
closed_loop, indices = mismatch.create_mismatch_window(closed_loop, window_start = 5, window_end = 20)
neurons, neurons_df = mismatch.build_neurons_df(closed_loop)
misperneuron = mismatch.build_mismatches_per_neuron_list(neurons, neurons_df, window_start = 5, window_end = 20)
mismatch_raster = mismatch.raster(neurons, misperneuron, window_start = 5, window_end = 20)
# NOTE(review): the later cells of this notebook unpack two values from both
# make_rand_raster and modulation_sort_raster; unpack them here as well so this
# cell agrees with them. Confirm against the current `mismatch` module.
rand_raster, rand_misperneuron = mismatch.make_rand_raster(closed_loop, n_events = 200, window_end = 10)
sorted_mismatch_raster, modulation_raster = mismatch.modulation_sort_raster(rand_raster, mismatch_raster)
mismatch.plot_raster(sorted_mismatch_raster, vmin = -0.5,  vmax = 0.8)

In [None]:


mismatch.plot_raster(sorted_mismatch_raster, vmin = -0.5,  vmax = 0.8)

## Z-scoring and trying new preprocess pipeline

In [None]:
closed_loop = mismatch.find_mismatch(closed_loop)

closed_loop, indices = mismatch.create_mismatch_window(closed_loop, window_start = 5, window_end = 20)
neurons, neurons_df = mismatch.build_neurons_df(closed_loop)


In [None]:
misperneuron = mismatch.build_mismatches_per_neuron_list(neurons, neurons_df, window_start = 5, window_end = 20)
mismatch_raster = mismatch.raster(neurons, misperneuron, window_start = 5, window_end = 20)

In [None]:
# NOTE(review): the pipeline cell further down unpacks two values
# (raster, per-neuron responses) from make_rand_raster; do the same here so
# `rand_raster` is the raster itself. Confirm against the current module.
rand_raster, rand_misperneuron = mismatch.make_rand_raster(closed_loop, n_events = 200, window_end = 10)

In [None]:
sorted_mismatch_raster, modulation_raster = mismatch.modulation_sort_raster(rand_raster, mismatch_raster)
mismatch.plot_raster(sorted_mismatch_raster, vmin = -1,  vmax = 1)


In [None]:
plt.plot(sorted_mismatch_raster[-10:,:].T)

In [None]:
plt.plot(modulation_raster[np.argsort(-modulation_raster)])
plt.axhline(0, color="red")
plt.title("Z-scores sorted from high to low")
plt.ylabel("Z-score")
plt.xlabel("Neurons")

In [None]:
def plot_pop_response(sorted_mismatch_raster, how_many=None):
    """Plot the mean response of all neurons and of the top and bottom groups.

    Args:
        sorted_mismatch_raster: 2-D array indexed as [neuron, frame], with rows
            already sorted so that the first rows are the "top" neurons.
        how_many: number of rows averaged for the "First" and "Bottom" traces.
            Defaults to 10% of the rows. Values below 1 are raised to 1.

    Returns:
        (fig, ax): the matplotlib figure and axes that were drawn on.
    """
    fig, ax = plt.subplots()

    ax.plot(np.mean(sorted_mismatch_raster, axis=0), label="Total")

    if how_many is None:
        how_many = int(0.1 * len(sorted_mismatch_raster))
    # With fewer than 10 rows the default rounds down to 0. `[:0]` would then
    # average an empty selection (NaN) and `[-0:]` would silently select every
    # row, so always use at least one row.
    how_many = max(1, how_many)

    ax.plot(np.mean(sorted_mismatch_raster[:how_many, :], axis=0), label=f"First {how_many}")
    ax.plot(np.mean(sorted_mismatch_raster[-how_many:, :], axis=0), label=f"Bottom {how_many}")

    ax.set_title("Population response")
    ax.set_ylabel("Z-score")
    ax.set_xlabel("Frames")
    # Marker positions are fixed at frames 5 and 15 of the plotted window
    ax.axvline(5, color="red", alpha=0.3, label="Mismatch onset")
    ax.axvline(15, color="green", alpha=0.3, label="End of response window")
    ax.axhline(0, color="green", alpha=0.3)

    ax.legend()

    return fig, ax

-  Activity is high before a mismatch
-  Current analysis selects cells that were already more active before the  mismatch.

## A challenge because you're dumb: find trials. 

I need to find six meters, then three seconds, then six meters.   

In [None]:
closed_loop

In [None]:

closed_loop = mismatch.find_trials(closed_loop)

In [None]:

start= 0

end = -1


plt.plot(closed_loop["mouse_z"][start:end],closed_loop["trial_indicator"][start:end])

plt.plot(closed_loop["mouse_z"][start:end], closed_loop["mismatch"][start:end]*30)

In [None]:
sync_loop, fig, ax = mismatch.check_trials(flexilims_session, recording, closed_loop)

### Verifying

In [None]:
def get_mismatch_debug_file(flexilims_session, recording):
    """Read the "DebugMismatchDis" CSV of a recording's harp dataset.

    Args:
        flexilims_session: session object passed through to the flexiznam calls.
        recording: recording entity; its `name` and `path` attributes are used.

    Returns:
        The CSV contents as a pandas DataFrame.
    """
    raw_root = flz.get_data_root("raw", flexilims_session=flexilims_session)

    harp_dataset = flz.get_datasets(
        flexilims_session=flexilims_session,
        origin_name=recording.name,
        dataset_type="harp",
        allow_multiple=False,
    )

    # The CSV path is stored relative to the recording folder under the raw root
    csv_path = raw_root / recording.path / harp_dataset.csv_files["DebugMismatchDis"]
    return pd.read_csv(csv_path)



In [None]:
def synchronize_dataframes(df_a, df_b):
    """Copy `IsMismatch` from the closest-distance row of `df_b` onto `df_a`.

    For every row of `df_a`, `mismatch_mouse_z` is multiplied by 100
    (presumably metres to centimetres — TODO confirm) and compared with
    `df_b["MismatchDistance"]`; the `IsMismatch` value of the closest `df_b`
    row is stored in a new `df_a["IsMismatch"]` column.

    Args:
        df_a: frame with a "mismatch_mouse_z" column. Modified in place.
        df_b: frame with "MismatchDistance" and "IsMismatch" columns.

    Returns:
        `df_a`, with the "IsMismatch" column added or overwritten.
    """
    distances = df_b["MismatchDistance"]
    flags = df_b["IsMismatch"].to_numpy()
    targets = df_a["mismatch_mouse_z"].to_numpy() * 100

    # Nearest-row lookup is done by position in df_b, so df_b's index labels
    # do not matter.
    matched = [flags[(distances - target).abs().argmin()] for target in targets]

    # Assign the whole column at once, by position. The previous per-row
    # `df_a["IsMismatch"][i] = ...` was a chained assignment keyed on a
    # positional counter: it wrote to the wrong labels on a non-default index
    # and has no effect under pandas copy-on-write.
    df_a["IsMismatch"] = np.asarray(matched, dtype=flags.dtype)

    return df_a

# `MismatchDebug` is never defined at notebook scope (it is only a local name
# inside get_mismatch_debug_file), so load it explicitly before synchronising.
MismatchDebug = get_mismatch_debug_file(flexilims_session, recording)
sync_loop = synchronize_dataframes(closed_loop, MismatchDebug)

In [None]:

def plot_synchronized_data(closed_loop, sync_loop, start, end):
    """Overlay trial indicator, detected mismatch and logged mismatch against position.

    Args:
        closed_loop: frame with "mouse_z", "trial_indicator" and "mismatch" columns.
        sync_loop: frame with an "IsMismatch" column.
        start, end: bounds of the `[start:end]` slice applied to every column.

    Returns:
        (fig, ax): the matplotlib figure and axes, after `plt.show()` was called.
    """
    fig, ax = plt.subplots(figsize=(20, 10))  # Very large figure

    position = closed_loop["mouse_z"][start:end]
    # The two binary traces are scaled by 30 so they are visible next to the
    # trial indicator.
    traces = (
        (closed_loop["trial_indicator"][start:end], "Trial indicator"),
        (closed_loop["mismatch"][start:end]*30, "Mismatch"),
        (sync_loop["IsMismatch"][start:end]*30, "Old mismatch window"),
    )
    for values, label in traces:
        ax.plot(position, values, label=label)

    ax.legend()
    ax.set_xlabel("Mouse Z")
    ax.set_ylabel("Values")
    ax.set_title("Synchronized Data Plot")

    plt.show()
    return fig, ax

# Example usage
# `closed_loop` is the synchronised imaging dataframe and `sync_loop` is the
# result of synchronize_dataframes, which takes exactly two frames:
# sync_loop = synchronize_dataframes(closed_loop, other_dataframe)

# Plot the whole recording. The bounds are used as a `[start:end]` slice, so
# end = -1 leaves out the last row.
start = 0
end = -1
fig, ax = plot_synchronized_data(closed_loop, sync_loop, start, end)

It's quite good

# Establish significance of the mismatch tuning

## How Attinger does it



The details of the statistical analysis are noted in the figure legends. We did not test the distribution of the data for normality. 

### What are the traces that they plot?

To quantify average response traces, we first calculated the average event-triggered fluorescence trace for each neuron. 

The responses of all neurons were then averaged and the baseline (mean DF/F in a 0.5 s window pre event onset) was subtracted. 


> *When does this happen?* 

> To quantify the significance of the difference of two average calcium responses as a function of time, we performed a separate Student’s t test for every bin of the calcium trace (10 Hz or 15 Hz) and marked bins as significantly different for p < 0.01. For visual clarity, we removed isolated significant bins, such that a significant bin was only marked if at least one of the two neighboring bins was also significant. 

### When is a neuron significantly modulated by mismatch?

To calculate the average response of each neuron to mismatch or playback halt, we first calculated the difference between the average event-triggered response and the average response to 1000 randomly triggered events to generate a random-corrected trace. Average responses to mismatch and playback halt were then calculated as the mean fluorescence of the random-corrected average in a response window minus the mean fluorescence in a baseline window for each neuron (the response window for mismatch, playback halt, running onset and playback onset was +500 ms to +1500 ms, and the baseline subtraction window was −1000 ms to 0 ms). 

To determine the significance of a neuron’s response, we calculated individual neuron responses to each mismatch event as described above and compared this distribution to the distribution generated by 1000 randomly triggered events. Significance was determined with a two sided Mann-Whitney-U test (p < 0.05). 

### Which mismatches count?

For mismatch and random events to be included in the analysis, mice had to be running above threshold (10⁻² cm/s) before and after event onset (from −600 ms to +1100 ms). 

In addition, for playback halt events to be included, mice had to be stationary during the playback halt (no running from −600 ms to +1100 ms). 

For running onset, mice had to be stationary for at least 600 ms prior to the running onset and continue running for 1100 ms above threshold following the onset. Similarly, for playback onset (quantified only during open-loop sessions) there had to be no visual flow for 600 ms prior to visual flow onset, followed by continuous visual flow above threshold for at least 1100 ms after onset, mice had to be stationary during this time.

### Cutoffs

In [None]:
closed_loop

## How we do it

In [None]:
closed_loop

1. Generate a set of randomly triggered events on suitable parts of the trial.
2. Is that distribution different from the mismatch distribution?

###  Generate  suitable  distribution


In [None]:
def define_window_for_mismatch(closed_loop, corridor_length = 6):
    """Flag the in-trial frames where the animal is in the middle of the corridor.

    A frame is in trial when `trial_indicator > 0`. A trial starts on the first
    in-trial frame that follows an out-of-trial frame (or on the very first
    frame, if the recording starts in trial). Distance run in the trial is
    `mouse_z` minus `mouse_z` at that start. Frames are flagged when that
    distance lies in [corridor_length / 3, corridor_length * 5 / 6).

    Compared with the earlier loop-based version, this one
    * never compares the first row with the last row of the recording,
    * writes by position, so it works with any index,
    * never skips a row, so every trial start is detected,
    * no longer prints the row count.

    Args:
        closed_loop: frame with "trial_indicator" and "mouse_z" columns.
            Modified in place.
        corridor_length: corridor length in the units of `mouse_z`
            (presumably metres — TODO confirm).

    Returns:
        `closed_loop` with two boolean columns added: "mismatch_window" and
        "in_trial".
    """
    # Window bounds as fractions of the corridor
    beggining_threshold = corridor_length * (1 / 3)
    end_threshold = corridor_length * (5 / 6)

    in_trial = (closed_loop["trial_indicator"] > 0).to_numpy()
    mouse_z = closed_loop["mouse_z"].to_numpy(dtype=float)

    # Trial onset: in trial now, not in trial on the previous frame. The first
    # frame has no predecessor and is treated as preceded by "out of trial".
    previously_in_trial = np.zeros_like(in_trial)
    previously_in_trial[1:] = in_trial[:-1]
    trial_onset = in_trial & ~previously_in_trial

    # Position at the most recent trial onset, carried forward frame by frame;
    # 0 before the first onset.
    start_distance = (
        pd.Series(np.where(trial_onset, mouse_z, np.nan)).ffill().fillna(0.0).to_numpy()
    )
    distance_in_trial = mouse_z - start_distance

    closed_loop["mismatch_window"] = (
        in_trial
        & (distance_in_trial >= beggining_threshold)
        & (distance_in_trial < end_threshold)
    )
    closed_loop["in_trial"] = in_trial

    return closed_loop



In [None]:
fig,  ax =  mismatch.plot_synchronized_data(closed_loop, sync_loop)
ax.plot(closed_loop["mouse_z"], closed_loop["mismatch_window"]*30, label="mismatch window")
ax.legend()

plt.show()


In [None]:
mis_closed_loop = closed_loop[closed_loop["mismatch_window"]]

In [None]:
mis_closed_loop.to_csv("bananas.csv", index =  True)

In [None]:
indices = mis_closed_loop.index.tolist()

In [None]:
def generate_plausible_mismatch_indices(closed_loop):
    """Return, as a list, the index labels of the rows flagged in "mismatch_window"."""
    inside_window = closed_loop["mismatch_window"]
    return closed_loop.loc[inside_window].index.tolist()

### Test distribution

In [None]:
# Settings for this run:
#   null_mode == "trial_structure" restricts the random (null) events to the
#   frames flagged by define_window_for_mismatch; any other value passes
#   indices=None to make_rand_raster.
#   save controls whether results and figures are written to disk.
null_mode ="trial_structure"
save =True

print("Estimating mismatch distribution")
closed_loop = mismatch.find_mismatch(closed_loop)
closed_loop, idxs = mismatch.create_mismatch_window(
    closed_loop, window_start=5, window_end=20
)
neurons, neurons_df = mismatch.build_neurons_df(closed_loop)
misperneuron = mismatch.build_mismatches_per_neuron_list(
    neurons, neurons_df, window_start=5, window_end=20
)
mismatch_raster = mismatch.raster(neurons, misperneuron, window_start=5, window_end=20)

if null_mode == "trial_structure":

    closed_loop = mismatch.find_trials(closed_loop)
    closed_loop = mismatch.define_window_for_mismatch(closed_loop)
    indices = mismatch.generate_plausible_mismatch_indices(closed_loop)
else:
    indices = None

print("Estimating null distribution")
rand_raster, rand_misperneuron = mismatch.make_rand_raster(closed_loop, n_events=200, window_end=10, indices = indices)
sorted_mismatch_raster, modulation_raster = mismatch.modulation_sort_raster(rand_raster, mismatch_raster)
# p-values in the same (sorted) row order as the raster, for plotting
sorted_p = mismatch.calculate_significance(misperneuron, rand_misperneuron, modulation_raster)

print("Plotting")
rasterfig, rasterax = mismatch.plot_raster(sorted_mismatch_raster)
rasterfig, rasterax, rasterax2 =  mismatch.plot_significance(rasterfig, rasterax, sorted_p)
plt.show()
popfig, popax = mismatch.plot_pop_response(sorted_mismatch_raster)
plt.show()
sync_loop, fig, ax = mismatch.check_trials(flexilims_session, recording, closed_loop)

if save:
    # Only announce saving when something is actually written
    print("Saving...")

    ## save stuff

    processed = flz.get_data_root("processed", flexilims_session=flexilims_session)
    path = processed / recording.path
    # to_pickle, np.save and savefig do not create missing folders, so make
    # sure the recording's processed folder exists first.
    Path(path).mkdir(parents=True, exist_ok=True)

    #getting unsorted significance of modulation, so that row i of the saved
    #frame describes neuron i (modulation_raster is unsorted as well)
    p = mismatch.calculate_significance(misperneuron, rand_misperneuron)

    #save dataframe
    mismatch_df =  {
        'modulation_size': modulation_raster,
        'p_value': p
    }
    mismatch_df = pd.DataFrame(mismatch_df)
    mismatch_df.to_pickle(str(path / "mismatch_df.pkl"))

    #save mismatch_raster
    np.save(str(path / "mismatch_raster.npy"), mismatch_raster)

    #save figures
    rasterfig.savefig(str(path / "raster"))
    popfig.savefig(str(path / "population"))

In [None]:
recording.path

In [None]:
processed = flz.get_data_root("processed", flexilims_session=flexilims_session)
processed

We must test `misperneuron` and `rand_misperneuron` 

In [None]:
def calculate_significance(misperneuron, rand_misperneuron, modulation_raster = None):
    """Test, neuron by neuron, whether mismatch responses differ from random events.

    For each neuron, every mismatch event is reduced to the mean of frames
    5 to 14 of its window, and every random event to the mean of its whole
    window. The two sets of means are compared with a two-sided
    Mann-Whitney U test.

    Args:
        misperneuron: sequence with one 2-D array per neuron, indexed as
            [event, frame], for mismatch events.
        rand_misperneuron: same layout, for the random events.
        modulation_raster: optional 1-D array with one value per neuron. When
            given, the p-values are returned ordered from the largest to the
            smallest value of this array.

    Returns:
        1-D array of p-values, one per neuron: in neuron order when
        `modulation_raster` is None, otherwise in descending-modulation order.
    """
    neuron_p = np.zeros(len(misperneuron))

    for neuron in tqdm(range(len(misperneuron))):
        mis_responses = misperneuron[neuron][:,5:15]  # keep the response part
        rand_responses = rand_misperneuron[neuron]

        mis_mean = np.mean(mis_responses, axis=1)
        rand_mean = np.mean(rand_responses, axis=1)
        # State the alternative explicitly: the default of `alternative` has
        # changed between SciPy releases, and the analysis calls for a
        # two-sided test.
        result = mannwhitneyu(mis_mean, rand_mean, alternative="two-sided")
        neuron_p[neuron] = result.pvalue

    if modulation_raster is None:
        return neuron_p

    # Same ordering as the sorted raster: largest modulation first
    sorted_indices = np.argsort(-modulation_raster)
    return neuron_p[sorted_indices]





In [None]:
sorted_p = calculate_significance(misperneuron, rand_misperneuron, modulation_raster)

In [None]:
# `neuron_p` only exists inside calculate_significance, so obtain the unsorted
# p-values explicitly (no modulation_raster argument) before re-ordering them.
neuron_p = calculate_significance(misperneuron, rand_misperneuron)
sorted_indices = np.argsort(-modulation_raster)
sorted_p = neuron_p[sorted_indices]
plt.plot(sorted_p)
# Zoom in on the significant range
plt.ylim((0, 0.05))

In [None]:
plot_sorted_p = np.where(sorted_p<0.05, 1, 0)
plt.plot(plot_sorted_p)
plt.show()
plot_sorted_p = plot_sorted_p[np.newaxis,  :]
plt.imshow(plot_sorted_p.T, aspect = "auto")


In [None]:

fig, ax = mismatch.plot_raster(sorted_mismatch_raster)
# The list [left, bottom, width, height] specifies the subplot's position and size in figure coordinates (0 to 1)

def add_significance(fig, ax, sorted_p):
    """Add a narrow strip next to a raster marking neurons with p < 0.05.

    Args:
        fig: figure that holds the raster.
        ax: raster axes; the strip shares its y axis, and the raster's own
            y axis is hidden.
        sorted_p: 1-D array of p-values in the same row order as the raster.

    Returns:
        (fig, ax, ax2), where `ax2` is the newly added strip axes.
    """
    # One column, one row per neuron: 1 where significant, 0 otherwise
    significant_column = np.where(sorted_p<0.05, 1, 0)[:, np.newaxis]

    # [left, bottom, width, height] in figure coordinates; adjust as needed
    strip_position = [0.06, 0.1, 0.03, 0.8]
    ax2 = fig.add_axes(strip_position, sharey = ax)
    ax2.imshow(significant_column, aspect ="auto", cmap ="binary")

    ax.yaxis.set_visible(False)
    ax2.xaxis.set_visible(False)
    ax2.set_ylabel("Neurons", size =  25)

    return fig, ax, ax2


## Piping

For every recording, we want to save three plots, a dataframe that looks like Yiran's `neurons_df`, and a dataframe with the size of the mismatch responses and their significance. 

So far: when given a session, get the recordings, iterate through the sync dataframes, analyse, move to the next recording. 

In [None]:
yirans = pd.read_pickle("/camp/lab/znamenskiyp/home/shared/projects/depth_mismatch_seq/BRAC9057.4j/S20240517/neurons_df.pickle")

In [None]:
yirans

# Debug  :(

In [None]:
session = "BRAC9057.4j_S20240517"
flexilims_session = None

In [None]:
#  get the right stuff

if flexilims_session is None:
    flexilims_session = flz.get_flexilims_session(project_id=PROJECT)

exp_session = flz.get_entity(
    datatype="session", name=session, flexilims_session=flexilims_session
)

vs_df_all, imaging_df_all, recordings = mismatch.sync_all_recordings(
    session_name=session,
    flexilims_session=flexilims_session,
    project=PROJECT,
    filter_datasets={"anatomical_only": 3},
    recording_type="two_photon",
    protocol_base=PROTOCOL,
    photodiode_protocol=5,
    return_volumes=True,
)

recordings = flz.get_entities(
    datatype="recording",
    origin_id=exp_session['id'],
    query_key="recording_type",
    query_value="two_photon",
    flexilims_session=flexilims_session,
)

recordings = recordings[recordings.name.str.contains(PROTOCOL)]

In [None]:
# Position of the recording to debug
i = 0
# `recordings` was filtered by protocol name, so its index labels are not
# guaranteed to start at 0; pick the i-th remaining row by position.
recname = recordings.name.iloc[i]

recording = flz.get_entity(
    datatype="recording",
    name=recname,
    flexilims_session=flexilims_session,
)

# NOTE(review): this assumes imaging_df_all is ordered like the filtered
# `recordings` table — confirm against sync_all_recordings.
closed_loop = imaging_df_all[i]
is_playback = mismatch.determine_if_playback(recording, flexilims_session)


In [None]:
print("Estimating mismatch distribution")
closed_loop = mismatch.find_mismatch(closed_loop, is_playback)
closed_loop, idxs = mismatch.create_mismatch_window(
    closed_loop, window_start=5, window_end=20
)
neurons, neurons_df = mismatch.build_neurons_df(closed_loop)

In [None]:
beg= 2350
end = 2450

plt.plot(closed_loop["range_indicator"][beg:end], label  = "range_indicator")
plt.plot(closed_loop["mismatch"][beg:end], label = "mismatch")
plt.plot(closed_loop["mism_ratio"][beg:end])

#closed_loop = mismatch.find_trials(closed_loop)
plt.plot(closed_loop["trial_indicator"][beg:end]/20, label = "trial_indicator")


plt.legend()

plt.show()

plt.plot(closed_loop["mouse_z"][beg:end]/150, label = "mouse_z")
plt.plot((closed_loop["mismatch_mouse_z"][beg:end]+6)/150, label = "mismatch_mouse_z")

plt.legend()

plt.show()


In [None]:
closed_loop["mouse_z"][1]

In [None]:
beg= 2400
end = 2600
plt.plot(closed_loop["mouse_z"][beg:end])
plt.plot(closed_loop["mismatch"][beg:end]*150)

plt.plot(closed_loop["mismatch_mouse_z"][beg:end]+6)

In [None]:
# Inline, unrolled version of the per-neuron window extraction, for debugging.
# NOTE(review): this appears to mirror mismatch.build_mismatches_per_neuron_list
# — confirm the two are still in step.
window_start = 5
window_end = 20
# None: take the windows from the runs of range_indicator == 1;
# otherwise: a list of event frames to cut windows around.
indices = None

# Placeholder list with one slot per neuron; every slot is replaced by a 2-D
# array a few lines below.
mismatches_per_neuron = list(np.zeros(neurons))

# Number of frames stored per event
window = window_start + window_end

# +1 wherever range_indicator rises from 0 to 1 (first frame of a window)
neurons_df["start_mismatch"] = np.zeros(len(neurons_df["range_indicator"]))

neurons_df.loc[1:, "start_mismatch"] = np.diff(neurons_df["range_indicator"])

if indices is None:
    n_mismatches = len(
        neurons_df["start_mismatch"][neurons_df["start_mismatch"] == 1]
    )
else:
    n_mismatches = len(indices)

print(f"# mismatches: {n_mismatches}")

# One (events, frames) array per neuron
for i in range(neurons):
    mismatches_per_neuron[i] = np.zeros((n_mismatches, window))

# Initialize variables to track the start and end of intervals
in_interval = False
start_idx = None

if indices is None:
    # Iterate through the DataFrame to identify intervals
    idx_mismatch = -1
    print(f"Building {n_mismatches} mismatches per neuron")
    for idx, row in tqdm(neurons_df.iterrows()):
        if row["range_indicator"] == 1 and not in_interval:
            # Start of a new interval
            start_idx = idx
            in_interval = True
            idx_mismatch += 1
            # print(f"This is mismatch {idx_mismatch}")
        elif row["range_indicator"] == 0 and in_interval:
            # End of the current interval
            end_idx = idx - 1
            for neuron in range(neurons):
                # NOTE(review): unlike the `indices` branch below, nothing here
                # pads or trims the slice, so a run whose length differs from
                # `window` (clipped at an edge, or merged with a neighbouring
                # window) will make this assignment fail — confirm.
                mismatches_per_neuron[neuron][idx_mismatch, :] = neurons_df[
                    f"neuron{neuron}"
                ][start_idx:end_idx]
            in_interval = False
            # print(f"start and end idx: {(start_idx, end_idx)}")
    # NOTE(review): a run still open on the last frame is never copied.

else:
    print(f"Building {n_mismatches} mismatches per neuron")
    nframes = len(neurons_df)
    for idx_mismatch, idx in tqdm(enumerate(indices)):
        # Clip the window to the available frames
        start_idx = max(0, idx - window_start)
        end_idx = min((nframes - 1), idx + window_end)
        for neuron in range(neurons):
            # The window must always have `window` frames: a slice that comes
            # out short is padded with zeros at its end.
            slice_data = neurons_df[f"neuron{neuron}"][start_idx:end_idx].values
            if len(slice_data) < window:
                slice_data = np.pad(slice_data, (0, window - len(slice_data)), 'constant')
            mismatches_per_neuron[neuron][idx_mismatch, :] = slice_data

In [None]:
plt.plot(neurons_df["range_indicator"])

In [None]:
plt.plot(closed_loop["mismatch"])

Okay. Of course it breaks. It's always uncoupled! I need to find playback halts. 

In [None]:
ds = flz.get_datasets(
    flexilims_session=flexilims_session,
    origin_name=recording.name,
    dataset_type="harp",
    allow_multiple=False,
)

In [None]:
closed_loop