In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline


In [2]:
import spikeinterface.full as si
import numpy as np
import pylab as plt
from pathlib import Path

# Root directory: every data/output path in this notebook is built relative to it.
base_folder = Path('.')

# Default job options for chunked, parallel processing.
job_kwargs = dict(
    n_jobs=-1,
    progress_bar=True,
    chunk_duration='1s',
    verbose=True,
)


## We load all the recordings

In [3]:
import os, h5py
import pandas as pd
from tools import load_experiment, infer_boundaries

# Maps the stem of every .h5 file to whatever load_experiment returns for it.
# Later cells read the 'raw' and 'mapping' entries and add their own keys.
recordings = {}
remove_center = True

experiments_root = base_folder / "experiments"
# Sorted listings keep the loading order reproducible across runs and platforms
# (os.listdir returns entries in arbitrary order).
for folder in sorted(os.listdir(experiments_root)):
    datapath = experiments_root / folder
    if not datapath.is_dir():
        # A stray file next to the experiment folders would make the
        # os.listdir(datapath) calls below raise.
        continue

    # Export every Excel sheet as a CSV file sitting next to it.
    for file in sorted(os.listdir(datapath)):
        if file.endswith(".xlsx"):
            data = pd.read_excel(datapath / file)
            # with_suffix only swaps the extension; a plain str.replace would
            # also rewrite any '.xlsx' occurring elsewhere in the path.
            data.to_csv((datapath / file).with_suffix('.csv'))

    # Load every HDF5 recording of the folder, keyed by its file name stem.
    for file in sorted(os.listdir(datapath)):
        if file.endswith(".h5"):
            key, _ = os.path.splitext(file)
            try:
                recordings[key] = load_experiment(file, datapath, remove_center)
            except Exception as error:
                # Best effort: keep loading the other files, but report what
                # went wrong instead of silently dropping the reason.
                print('Problem while loading', datapath, file, '->', repr(error))
print('We have loaded', len(recordings), 'recordings')

# Preprocessing: band-pass filter the raw traces, then apply a common reference.
for key in recordings.keys():
    bandpassed = si.bandpass_filter(
        recordings[key]['raw'],
        freq_min=150,
        freq_max=7000,
        ftype="bessel",
        filter_order=2,
    )
    recordings[key]['filtered'] = si.common_reference(bandpassed)
    # Optional z-scoring, currently disabled:
    # recordings[key]['filtered'] = si.zscore(recordings[key]['filtered'], dtype='float32')

experiments/BIN1 SET3/2024-03-20_CG_BIN1exp3_MEA21030_v270224_DIV20_MOCK_basal.h5
experiments/BIN1 SET3/2024-03-20_CG_BIN1exp3_MEA21030_v270224_DIV20_MOCK_postT.h5
experiments/BIN1 SET3/2024-03-20_CG_BIN1exp3_MEA21030_v270224_DIV20_MOCK_preT.h5
experiments/BIN1 SET3/2024-03-20_CG_BIN1exp3_MEA22338_v270224_DIV20_MOCK_basal.h5
experiments/BIN1 SET3/2024-03-20_CG_BIN1exp3_MEA22338_v270224_DIV20_MOCK_postT.h5
experiments/BIN1 SET3/2024-03-20_CG_BIN1exp3_MEA22338_v270224_DIV20_MOCK_preT.h5
experiments/BIN1 SET3/2024-03-20_CG_BIN1exp3_MEA22871_v270224_DIV20_BIN1iso1_basal.h5
experiments/BIN1 SET3/2024-03-20_CG_BIN1exp3_MEA22871_v270224_DIV20_BIN1iso1_postT.h5
experiments/BIN1 SET3/2024-03-20_CG_BIN1exp3_MEA22871_v270224_DIV20_BIN1iso1_preT.h5
experiments/BIN1 SET3/2024-03-20_CG_BIN1exp3_MEA22885_v270224_DIV20_BIN1iso1_basal.h5
experiments/BIN1 SET3/2024-03-20_CG_BIN1exp3_MEA22885_v270224_DIV20_BIN1iso1_postT.h5
experiments/BIN1 SET3/2024-03-20_CG_BIN1exp3_MEA22885_v270224_DIV20_BIN1iso1_preT

## We perform (or load) all the spike sortings

In [None]:
# Job options applied globally to every subsequent spikeinterface call.
job_kwargs = dict(n_jobs=-1, progress_bar=True, chunk_memory='100M')
si.set_global_job_kwargs(**job_kwargs)

# When True, an existing sorting folder is ignored and the sorter is run again.
erase = True
for key, entry in recordings.items():
    sortings_root = base_folder / "sortings"
    sortings_root.mkdir(parents=True, exist_ok=True)
    folder = sortings_root / key

    # Only this recording is sorted for now.
    if key != '2024-03-20_CG_BIN1exp3_MEA22338_v270224_DIV20_MOCK_basal':
        continue

    if erase or not folder.exists():
        # (Re)run the sorter on the already filtered traces.
        entry['sorting'] = si.run_sorter(
            'spykingcircus2',
            entry['filtered'],
            folder=folder,
            verbose=True,
            apply_preprocessing=False,
            remove_existing_folder=True,
        )
    else:
        # Reuse the result of a previous run.
        entry['sorting'] = si.read_sorter_folder(folder)

## We compute (or load) all the waveforms extracted from the spike sortings

In [None]:
# When True, an existing analyzer folder is ignored and everything is recomputed.
erase = True

# The parent folder is the same for every recording, so create it once.
analyzers_root = base_folder / "analyzers"
analyzers_root.mkdir(parents=True, exist_ok=True)

for key in recordings.keys():
    folder = analyzers_root / key
    # Only this recording has been sorted, hence only it gets an analyzer.
    if key == '2024-03-20_CG_BIN1exp3_MEA22338_v270224_DIV20_MOCK_basal':
        if folder.exists() and not erase:
            recordings[key]['analyzer'] = si.load_sorting_analyzer(folder)
        else:
            # The analyzer is created directly on disk (format='binary_folder'),
            # so the extensions computed below are stored in `folder` as they
            # are produced.
            analyzer = si.create_sorting_analyzer(
                recordings[key]['sorting'],
                recordings[key]['filtered'],
                format='binary_folder',
                folder=folder,
                return_scaled=True,
                overwrite=True,
                sparse=True,
            )
            recordings[key]['analyzer'] = analyzer
            analyzer.compute(['random_spikes', 'templates', 'noise_levels',
                              'quality_metrics', 'template_similarity', 'spike_amplitudes'])
            analyzer.compute('correlograms', window_ms=40, bin_ms=2)
            # No trailing save_as(folder=folder): SortingAnalyzer.save_as
            # defaults to format="memory", so that call wrote nothing to disk
            # and only built a throw-away in-memory copy of the analyzer.

## We compute the boundaries of the source/target population for every recording

In [7]:
from tools import infer_boundaries
# Store, for every recording, the boundaries inferred from its 'mapping' entry.
for key, entry in recordings.items():
    entry['boundaries'] = infer_boundaries(entry['mapping'])

## We need to define a quality criterion that will be used in all the following operations

In [8]:
# Filter expression applied to the quality-metrics table through DataFrame.query
# when the "quality_only_*" spreadsheets are exported: it keeps the units whose
# snr is above 3 and whose isi_violations_ratio is below 0.1.
# NOTE(review): the expression relies on the query parser giving `&` the
# precedence of `and`; add parentheses if it is ever reused outside .query().
quality_criteria = 'snr > 3 & isi_violations_ratio < 0.1'

## We compute (or load) the quality metrics for all the recordings

In [9]:
from tools import get_positions
for key, entry in recordings.items():
    # Only the recording that went through sorting and analysis has an analyzer.
    if key != '2024-03-20_CG_BIN1exp3_MEA22338_v270224_DIV20_MOCK_basal':
        continue

    sa = entry['analyzer']
    # Compute the quality metrics only when the analyzer does not hold them yet.
    if sa.get_extension('quality_metrics') is None:
        sa.compute(['quality_metrics'])
    entry['metrics'] = sa.get_extension('quality_metrics').get_data()

    positions, x, y = get_positions(entry)
    # Adding the position columns to the metrics table is currently disabled:
    # entry['metrics'].insert(0, "position", list(positions))
    # entry['metrics'].insert(1, "x", list(x))
    # entry['metrics'].insert(2, "y", list(y))

    # Export the full table, then the subset matching quality_criteria.
    path = Path('plots') / "statistics"
    path.mkdir(parents=True, exist_ok=True)
    full_table_file = path / f"{key}.xlsx"
    quality_table_file = path / f"quality_only_{key}.xlsx"
    entry['metrics'].to_excel(full_table_file)
    entry['metrics'].query(quality_criteria).to_excel(quality_table_file)