# Pre-processing pipeline for openephys sessions, starling
- For every run in the session:
 - Load the recordings
 - Extract a wav channel with the microphone and make a wav channel with the nidq sync signal
 - Get the sync events for the nidq sync channel
 
 - Do bout detection
 
In another notebook, bout detection is curated
- Left to decide where to:
    - Sort spikes
    - Sync the spikes/lfp/nidq
    - make and plot 'bout rasters'

In [1]:
%matplotlib inline
import os
import glob
import logging
import pickle
import numpy as np
import pandas as pd
from scipy.io import wavfile
from scipy import signal
import traceback
import warnings
import json

from matplotlib import pyplot as plt
from importlib import reload

# Root logger for the notebook: every record at INFO or above goes to a
# stream handler, formatted as "time  logger-name  level  message".
formatter = logging.Formatter(
        '%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
handler = logging.StreamHandler()
handler.setFormatter(formatter)

logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.addHandler(handler)


In [2]:
from ceciestunepipe.file import bcistructure as et
from ceciestunepipe.util import sglxutil as sglu
from ceciestunepipe.util import rigutil as ru
from ceciestunepipe.util import wavutil as wu
from ceciestunepipe.util import syncutil as su
from ceciestunepipe.util import fileutil as fu

from ceciestunepipe.util.sound import boutsearch as bs
from ceciestunepipe.pipeline import searchbout as sb

from ceciestunepipe.util.spikeextractors import preprocess as pre

In [3]:
import spikeinterface as si
import spikeinterface.extractors as se
import spikeinterface.toolkit as st
import spikeinterface.sorters as ss
import spikeinterface.comparison as sc
import spikeinterface.widgets as sw
logger.info('all modules loaded')

2022-09-23 10:59:35,420 root         INFO     all modules loaded


## Session parameters and raw files

#### list all the sessions for this bird

In [6]:
bird = 's_b1376_22'
all_bird_sess = et.list_sessions(bird)
logger.info('all sessions for bird are {}'.format(all_bird_sess))

2022-09-23 10:59:58,636 root         INFO     all sessions for bird are ['2022-07-06', '2022-07-07', '2022-07-08', '2022-07-09', '2022-07-10', '2022-07-11', '2022-07-12', '2022-07-13', '2022-07-14', '2022-07-15', '2022-07-16', '2022-07-17', '2022-07-18', '2022-07-19', '2022-07-20', '2022-07-21', '2022-07-22', '2022-07-23', '2022-07-24', '2022-07-25', '2022-07-26', '2022-07-27', '2022-07-28', '2022-07-29', '2022-07-30', '2022-07-31', '2022-08-01', '2022-08-02', '2022-08-03', '2022-08-04', '2022-08-05', '2022-08-06', '2022-08-07', '2022-08-08', '2022-08-09', '2022-08-10', '2022-08-11', '2022-08-12', '2022-08-13', '2022-08-14', '2022-08-15', '2022-08-16', '2022-08-17', '2022-08-18', '2022-08-19', '2022-08-20', '2022-08-21', '2022-08-22', '2022-08-23', '2022-08-24', '2022-08-25', '2022-08-26', '2022-08-27', '2022-08-28', '2022-08-29', '2022-08-30', '2022-08-31', '2022-09-01', '2022-09-02', '2022-09-03', '2022-09-04', '2022-09-05', '2022-09-06', '2022-09-07', '2022-09-08', '2022-09-09', '20

### set up bird and sessions parameters
this will define:
- locations of files (for the bird)
- signals and channels to look for in the metadata of the files and in the rig.json parameter file. Note that these have to exist in all of the sessions that will be processed
- 'sess' is unimportant here, but it comes in handy if there is a need to debug using a single session

In [58]:
reload(et)
# for one example session

sess_par = {'bird': bird,
           'sess': '2022-09-22',
           'probes': [], #probes of interest
           'mic_list': ['microphone_0'], #list of mics of interest, by signal name in rig.json
            'adc_list': ['pressure'],
           'sort': 0, #label for this sort instance
            
            'processor': 'Acquisition_Board-100.Rhythm Data',
            'record_node': 'Record Node 101',
            'experiment': 'experiment1',
           }

exp_struct = et.get_exp_struct(sess_par['bird'], sess_par['sess'], sort=sess_par['sort'], ephys_software='oe')

ksort_folder = exp_struct['folders']['ksort']
raw_folder = exp_struct['folders']['oe']

list all the epochs in a session, to check that it is finding what it has to find

In [27]:
import json

In [28]:
# this should go to ceciestunepipe.file.bcistructure (as et here)
def list_oe_epochs(exp_struct):
    """Return the names of the epoch folders of a session.

    Every directory found directly under ``exp_struct['folders']['oe']`` counts
    as an epoch. Only the folder names (not full paths) are returned, in the
    arbitrary order given by ``os.scandir``.
    """
    oe_root = os.path.join(exp_struct['folders']['oe'])
    with os.scandir(oe_root) as entries:
        return [entry.name for entry in entries if entry.is_dir()]

def list_nodes(epoch_path):
    """Return the paths of all sub-directories of an epoch folder.

    Paths are built by ``os.scandir`` from ``epoch_path`` and come in
    arbitrary order.
    """
    with os.scandir(epoch_path) as entries:
        return [entry.path for entry in entries if entry.is_dir()]

def list_experiments(node_path):
    """Return the paths of all sub-directories of a record node folder.

    Plain files are skipped; the order is whatever ``os.scandir`` yields.
    """
    experiment_paths = []
    for entry in os.scandir(node_path):
        if entry.is_dir():
            experiment_paths.append(entry.path)
    return experiment_paths

def list_recordings(experiment_path):
    """Return the paths of all sub-directories of an experiment folder.

    Plain files are skipped; the order is whatever ``os.scandir`` yields.
    """
    sub_dirs = filter(lambda entry: entry.is_dir(), os.scandir(experiment_path))
    return [entry.path for entry in sub_dirs]

def list_processors(signal_path):
    """Return the paths of all sub-directories of ``signal_path``.

    In this notebook it is called on a recording's 'continuous' folder, where
    each sub-directory holds the data of one processor.
    """
    with os.scandir(signal_path) as entries:
        folders = [entry for entry in entries if entry.is_dir()]
    return [folder.path for folder in folders]

def get_rec_meta(rec_path):
    """Load the metadata of a recording from its 'structure.oebin' file.

    Args:
        rec_path: path of the recording folder containing 'structure.oebin'.

    Returns:
        The parsed JSON content of the file (a dict for the files used here).

    Raises:
        FileNotFoundError: if 'structure.oebin' is not in ``rec_path``.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    rec_meta_path = os.path.join(rec_path, 'structure.oebin')
    # Read explicitly as UTF-8 so the result does not depend on the locale of
    # the machine running the notebook.
    with open(rec_meta_path, 'r', encoding='utf-8') as meta_file:
        return json.load(meta_file)

def get_continous_files_list(rec_path, processor='Rhythm_FPGA-100.0'):
    """Return the 'continuous.dat' files of a recording for a processor.

    The search pattern is ``<rec_path>/continuous/<processor>/continuous.dat``
    and is expanded with ``glob``, so wildcards in ``processor`` are honored.
    An empty list is returned when nothing matches.
    """
    dat_pattern = os.path.join(rec_path, 'continuous', processor, 'continuous.dat')
    return glob.glob(dat_pattern)

def oe_list_bin_files(epoch_path):
    """Return the files directly in ``epoch_path`` matching 'experiment*.dat'.

    The order of the result is the one given by ``glob``.
    """
    bin_pattern = os.path.join(epoch_path, 'experiment*.dat')
    return glob.glob(bin_pattern)

def get_default_node(exp_struct, epoch, rec_index=0):
    """Return the path of the default (first) record node folder of an epoch.

    Args:
        exp_struct: experiment structure; ``exp_struct['folders']['oe']`` is the
            folder that contains the epoch folders.
        epoch: name of the epoch folder.
        rec_index: currently unused; kept so existing callers keep working.

    Returns:
        Path of the first record node folder, in sorted order.

    Raises:
        IndexError: if the epoch has no node folder, or the chosen node has no
            experiment folder inside.
    """
    epoch_path = os.path.join(exp_struct['folders']['oe'], epoch)

    # os.scandir yields entries in arbitrary order; sort so that 'first' is
    # the same folder on every run.
    node_paths = sorted(list_nodes(epoch_path))
    if not node_paths:
        raise IndexError('no record node folder found in {}'.format(epoch_path))

    # list_nodes already returns paths that include epoch_path. Joining them
    # onto epoch_path again would duplicate the prefix for relative paths.
    node_path = node_paths[0]

    # A node without any experiment folder is not usable.
    if not list_experiments(node_path):
        raise IndexError('no experiment folder found in {}'.format(node_path))

    return node_path

def get_default_recording(node_path):
    """Return the path of the default recording folder of a record node.

    The default is the first recording (in sorted order) of the first
    experiment (in sorted order) found under ``node_path``.

    Args:
        node_path: path of a record node folder.

    Returns:
        Path of the chosen recording folder.

    Raises:
        IndexError: if the node has no experiment folder, or the chosen
            experiment has no recording folder.
    """
    # os.scandir yields entries in arbitrary order; sort so that the choice
    # is the same on every run.
    experiment_paths = sorted(list_experiments(node_path))
    if not experiment_paths:
        raise IndexError('no experiment folder found in {}'.format(node_path))
    # The list_* helpers already return paths that include their parent, so
    # no re-join is needed (it would duplicate the prefix for relative paths).
    experiment_path = experiment_paths[0]

    recording_paths = sorted(list_recordings(experiment_path))
    if not recording_paths:
        raise IndexError('no recording folder found in {}'.format(experiment_path))
    return recording_paths[0]

def get_default_continuous(rec_path):
    """Return the path of the default processor folder of a recording.

    The default is the first folder (in sorted order) found under
    ``<rec_path>/continuous``.

    Args:
        rec_path: path of a recording folder.

    Returns:
        Path of the chosen processor folder.

    Raises:
        IndexError: if ``<rec_path>/continuous`` has no sub-directory.
    """
    continuous_path = os.path.join(rec_path, 'continuous')
    # os.scandir yields entries in arbitrary order; sort so that the choice
    # is the same on every run.
    processor_paths = sorted(list_processors(continuous_path))
    if not processor_paths:
        raise IndexError('no processor folder found in {}'.format(continuous_path))
    # list_processors already returns paths that include continuous_path, so
    # no re-join is needed (it would duplicate the prefix for relative paths).
    return processor_paths[0]

In [29]:
def get_oe_sample_rate(rec_meta_dict: dict) -> float:
    """Return the sample rate of the first 'continuous' entry of the metadata.

    The value stored under ``['continuous'][0]['sample_rate']`` is converted
    to float.
    """
    first_stream = rec_meta_dict['continuous'][0]
    return float(first_stream['sample_rate'])


def build_chan_info_pd(oe_meta_dict: dict, processor_order: int=0) -> pd.DataFrame:
    """Build a table with one row per channel listed in the recording metadata.

    Args:
        oe_meta_dict: recording metadata; the channels are read from
            ``oe_meta_dict['continuous'][processor_order]['channels']``.
        processor_order: index of the 'continuous' entry to read.

    Returns:
        DataFrame with columns 'number' (position in the channel list),
        'recorded' (always 1), 'name' (the 'channel_name' field) and 'gain'
        (the 'bit_volts' field as float).
    """
    channels = oe_meta_dict['continuous'][processor_order]['channels']
    rows = [
        {
            'number': position,
            'recorded': 1,
            'name': channel['channel_name'],
            'gain': float(channel['bit_volts']),
        }
        for position, channel in enumerate(channels)
    ]
    return pd.DataFrame(rows)

def find_chan_order(chan_info_pd: pd.DataFrame, chan_name: str) -> int:
    """Return the position of a channel among the recorded channels.

    Rows with ``recorded == 1`` are renumbered from 0 and the new index of the
    first row whose 'name' equals ``chan_name`` is returned.

    Raises:
        IndexError: if no recorded channel has that name.
    """
    is_recorded = chan_info_pd['recorded'] == 1
    recorded_pd = chan_info_pd.loc[is_recorded].reset_index(drop=True)
    name_matches = recorded_pd.loc[recorded_pd['name'] == chan_name]
    return name_matches.index[0]

In [30]:
sess_epochs = list_oe_epochs(exp_struct)
sess_epochs.sort()
sess_epochs

['2022-09-22_07-24-41',
 '2022-09-22_09-41-21',
 '2022-09-22_13-31-30',
 '2022-09-22_16-54-40',
 '2022-09-22_18-17-35']

In [31]:
epoch = sess_epochs[0]

epoch_path = os.path.join(raw_folder, epoch)
epoch_path

'/mnt/sphere/speech_bci/raw_data/s_b1376_22/2022-09-22/oe/2022-09-22_07-24-41'

In [32]:
node_path = get_default_node(exp_struct, epoch)
node_path

'/mnt/sphere/speech_bci/raw_data/s_b1376_22/2022-09-22/oe/2022-09-22_07-24-41/Record Node 101'

In [33]:
rec_path = get_default_recording(node_path)
rec_path

'/mnt/sphere/speech_bci/raw_data/s_b1376_22/2022-09-22/oe/2022-09-22_07-24-41/Record Node 101/experiment1/recording1'

In [34]:
cont_path = get_default_continuous(rec_path)
cont_path

'/mnt/sphere/speech_bci/raw_data/s_b1376_22/2022-09-22/oe/2022-09-22_07-24-41/Record Node 101/experiment1/recording1/continuous/Acquisition_Board-100.Rhythm Data'

# quick and dirty, do my thing and use the binary recording

In [35]:
class oeRecordingExtractor(se.BinaryRecordingExtractor):
    """Binary recording extractor for one 'continuous.dat' file of a recording.

    The sample rate and the channel table are read from the recording's
    'structure.oebin' metadata (first 'continuous' entry), and the data file is
    ``<rec_path>/continuous/<processor>/continuous.dat``.
    """
    extractor_name = 'oeContinuousRecordingExtractor'
    has_default_locations = True
    installed = True  # check at class level if installed or not
    is_writable = False
    mode = 'file'
    # error message when not installed
    # NOTE(review): the text mentions SpikeGLX/mtscomp; it looks copied from
    # another extractor. Left unchanged.
    installation_mesg = "To use the SpikeGLXRecordingExtractor run:\n\n pip install mtscomp\n\n"

    _ttl_events = None  # The ttl events
    _t_0 = None # time computed naively (n/s_f_0)
    _t_prime = None # time synchronized to a pattern ('master') reference
    _s_f_0 = None # sampling rate; __init__ sets it from the recording metadata
    _syn_chan_id = None # digital channel for signal id (for nidaq; automatic last channel in lf/ap streams)
    _dig = None # the digital signal
    _start_sample = None # start sample from the beginning of the run
    _start_t = None # start t (absolute in the machine)

    _meta_dict = None # dictionary with metadata of the recording
    _chan_pd = None # pandas dataframe with channel ids

    def __init__(self, rec_path: str, processor, dtype: str = 'int16', syn_chan_id=0):
        """Open the continuous data file of one processor of a recording.

        Args:
            rec_path: path of the recording folder (contains 'structure.oebin'
                and the 'continuous' folder).
            processor: name of the processor folder under 'continuous'.
            dtype: sample type of the binary file. It should eventually come
                from the metadata; the default matches the previously
                hard-coded int16.
            syn_chan_id: accepted for interface compatibility.
                NOTE(review): not used yet; confirm whether it should be
                stored in ``_syn_chan_id``.
        """
        self._meta_dict = get_rec_meta(rec_path)
        self._chan_pd = build_chan_info_pd(self._meta_dict)
        self._s_f_0 = get_oe_sample_rate(self._meta_dict)

        cont_path = os.path.join(rec_path, 'continuous', processor, 'continuous.dat')

        # Plain int rather than a numpy scalar for the channel count.
        n_chan = int(self._chan_pd['recorded'].sum())
        # Honor the dtype argument instead of always reading int16.
        se.BinaryRecordingExtractor.__init__(self, cont_path, self._s_f_0, n_chan, np.dtype(dtype))

        recorded_mask = self._chan_pd['recorded'] == 1
        self._chan_names = np.array(self._chan_pd.loc[recorded_mask, 'name'])

In [36]:
se.BinaryRecordingExtractor

spikeinterface.core.binaryrecordingextractor.BinaryRecordingExtractor

In [37]:
oe_recording = oeRecordingExtractor(rec_path, 'Acquisition_Board-100.Rhythm Data')

In [38]:
list_processors(rec_path)

['/mnt/sphere/speech_bci/raw_data/s_b1376_22/2022-09-22/oe/2022-09-22_07-24-41/Record Node 101/experiment1/recording1/continuous',
 '/mnt/sphere/speech_bci/raw_data/s_b1376_22/2022-09-22/oe/2022-09-22_07-24-41/Record Node 101/experiment1/recording1/events']

#### define pre-processing steps for each epoch and for the session

In [39]:
def chan_to_wav(single_bin_path, chan_name, oe_meta_dict, wav_path):
    """Write one channel of a flat binary recording to a wav file.

    Args:
        single_bin_path: path of the binary file; it is memory-mapped as
            little-endian int16 and reshaped to (samples, channels).
        chan_name: name of the channel to extract, as listed in the metadata.
        oe_meta_dict: recording metadata, used for the channel table and the
            sample rate.
        wav_path: path of the wav file to write.
    """
    # how many channels are interleaved in the file, and which one is wanted
    channel_table = build_chan_info_pd(oe_meta_dict)
    n_channels = channel_table['recorded'].sum()
    logger.info(n_channels)
    chan_pos = find_chan_order(channel_table, chan_name)
    logger.info(chan_pos)

    # map the file without loading it; one row per sample, one column per channel
    flat_samples = np.memmap(single_bin_path, dtype='<i2', mode='r')
    samples_by_chan = flat_samples.reshape(-1, n_channels)

    # dump the selected column as a wav file
    sample_rate = int(get_oe_sample_rate(oe_meta_dict))
    logger.info('writing wave file {}'.format(wav_path))
    wavfile.write(wav_path, sample_rate, samples_by_chan[:, chan_pos])

#### test the extracting of channels


In [51]:
def run_meta(sess_par: dict, exp_struct: dict, epoch:str, do_sync_to_stream=None) -> tuple:
    """Collect the metadata needed to process one epoch, without writing anything.

    Args:
        sess_par: session parameters; only ``sess_par['sess']`` is read (for logging).
        exp_struct: experiment structure of the session.
        epoch: name of the epoch folder.
        do_sync_to_stream: unused; kept so existing callers keep working.

    Returns:
        A tuple ``(rec_meta, rig_par, cont_path)``: the recording metadata, the
        rig parameters returned by ``et.get_rig_par`` and the path of the
        'continuous.dat' file of the default node/recording/processor.
    """
    logger.info('meta for sess {} | epoch {}'.format(sess_par['sess'], epoch))

    # All paths derive from exp_struct, not from notebook globals, so the
    # result always matches the session that was passed in.
    node_path = get_default_node(exp_struct, epoch)
    rec_path = get_default_recording(node_path)
    cont_path = os.path.join(get_default_continuous(rec_path), 'continuous.dat')

    rec_meta = get_rec_meta(rec_path)
    logger.info('getting the recording file ' + cont_path)

    rig_par = et.get_rig_par(exp_struct)
    return rec_meta, rig_par, cont_path

rec_meta, rig_par, single_bin_path = run_meta(sess_par, exp_struct, sess_epochs[0])

2022-09-23 11:15:57,642 root         INFO     meta for sess 2022-09-22 | epoch 2022-09-22_07-24-41
2022-09-23 11:15:57,647 root         INFO     getting the recording file /mnt/sphere/speech_bci/raw_data/s_b1376_22/2022-09-22/oe/2022-09-22_07-24-41/Record Node 101/experiment1/recording1/continuous/Acquisition_Board-100.Rhythm Data/continuous.dat


In [46]:
chan_info_pd = build_chan_info_pd(rec_meta)

In [47]:
chan_info_pd

Unnamed: 0,number,recorded,name,gain
0,0,1,ADC1,0.000153
1,1,1,ADC2,0.000153


In [49]:
chan_name = 'ADC2'
n_channels = chan_info_pd['recorded'].sum()
logger.info(n_channels)
chan_pos = find_chan_order(chan_info_pd, chan_name)
logger.info(chan_pos)

2022-09-23 11:14:16,444 root         INFO     2
2022-09-23 11:14:16,447 root         INFO     1


In [52]:
bin_fp = np.memmap(single_bin_path, dtype='<i2', mode='r').reshape(-1, n_channels)

In [55]:
bin_fp.shape

(69116416, 2)

In [57]:
bin_fp.T[1]

memmap([10787, 10791, 10787, ..., 10943, 10935, 10915], dtype=int16)

In [62]:
def preprocess_run(sess_par: dict, exp_struct: dict, epoch:str, do_sync_to_stream=None) -> dict:
    """Dump the microphone and the adc channels of one epoch to wav files.

    The wav files go to ``<exp_struct['folders']['derived']>/<epoch>``:
    'wav_mic.wav' for the microphone and 'wav_<name>.wav' for every name in
    ``sess_par['adc_list']``.

    Args:
        sess_par: session parameters; 'sess' (for logging) and 'adc_list' are read.
        exp_struct: experiment structure of the session.
        epoch: name of the epoch folder.
        do_sync_to_stream: unused; kept so existing callers keep working.

    Returns:
        The recording metadata read from 'structure.oebin'.
    """
    logger.info('PREPROCESSING sess {} | epoch {}'.format(sess_par['sess'], epoch))

    # All paths derive from exp_struct, not from notebook globals, so the
    # files read always belong to the session that was passed in.
    node_path = get_default_node(exp_struct, epoch)
    rec_path = get_default_recording(node_path)
    cont_path = os.path.join(get_default_continuous(rec_path), 'continuous.dat')

    rec_meta = get_rec_meta(rec_path)
    logger.info('getting the recording file ' + cont_path)

    # the rig parameters map signal names to recorded channel names
    rig_par = et.get_rig_par(exp_struct)

    # folder for the derived data of this epoch
    derived_path = os.path.join(exp_struct['folders']['derived'], epoch)
    fu.makedirs(derived_path)

    # microphone
    # NOTE(review): the signal name is hard-coded; sess_par['mic_list'] is not read.
    mic_ch_name = rig_par['chan']['adc']['microphone_0']
    wav_path = os.path.join(derived_path, 'wav_mic.wav')
    logger.info('get microphone from ch {} to wav {}'.format(mic_ch_name, wav_path))
    chan_to_wav(cont_path, mic_ch_name, rec_meta, wav_path)

    # adc channels, one wav file each
    for adc_chan in sess_par['adc_list']:
        ch_name = rig_par['chan']['adc'][adc_chan]
        wav_path = os.path.join(derived_path, 'wav_{}.wav'.format(adc_chan))
        logger.info('get adc chan {} from ch {} to wav file {}'.format(adc_chan, ch_name, wav_path))
        chan_to_wav(cont_path, ch_name, rec_meta, wav_path)

    return rec_meta

In [63]:
sess_par['adc_list']

['pressure']

In [68]:
reload(pre)
one_epoch_dict = preprocess_run(sess_par, exp_struct, sess_epochs[4])

2022-09-23 11:20:46,047 root         INFO     PREPROCESSING sess 2022-09-22 | epoch 2022-09-22_18-17-35
2022-09-23 11:20:46,081 root         INFO     getting the recording file /mnt/sphere/speech_bci/raw_data/s_b1376_22/2022-09-22/oe/2022-09-22_18-17-35/Record Node 101/experiment1/recording2/continuous/Acquisition_Board-100.Rhythm Data/continuous.dat
2022-09-23 11:20:46,084 root         INFO     get microphone from ch ADC1 to wav /mnt/sphere/speech_bci/derived_data/s_b1376_22/2022-09-22/oe/2022-09-22_18-17-35/wav_mic.wav
2022-09-23 11:20:46,086 root         INFO     8
2022-09-23 11:20:46,090 root         INFO     0
2022-09-23 11:20:46,108 root         INFO     writing wave file /mnt/sphere/speech_bci/derived_data/s_b1376_22/2022-09-22/oe/2022-09-22_18-17-35/wav_mic.wav
2022-09-23 11:20:49,865 root         INFO     get adc chan pressure from ch ADC2 to wav file /mnt/sphere/speech_bci/derived_data/s_b1376_22/2022-09-22/oe/2022-09-22_18-17-35/wav_pressure.wav
2022-09-23 11:20:49,867 root 

In [11]:
### sequentially process all runs of the sessions
# def preprocess_session(sess_par: dict):
#     logger.info('pre-process all runs of sess ' + sess_par['sess'])
#     # get exp struct
#     sess_struct = et.get_exp_struct(sess_par['bird'], sess_par['sess'], sort=sess_par['sort'])
#     # list the epochs
#     sess_epochs = et.list_sgl_epochs(sess_par)
#     logger.info('found epochs: {}'.format(sess_epochs))
#     # preprocess all epochs
#     epoch_dict_list = []
#     for i_ep, epoch in enumerate(sess_epochs):
#         try:
#             exp_struct = et.sgl_struct(sess_par, epoch)
#             one_epoch_dict = pre.preprocess_run(sess_par, exp_struct, epoch)
#             epoch_dict_list.append(one_epoch_dict)
#         except Exception as exc:
#             warnings.warn('Error in epoch {}'.format(epoch), UserWarning)
#             logger.info(traceback.format_exc)
#             logger.info(exc)
#             logger.info('Session {} epoch {} could not be preprocessed'.format(sess_par['sess'], epoch))
        
#     return epoch_dict_list

# all_epoch_list = preprocess_session(sess_par)

2022-04-25 11:28:43,627 root         INFO     pre-process all runs of sess 2022-04-24
2022-04-25 11:28:43,629 ceciestunepipe.file.bcistructure INFO     {'folders': {'bird': '/mnt/sphere/speech_bci/raw_data/s_b1555_22', 'raw': '/mnt/sphere/speech_bci/raw_data/s_b1555_22/2022-04-24', 'sglx': '/mnt/sphere/speech_bci/raw_data/s_b1555_22/2022-04-24/sglx', 'kwik': '/scratch/earneodo/s_b1555_22/sglx/kwik/2022-04-24', 'processed': '/mnt/sphere/speech_bci/processed_data/s_b1555_22/2022-04-24/sglx', 'derived': '/mnt/sphere/speech_bci/derived_data/s_b1555_22/2022-04-24/sglx', 'tmp': '/scratch/earneodo/tmp', 'msort': '/scratch/earneodo/s_b1555_22/sglx/msort/2022-04-24', 'ksort': '/scratch/earneodo/s_b1555_22/sglx/ksort/2022-04-24/2', 'sort': '/mnt/sphere/speech_bci/derived_data/s_b1555_22/2022-04-24/sglx/2'}, 'files': {'par': '/scratch/earneodo/s_b1555_22/sglx/ksort/2022-04-24/2/params.json', 'set': '/mnt/sphere/speech_bci/raw_data/s_b1555_22/2022-04-24/sglx/settings.isf', 'rig': '/mnt/sphere/spee

## Process multiple sessions

In [11]:
# sess_list = all_bird_sess
# # fist implant, right hemisphere
# #sess_list = ['2021-06-24', '2021-06-25', '2021-06-26', '2021-06-27', '2021-06-28', '2021-06-29', '2021-06-30']
# sess_list = ['2022-04-16', '2022-04-17', '2022-04-18', '2022-04-24', 
#             '2022-04-28', '2022-04-29'] #pressure with HSCDLNN001PDAA3 sensor

In [141]:
# all_sess_dict = {}

# for one_sess in sess_list[-1:]:
#     sess_par['sess'] = one_sess
#     preprocess_session(sess_par)

## search bouts for those sessions

In [146]:
# Parameters passed to the bout search (sb.get_all_day_bouts, sb.save_auto_bouts,
# bs.get_bouts_in_long_file in the cells below).
hparams = { # default parameters work well for starling
    # spectrogram
    'num_freq':1024, # how many channels to use in a spectrogram
    'preemphasis':0.97, 
    'frame_shift_ms':5, # step size for fft
    'frame_length_ms':10, # frame length for fft; frame samples must be < num_freq
    'min_level_db':-55, # minimum threshold db for computing spec
    'ref_level_db':110, # reference db for computing spec
    'sample_rate':None, # sample rate of your data
    
    # spectrograms
    'mel_filter': False, # should a mel filter be used?
    'num_mels':1024, # how many channels to use in the mel-spectrogram
    'fmin': 500, # low frequency cutoff for mel filter
    'fmax': 12000, # high frequency cutoff for mel filter
    
    # spectrogram inversion
    'max_iters':200,
    'griffin_lim_iters':20,
    'power':1.5,

    # Added for the searching
    'read_wav_fun': bs.read_wav_chan, # function for loading the wav_like_stream (has to return fs, ndarray)
    'file_order_fun': bs.sess_file_id, # function for extracting the file id within the session
    'min_segment': 30, # minimum length of supra-threshold signal to consider a 'syllable' (ms)
    'min_silence': 3000, # minimum distance between groups of syllables to consider separate bouts (ms)
    'min_bout': 3000, # min bout duration (ms)
    'peak_thresh_rms': 0.55, # threshold (rms) for peak acceptance
    'thresh_rms': 0.25, # threshold for detection of syllables
    'mean_syl_rms_thresh': 0.3, # threshold for acceptance of mean rms across the syllable (relative to rms of the file)
    'max_bout': 180000, # exclude bouts that are too long
    'l_p_r_thresh': 100, # threshold for n of len_ms/peaks (typically about 2-3 syllable spans)
    
    'waveform_edges': 1000, # number of ms before and after the edges of the bout for the waveform sample
    
    'bout_auto_file': 'bout_auto.pickle', # file name for saving the automatically found bouts
    'bout_curated_file': 'bout_checked.pickle', # file name for manually curated bouts (coming soon)
    }

In [147]:
sess_list = ['2022-08-17', #first implant, RH, pressure + RA
            '2022-07-16', #second implant, LH, pressure + RA, but pressure in the first epoch had a HF noise
            ]

In [148]:
#pick sessions to do the bout searching
bout_sessions = sess_list[:1]
bout_sessions

['2022-08-17']

In [155]:
reload(sb)

<module 'ceciestunepipe.pipeline.searchbout' from '/mnt/cube/earneodo/repos/ceciestunepipe/ceciestunepipe/pipeline/searchbout.py'>

In [151]:
#all_sessions = sess_list[-1:]
#all_sessions = ['2021-06-15']

# Search bouts in every selected session.
# sess_par is mutated in place, so after the loop it points at the last
# session processed; sess_bout_pd keeps only the result of the last iteration.
# NOTE(review): the output captured below this cell ends in an
# UnboundLocalError ('sess_bouts_dir'), apparently raised with save=True;
# a later cell calls sb.save_auto_bouts by hand.
for sess in bout_sessions:
    sess_par['sess'] = sess
    sess_bout_pd = sb.get_all_day_bouts(sess_par, hparams, n_jobs=4, ephys_software='oe', save=True)
    #sb.save_auto_bouts(sess_bout_pd, sess_par, hparams, software='oe')
    #sess_bouts_folder = os.path.join(exp_struct['folders']['derived'], 'bouts')
    #bouts_to_wavs(sess_bout_pd, sess_par, hparams, sess_bouts_folder)

2022-08-18 09:11:50,443 ceciestunepipe.pipeline.searchbout INFO     Will search for bouts through all session s_b1555_22, 2022-08-17
2022-08-18 09:11:50,448 ceciestunepipe.pipeline.searchbout INFO     getting wav files from/mnt/sphere/speech_bci/derived_data/s_b1555_22/2022-08-17/oe
2022-08-18 09:11:50,449 ceciestunepipe.pipeline.searchbout INFO     Found 1 files


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


2022-08-18 09:14:55,002 ceciestunepipe.pipeline.searchbout INFO     getting spectrograms


[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:  3.1min
[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:  3.1min finished


UnboundLocalError: local variable 'sess_bouts_dir' referenced before assignment

In [156]:
sb.save_auto_bouts(sess_bout_pd, sess_par, hparams, software='oe')

2022-08-18 09:26:10,883 ceciestunepipe.pipeline.searchbout INFO     saving bouts pandas to /mnt/sphere/speech_bci/derived_data/s_b1555_22/2022-08-17/bouts_oe/bout_auto.pickle
2022-08-18 09:26:15,269 ceciestunepipe.pipeline.searchbout INFO     saving bout detect parameters dict to /mnt/sphere/speech_bci/derived_data/s_b1555_22/2022-08-17/bouts_oe/bout_search_params.pickle
2022-08-18 09:26:15,270 ceciestunepipe.util.sound.boutsearch INFO     saving bout detect parameters dict to /mnt/sphere/speech_bci/derived_data/s_b1555_22/2022-08-17/bouts_oe/bout_search_params.pickle


In [14]:
sess_bout_pd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 883 entries, 0 to 882
Data columns (total 20 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   start_ms        883 non-null    int64  
 1   end_ms          883 non-null    int64  
 2   start_sample    883 non-null    int64  
 3   end_sample      883 non-null    int64  
 4   p_step          883 non-null    object 
 5   rms_p           883 non-null    float64
 6   peak_p          883 non-null    float64
 7   bout_check      883 non-null    bool   
 8   file            883 non-null    object 
 9   len_ms          883 non-null    int64  
 10  syl_in          883 non-null    object 
 11  n_syl           883 non-null    int64  
 12  peaks_p         883 non-null    object 
 13  n_peaks         883 non-null    int64  
 14  l_p_ratio       883 non-null    float64
 15  waveform        883 non-null    object 
 16  confusing       883 non-null    bool   
 17  valid_waveform  883 non-null    boo

In [15]:
np.unique(sess_bout_pd['start_ms']).size

883

# debug

## debug search_bout

In [21]:
## look for a single file
# NOTE(review): this debug cell asks for the 'sglx' structure and uses the sgl
# file helper, while the rest of the notebook works with ephys_software='oe';
# confirm which one is intended before re-running.
sess = sess_list[0]

# This overwrites the module-level exp_struct defined for the example session.
exp_struct = et.get_exp_struct(sess_par['bird'], sess, ephys_software='sglx')
source_folder = exp_struct['folders']['derived']
# collect the microphone wav files under the derived folder
wav_path_list = et.get_sgl_files_epochs(source_folder, file_filter='*wav_mic.wav')
wav_path_list.sort()
logger.info('Found {} files'.format(len(wav_path_list)))
print(wav_path_list)

2021-09-22 15:13:39,371 root         INFO     Found 4 files


['/mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-14/sglx/0712_g0/wav_mic.wav', '/mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-14/sglx/1255_g0/wav_mic.wav', '/mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-14/sglx/1740_g0/wav_mic.wav', '/mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-14/sglx/2118_g0/wav_mic.wav']


In [22]:
one_file = wav_path_list[0]

In [None]:
reload(bs)
epoch_bout_pd, epoch_wav = bs.get_bouts_in_long_file(wav_path_list[0], hparams)

2021-09-22 15:13:45,924 ceciestunepipe.util.sound.boutsearch INFO     Getting bouts for long file /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-14/sglx/0712_g0/wav_mic.wav


tu vieja file /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-14/sglx/0712_g0/wav_mic.wav


2021-09-22 15:13:45,962 ceciestunepipe.util.sound.boutsearch INFO     splitting file into 5 chunks


  0%|          | 0/5 [00:00<?, ?it/s]