# Pre-processing pipeline for spikeglx sessions, zebra finch
- For every run in the session:
 - Load the recordings
 - Extract a wav channel with the microphone and make a wav channel with the nidq sync signal
 - Get the sync events for the nidq sync channel
 
 - Do bout detection
 
In another notebook, bout detection is curated
- Left to decide where to:
    - Sort spikes
    - Sync the spikes/lfp/nidq
    - make and plot 'bout rasters'

In [1]:
%matplotlib inline
import os
import glob
import logging
import pickle
import numpy as np
import pandas as pd
from scipy.io import wavfile
from scipy import signal
import traceback
import warnings

from matplotlib import pyplot as plt
from importlib import reload

# Root logger for the notebook: INFO level, one stream handler whose lines
# carry timestamp, logger name, level and message.
logger = logging.getLogger()
formatter = logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
handler = logging.StreamHandler()
handler.setFormatter(formatter)
logger.setLevel(logging.INFO)
logger.addHandler(handler)


In [2]:
from ceciestunepipe.file import bcistructure as et
from ceciestunepipe.util import sglxutil as sglu
from ceciestunepipe.util import rigutil as ru
from ceciestunepipe.util import wavutil as wu
from ceciestunepipe.util import syncutil as su

from ceciestunepipe.util.sound import boutsearch as bs

from ceciestunepipe.util.spikeextractors import preprocess as pre
from ceciestunepipe.util.spikeextractors.extractors.spikeglxrecordingextractor import readSGLX as rsgl
from ceciestunepipe.util.spikeextractors.extractors.spikeglxrecordingextractor import spikeglxrecordingextractor as sglex

In [3]:
import spikeinterface as si
import spikeinterface.extractors as se
import spikeinterface.toolkit as st
import spikeinterface.sorters as ss
import spikeinterface.comparison as sc
import spikeinterface.widgets as sw
logger.info('all modules loaded')

2022-02-16 04:47:33,898 root         INFO     all modules loaded


## Session parameters and raw files

#### list all the sessions for this bird

In [4]:
bird = 's_b1253_21'
all_bird_sess = et.list_sessions(bird)
logger.info('all sessions for bird are {}'.format(all_bird_sess))

2022-02-16 04:47:35,394 root         INFO     all sessions for bird are ['2021-05-12', '2021-05-20', '2021-05-21', '2021-05-22', '2021-05-23', '2021-05-24', '2021-05-25', '2021-05-26', '2021-05-27', '2021-05-28', '2021-05-29', '2021-05-30', '2021-05-31', '2021-06-01', '2021-06-02', '2021-06-03', '2021-06-04', '2021-06-05', '2021-06-06', '2021-06-07', '2021-06-08', '2021-06-09', '2021-06-10', '2021-06-11', '2021-06-12', '2021-06-13', '2021-06-14', '2021-06-15', '2021-06-16', '2021-06-17', '2021-06-18', '2021-06-19', '2021-06-20', '2021-06-21', '2021-06-22', '2021-06-23', '2021-06-24', '2021-06-25', '2021-06-26', '2021-06-27', '2021-06-28', '2021-06-29', '2021-06-30', '2021-07-01', '2021-07-02', '2021-07-03', '2021-07-04', '2021-07-05', '2021-07-06', '2021-07-07', '2021-07-08', '2021-07-09', '2021-07-10', '2021-07-11', '2021-07-12', '2021-07-13', '2021-07-14', '2021-07-15', '2021-07-16', '2021-07-17', '2021-07-18', '2021-07-19', '2021-07-20', '2021-07-21', '2021-07-22', '2021-07-23', '20

### set up bird and sessions parameters
this will define:
- locations of files (for the bird)
- signals and channels to look for in the metadata of the files and in the rig.json parameter file. Note that these have to exist in all of the sessions that will be processed
- 'sess' is unimportant here, but it comes in handy if there is a need to debug using a single session

In [5]:
# re-import the bcistructure module so that edits made to it are picked up
reload(et)
# for one example session

# Session parameters shared by the cells below; 'sess' is overwritten later
# when looping over several sessions.
sess_par = {'bird': 's_b1253_21',
           'sess': '2021-06-15',
           'probes': ['probe_0'], # probes of interest
           'mic_list': ['microphone_0'], # list of mics of interest, by signal name in rig.json
           'stim_list': ['wav_stim', 'wav_syn'], # list of adc chans with the stimulus
           'nidq_ttl_list': ['wav_ttl'], # list of TTL signals from the nidq digital inputs to extract (besides the 'sync')
           'sort': 2, # label for this sort instance
           }

# folder/file layout for this bird, session and sort label
exp_struct = et.get_exp_struct(sess_par['bird'], sess_par['sess'], sort=sess_par['sort'])

# keep the two folders of interest at hand
ksort_folder = exp_struct['folders']['ksort']
raw_folder = exp_struct['folders']['sglx']

list all the epochs in a session, to check that it is finding what it has to find

In [6]:
sess_epochs = et.list_sgl_epochs(sess_par)
sess_epochs

2022-02-16 04:47:39,035 ceciestunepipe.file.bcistructure INFO     {'folders': {'bird': '/mnt/sphere/speech_bci/raw_data/s_b1253_21', 'raw': '/mnt/sphere/speech_bci/raw_data/s_b1253_21/2021-06-15', 'sglx': '/mnt/sphere/speech_bci/raw_data/s_b1253_21/2021-06-15/sglx', 'kwik': '/scratch/earneodo/s_b1253_21/sglx/kwik/2021-06-15', 'processed': '/mnt/sphere/speech_bci/processed_data/s_b1253_21/2021-06-15/sglx', 'derived': '/mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-15/sglx', 'tmp': '/scratch/earneodo/tmp', 'msort': '/scratch/earneodo/s_b1253_21/sglx/msort/2021-06-15', 'ksort': '/scratch/earneodo/s_b1253_21/sglx/ksort/2021-06-15/2', 'sort': '/mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-15/sglx/2'}, 'files': {'par': '/scratch/earneodo/s_b1253_21/sglx/ksort/2021-06-15/2/params.json', 'set': '/mnt/sphere/speech_bci/raw_data/s_b1253_21/2021-06-15/sglx/settings.isf', 'rig': '/mnt/sphere/speech_bci/raw_data/s_b1253_21/2021-06-15/sglx/rig.json', 'kwd': '/scratch/earneodo/s_b125

['0701_g0', '0928_g0', '1227_g0', '1617_g0']

#### define pre-processing steps for each epoch and for the session

In [7]:
reload(pre)
one_epoch_dict = pre.preprocess_run(sess_par, exp_struct, sess_epochs[0], do_sync_to_stream='ap_0')

2022-02-16 04:47:46,221 ceciestunepipe.util.spikeextractors.preprocess INFO     PREPROCESSING sess 2021-06-15 | epoch 0701_g0
2022-02-16 04:47:46,222 ceciestunepipe.util.spikeextractors.preprocess INFO     getting extractors
2022-02-16 04:47:46,280 ceciestunepipe.util.spikeextractors.preprocess INFO     Got sglx recordings for keys ['nidq', 'lf_0', 'ap_0']
2022-02-16 04:47:46,281 ceciestunepipe.util.spikeextractors.preprocess INFO     Getting microphone channel(s) ['microphone_0']
2022-02-16 04:47:46,281 ceciestunepipe.util.wavutil INFO     sampling rate 25000
2022-02-16 04:47:46,282 ceciestunepipe.util.wavutil INFO     saving (1, 220413236)-shaped array as wav in /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-15/sglx/0701_g0/wav_mic.wav
2022-02-16 04:47:49,991 ceciestunepipe.util.wavutil INFO     saving (1, 220413236)-shaped array as npy in /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-15/sglx/0701_g0/wav_mic.npy
2022-02-16 04:47:54,383 ceciestunepipe.util.spikeextrac

In [10]:
### sequentially process all runs of the sessions
def preprocess_session(sess_par: dict):
    """Pre-process every epoch (run) of one session, one after another.

    The epochs are listed with ``et.list_sgl_epochs`` and each one is passed
    through ``pre.preprocess_run``. If an epoch raises, a warning is issued,
    the full traceback and the exception are logged, and the loop moves on to
    the next epoch, so one bad epoch does not abort the whole session.

    Args:
        sess_par: session parameters. The 'bird', 'sess' and 'sort' keys are
            read here; the whole dict is handed on to the helper functions.

    Returns:
        list with the value returned by ``pre.preprocess_run`` for each epoch
        that completed without raising, in the order the epochs were listed.
    """
    logger.info('pre-process all runs of sess ' + sess_par['sess'])
    # get exp struct
    # NOTE(review): the result is not used below; the call is kept in case it
    # has side effects -- confirm and drop it if it has none.
    sess_struct = et.get_exp_struct(sess_par['bird'], sess_par['sess'], sort=sess_par['sort'])
    # list the epochs
    sess_epochs = et.list_sgl_epochs(sess_par)
    logger.info('found epochs: {}'.format(sess_epochs))
    # preprocess all epochs
    epoch_dict_list = []
    for epoch in sess_epochs:
        try:
            exp_struct = et.sgl_struct(sess_par, epoch)
            one_epoch_dict = pre.preprocess_run(sess_par, exp_struct, epoch)
        except Exception as exc:
            # best effort: report the failure and carry on with the next epoch
            warnings.warn('Error in epoch {}'.format(epoch), UserWarning)
            # format_exc must be *called*; passing the bare function would log
            # its repr instead of the traceback text
            logger.info(traceback.format_exc())
            logger.info(exc)
            logger.info('Session {} epoch {} could not be preprocessed'.format(sess_par['sess'], epoch))
        else:
            epoch_dict_list.append(one_epoch_dict)

    return epoch_dict_list

#all_epoch_list = preprocess_session(sess_par)

## Process multiple sessions

In [11]:
# start from every session found for the bird ...
sess_list = all_bird_sess
# ... then override with an explicit list: first implant, right hemisphere
sess_list = ['2021-06-12', '2021-06-13', '2021-06-14', '2021-06-15', '2021-06-16', '2021-06-17', '2021-07-01', '2021-07-02', '2021-07-03',
            '2021-07-17', '2021-07-18', '2021-07-19', '2021-07-22', '2021-07-23', '2021-07-24', '2021-07-25', '2021-07-26', '2021-07-27', 
            '2021-07-28', '2021-07-29', '2021-07-30', '2021-07-31']
#sess_list = ['2021-06-12', '2021-06-13']

In [12]:
# NOTE(review): all_sess_dict is never filled in this cell
all_sess_dict = {}

# Pre-process every session in the list. sess_par is updated in place, so
# after the loop its 'sess' entry is the last session processed; the list
# returned by preprocess_session is discarded.
for one_sess in sess_list[:]:
    sess_par['sess'] = one_sess
    preprocess_session(sess_par)

2022-02-16 05:21:24,170 root         INFO     pre-process all runs of sess 2021-06-12
2022-02-16 05:21:24,172 ceciestunepipe.file.bcistructure INFO     {'folders': {'bird': '/mnt/sphere/speech_bci/raw_data/s_b1253_21', 'raw': '/mnt/sphere/speech_bci/raw_data/s_b1253_21/2021-06-12', 'sglx': '/mnt/sphere/speech_bci/raw_data/s_b1253_21/2021-06-12/sglx', 'kwik': '/scratch/earneodo/s_b1253_21/sglx/kwik/2021-06-12', 'processed': '/mnt/sphere/speech_bci/processed_data/s_b1253_21/2021-06-12/sglx', 'derived': '/mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-12/sglx', 'tmp': '/scratch/earneodo/tmp', 'msort': '/scratch/earneodo/s_b1253_21/sglx/msort/2021-06-12', 'ksort': '/scratch/earneodo/s_b1253_21/sglx/ksort/2021-06-12/2', 'sort': '/mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-12/sglx/2'}, 'files': {'par': '/scratch/earneodo/s_b1253_21/sglx/ksort/2021-06-12/2/params.json', 'set': '/mnt/sphere/speech_bci/raw_data/s_b1253_21/2021-06-12/sglx/settings.isf', 'rig': '/mnt/sphere/spee

In [15]:
sess_par

{'bird': 's_b1253_21',
 'sess': '2021-07-18',
 'probes': ['probe_0'],
 'mic_list': ['microphone_0'],
 'sort': 2}

In [16]:
# Search bouts

## search bouts for those sessions

In [45]:
from ceciestunepipe.util.sound import boutsearch as bs
from ceciestunepipe.util import wavutil as wu

from joblib import Parallel, delayed
import pickle
import sys

In [46]:
def sess_file_id(f_path):
    """Return the integer id found at the end of a wav file name.

    The id is the text after the last '-' of the file name (directories are
    ignored) and before the first '.wav' that follows it,
    e.g. '/some/dir/rec-0012.wav' -> 12.
    """
    file_name = os.path.basename(f_path)
    after_last_dash = file_name.rsplit('-', 1)[-1]
    id_text, _, _ = after_last_dash.partition('.wav')
    return int(id_text)


def get_all_day_bouts(sess_par: dict, hparams:dict, n_jobs: int=12, ephys_software='sglx', 
                     parallel=True) -> pd.DataFrame:
    """Search for bouts in every microphone wav file of one session.

    The '*wav_mic.wav' files under the session's 'derived' folder are listed,
    sorted by path, and each one is passed to ``bs.get_epoch_bouts`` together
    with ``hparams``. The per-file results are concatenated with ``pd.concat``.

    Args:
        sess_par: session parameters; the 'bird' and 'sess' keys are read.
        hparams: bout-search parameters, handed unchanged to
            ``bs.get_epoch_bouts``.
        n_jobs: number of joblib jobs (only used when ``parallel`` is True).
        ephys_software: forwarded to ``et.get_exp_struct``.
        parallel: run the files through joblib threads when True, otherwise
            process them one by one in a plain loop.

    Returns:
        The concatenation of the per-file results.
    """
    logger.info('Will search for bouts through all session {}, {}'.format(sess_par['bird'], sess_par['sess']))
    day_struct = et.get_exp_struct(sess_par['bird'], sess_par['sess'], ephys_software=ephys_software)

    # every epoch of the day contributes one microphone wav file
    derived_dir = day_struct['folders']['derived']
    wav_paths = et.get_sgl_files_epochs(derived_dir, file_filter='*wav_mic.wav')
    wav_paths.sort()
    logger.info('Found {} files'.format(len(wav_paths)))
    print(wav_paths)

    def bouts_for_file(wav_path):
        # bind hparams so that the workers only need the path
        return bs.get_epoch_bouts(wav_path, hparams)

    if not parallel:
        per_file_bouts = [bouts_for_file(one_path) for one_path in wav_paths]
    else:
        jobs = (delayed(bouts_for_file)(one_path) for one_path in wav_paths)
        per_file_bouts = Parallel(n_jobs=n_jobs, verbose=100, prefer='threads')(jobs)

    # one table for the whole day
    return pd.concat(per_file_bouts)

def save_auto_bouts(sess_bout_pd, sess_par, hparams):
    """Write a session's bouts table and the search parameters to disk.

    Both files go to the 'derived' folder of the 'bouts_sglx' structure for
    the session: the table is pickled under the name given by
    ``hparams['bout_auto_file']`` and ``hparams`` itself is pickled as
    'bout_search_params.pickle'. The folder is created if missing.

    Args:
        sess_bout_pd: object with a ``to_pickle`` method (the bouts table).
        sess_par: session parameters; the 'bird' and 'sess' keys are read.
        hparams: bout-search parameters; must contain 'bout_auto_file'.
    """
    bouts_struct = et.get_exp_struct(sess_par['bird'], sess_par['sess'], ephys_software='bouts_sglx')
    out_dir = bouts_struct['folders']['derived']

    bouts_pickle = os.path.join(out_dir, hparams['bout_auto_file'])
    params_pickle = os.path.join(out_dir, 'bout_search_params.pickle')

    os.makedirs(out_dir, exist_ok=True)

    # the bouts table first ...
    logger.info('saving bouts pandas to ' + bouts_pickle)
    sess_bout_pd.to_pickle(bouts_pickle)

    # ... then the parameters that produced it
    logger.info('saving bout detect parameters dict to ' + params_pickle)
    with open(params_pickle, 'wb') as params_file:
        pickle.dump(hparams, params_file)

In [47]:
# Parameters for the bout search; this dict is passed to get_all_day_bouts and
# save_auto_bouts and is pickled next to the detected bouts.
hparams = {
    # spectrogram
    'num_freq':1024, #1024# how many channels to use in a spectrogram #
    'preemphasis':0.97, 
    'frame_shift_ms':5, # step size for fft
    'frame_length_ms':10, #128 # frame length for fft FRAME SAMPLES < NUM_FREQ!!!
    'min_level_db':-55, # minimum threshold db for computing spec
    'ref_level_db':110, # reference db for computing spec
    #'sample_rate':None, # sample rate of your data
    
    # spectrograms
    'mel_filter': False, # should a mel filter be used?
    'num_mels':1024, # how many channels to use in the mel-spectrogram
    'fmin': 500, # low frequency cutoff for mel filter
    'fmax': 12000, # high frequency cutoff for mel filter
    
    # spectrogram inversion
    'max_iters':200,
    'griffin_lim_iters':20,
    'power':1.5,

    # Added for the searching
    'read_wav_fun': wu.read_wav_chan, # function for loading the wav_like_stream (has to return fs, ndarray)
    'file_order_fun': sess_file_id, # function for extracting the file id within the session
    'min_segment': 30, # Minimum length of supra_threshold to consider a 'syllable' (ms)
    'min_silence': 2000, # Minimum distance between groups of syllables to consider separate bouts (ms)
    'min_bout': 5000, # min bout duration (ms)
    'peak_thresh_rms': 0.55, # threshold (rms) for peak acceptance,
    'thresh_rms': 0.25, # threshold for detection of syllables
    'mean_syl_rms_thresh': 0.3, # threshold for acceptance of mean rms across the syllable (relative to rms of the file)
    'max_bout': 120000, # exclude bouts too long
    'l_p_r_thresh': 100, # threshold for n of len_ms/peaks (typically about 2-3 syllable spans)
    
    'waveform_edges': 1000, # get number of ms before and after the edges of the bout for the waveform sample
    
    'bout_auto_file': 'bout_auto.pickle', # file name for saving the automatically found bouts
    'bout_curated_file': 'bout_checked.pickle', # file name for manually curated bouts (coming soon)
    }

In [48]:
# only the first two sessions of the list are searched here
all_sessions = sess_list[:2]
#all_sessions = ['2021-06-15']

# For each session: point sess_par at it, search its bouts (serially, since
# parallel=False) and save the result.
for sess in all_sessions:
    sess_par['sess'] = sess
    sess_bout_pd = get_all_day_bouts(sess_par, hparams, parallel=False)
    save_auto_bouts(sess_bout_pd, sess_par, hparams)
    # NOTE(review): exp_struct is not reassigned inside this loop, so this path
    # comes from whichever session exp_struct was last computed for, not from
    # `sess`. The value is only needed by the commented-out call below;
    # recompute exp_struct per session before re-enabling it.
    sess_bouts_folder = os.path.join(exp_struct['folders']['derived'], 'bouts')
    #bouts_to_wavs(sess_bout_pd, sess_par, hparams, sess_bouts_folder)

2021-12-03 16:54:27,329 root         INFO     Will search for bouts through all session s_b1253_21, 2021-06-12
2021-12-03 16:54:27,615 root         INFO     Found 3 files
2021-12-03 16:54:27,616 ceciestunepipe.util.sound.boutsearch INFO     Getting bouts for long file /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-12/sglx/1132_g0/wav_mic.wav


['/mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-12/sglx/1132_g0/wav_mic.wav', '/mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-12/sglx/1631_g0/wav_mic.wav', '/mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-12/sglx/714_g0/wav_mic.wav']
tu vieja file /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-12/sglx/1132_g0/wav_mic.wav


2021-12-03 16:54:27,699 ceciestunepipe.util.sound.boutsearch INFO     splitting file into 2 chunks


  0%|          | 0/2 [00:00<?, ?it/s]

2021-12-03 16:56:09,773 ceciestunepipe.util.sound.boutsearch INFO     saving bout detect parameters dict to /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-12/sglx/1132_g0/bout_search_params.pickle
2021-12-03 16:56:09,841 ceciestunepipe.util.sound.boutsearch INFO     saving bouts pandas to /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-12/sglx/1132_g0/bout_auto.pickle
2021-12-03 16:56:09,903 ceciestunepipe.util.sound.boutsearch INFO     Getting bouts for long file /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-12/sglx/1631_g0/wav_mic.wav


tu vieja file /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-12/sglx/1631_g0/wav_mic.wav


2021-12-03 16:56:10,022 ceciestunepipe.util.sound.boutsearch INFO     splitting file into 4 chunks


  0%|          | 0/4 [00:00<?, ?it/s]

2021-12-03 17:01:39,870 ceciestunepipe.util.sound.boutsearch INFO     saving bout detect parameters dict to /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-12/sglx/1631_g0/bout_search_params.pickle
2021-12-03 17:01:40,038 ceciestunepipe.util.sound.boutsearch INFO     saving bouts pandas to /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-12/sglx/1631_g0/bout_auto.pickle
2021-12-03 17:01:40,210 ceciestunepipe.util.sound.boutsearch INFO     Getting bouts for long file /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-12/sglx/714_g0/wav_mic.wav


tu vieja file /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-12/sglx/714_g0/wav_mic.wav


2021-12-03 17:01:40,274 ceciestunepipe.util.sound.boutsearch INFO     splitting file into 4 chunks


  0%|          | 0/4 [00:00<?, ?it/s]

2021-12-03 17:07:15,074 ceciestunepipe.util.sound.boutsearch INFO     saving bout detect parameters dict to /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-12/sglx/714_g0/bout_search_params.pickle
2021-12-03 17:07:15,202 ceciestunepipe.util.sound.boutsearch INFO     saving bouts pandas to /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-12/sglx/714_g0/bout_auto.pickle
2021-12-03 17:07:15,660 root         INFO     saving bouts pandas to /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-12/bouts_sglx/bout_auto.pickle
2021-12-03 17:07:16,285 root         INFO     saving bout detect parameters dict to /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-12/bouts_sglx/bout_search_params.pickle
2021-12-03 17:07:16,288 root         INFO     Will search for bouts through all session s_b1253_21, 2021-06-13
2021-12-03 17:07:16,760 root         INFO     Found 5 files
2021-12-03 17:07:16,761 ceciestunepipe.util.sound.boutsearch INFO     Getting bouts for long file /mnt/sphere/s

['/mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-13/sglx/0709_g0/wav_mic.wav', '/mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-13/sglx/1227_g0/wav_mic.wav', '/mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-13/sglx/1819_g0/wav_mic.wav', '/mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-13/sglx/2342_g0/wav_mic.wav', '/mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-13/sglx/713_g0/wav_mic.wav']
tu vieja file /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-13/sglx/0709_g0/wav_mic.wav


2021-12-03 17:07:16,812 ceciestunepipe.util.sound.boutsearch INFO     splitting file into 1 chunks


  0%|          | 0/1 [00:00<?, ?it/s]

2021-12-03 17:07:19,653 ceciestunepipe.util.sound.boutsearch INFO     saving bout detect parameters dict to /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-13/sglx/0709_g0/bout_search_params.pickle
2021-12-03 17:07:19,671 ceciestunepipe.util.sound.boutsearch INFO     saving bouts pandas to /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-13/sglx/0709_g0/bout_auto.pickle
2021-12-03 17:07:19,708 ceciestunepipe.util.sound.boutsearch INFO     Getting bouts for long file /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-13/sglx/1227_g0/wav_mic.wav


tu vieja file /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-13/sglx/1227_g0/wav_mic.wav


2021-12-03 17:07:19,762 ceciestunepipe.util.sound.boutsearch INFO     splitting file into 3 chunks


  0%|          | 0/3 [00:00<?, ?it/s]

2021-12-03 17:10:29,324 ceciestunepipe.util.sound.boutsearch INFO     saving bout detect parameters dict to /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-13/sglx/1227_g0/bout_search_params.pickle
2021-12-03 17:10:29,560 ceciestunepipe.util.sound.boutsearch INFO     saving bouts pandas to /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-13/sglx/1227_g0/bout_auto.pickle
2021-12-03 17:10:29,853 ceciestunepipe.util.sound.boutsearch INFO     Getting bouts for long file /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-13/sglx/1819_g0/wav_mic.wav


tu vieja file /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-13/sglx/1819_g0/wav_mic.wav


2021-12-03 17:10:29,924 ceciestunepipe.util.sound.boutsearch INFO     splitting file into 3 chunks


  0%|          | 0/3 [00:00<?, ?it/s]

2021-12-03 17:13:08,877 ceciestunepipe.util.sound.boutsearch INFO     saving bout detect parameters dict to /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-13/sglx/1819_g0/bout_search_params.pickle
2021-12-03 17:13:09,010 ceciestunepipe.util.sound.boutsearch INFO     saving bouts pandas to /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-13/sglx/1819_g0/bout_auto.pickle
2021-12-03 17:13:09,157 ceciestunepipe.util.sound.boutsearch INFO     Getting bouts for long file /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-13/sglx/2342_g0/wav_mic.wav


tu vieja file /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-13/sglx/2342_g0/wav_mic.wav


2021-12-03 17:13:09,214 ceciestunepipe.util.sound.boutsearch INFO     splitting file into 5 chunks


  0%|          | 0/5 [00:00<?, ?it/s]

2021-12-03 17:18:32,419 ceciestunepipe.util.sound.boutsearch INFO     saving bout detect parameters dict to /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-13/sglx/2342_g0/bout_search_params.pickle
2021-12-03 17:18:32,535 ceciestunepipe.util.sound.boutsearch INFO     saving bouts pandas to /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-13/sglx/2342_g0/bout_auto.pickle
2021-12-03 17:18:34,546 ceciestunepipe.util.sound.boutsearch INFO     Getting bouts for long file /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-13/sglx/713_g0/wav_mic.wav


tu vieja file /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-13/sglx/713_g0/wav_mic.wav


2021-12-03 17:18:34,558 ceciestunepipe.util.sound.boutsearch INFO     splitting file into 5 chunks


  0%|          | 0/5 [00:00<?, ?it/s]

2021-12-03 17:23:52,669 ceciestunepipe.util.sound.boutsearch INFO     saving bout detect parameters dict to /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-13/sglx/713_g0/bout_search_params.pickle
2021-12-03 17:23:52,883 ceciestunepipe.util.sound.boutsearch INFO     saving bouts pandas to /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-13/sglx/713_g0/bout_auto.pickle
2021-12-03 17:23:53,465 root         INFO     saving bouts pandas to /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-13/bouts_sglx/bout_auto.pickle
2021-12-03 17:23:56,417 root         INFO     saving bout detect parameters dict to /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-13/bouts_sglx/bout_search_params.pickle


In [22]:
sess_bout_pd.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 103 entries, 0 to 37
Data columns (total 17 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   start_ms      103 non-null    int64  
 1   end_ms        103 non-null    int64  
 2   start_sample  103 non-null    int64  
 3   end_sample    103 non-null    int64  
 4   p_step        103 non-null    object 
 5   rms_p         103 non-null    float64
 6   peak_p        103 non-null    float64
 7   bout_check    103 non-null    bool   
 8   file          103 non-null    object 
 9   len_ms        103 non-null    int64  
 10  syl_in        103 non-null    object 
 11  n_syl         103 non-null    int64  
 12  peaks_p       103 non-null    object 
 13  n_peaks       103 non-null    int64  
 14  l_p_ratio     103 non-null    float64
 15  waveform      103 non-null    object 
 16  confusing     103 non-null    bool   
dtypes: bool(2), float64(3), int64(7), object(5)
memory usage: 13.1+ KB


In [24]:
np.unique(sess_bout_pd['start_ms']).size

103

# debug

## debug search_bout

In [21]:
## look for a single file
# Debug helper: list the microphone wav files of a single session.
# NOTE: this rebinds the notebook-level names `sess` and `exp_struct`.
sess = sess_list[0]

exp_struct = et.get_exp_struct(sess_par['bird'], sess, ephys_software='sglx')
source_folder = exp_struct['folders']['derived']
# same listing and in-place sort as in get_all_day_bouts
wav_path_list = et.get_sgl_files_epochs(source_folder, file_filter='*wav_mic.wav')
wav_path_list.sort()
logger.info('Found {} files'.format(len(wav_path_list)))
print(wav_path_list)

2021-09-22 15:13:39,371 root         INFO     Found 4 files


['/mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-14/sglx/0712_g0/wav_mic.wav', '/mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-14/sglx/1255_g0/wav_mic.wav', '/mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-14/sglx/1740_g0/wav_mic.wav', '/mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-14/sglx/2118_g0/wav_mic.wav']


In [22]:
one_file = wav_path_list[0]

In [None]:
reload(bs)
epoch_bout_pd, epoch_wav = bs.get_bouts_in_long_file(wav_path_list[0], hparams)

2021-09-22 15:13:45,924 ceciestunepipe.util.sound.boutsearch INFO     Getting bouts for long file /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-14/sglx/0712_g0/wav_mic.wav


tu vieja file /mnt/sphere/speech_bci/derived_data/s_b1253_21/2021-06-14/sglx/0712_g0/wav_mic.wav


2021-09-22 15:13:45,962 ceciestunepipe.util.sound.boutsearch INFO     splitting file into 5 chunks


  0%|          | 0/5 [00:00<?, ?it/s]