## Searching for bouts for a day of alsa recording

In [1]:
%matplotlib inline
import os
import glob
import socket
import logging
import numpy as np
import pandas as pd
from scipy.io import wavfile
from scipy import signal

from matplotlib import pyplot as plt

from importlib import reload

# Configure the root logger so INFO-level messages (including those emitted by
# the pipeline modules) are shown in the notebook output.
NOTEBOOK_HANDLER_NAME = 'notebook_stream'

logger = logging.getLogger()

# Re-running this cell used to stack one more StreamHandler on the root logger
# each time, so every message was printed repeatedly. Remove any handler that a
# previous run of this cell installed before adding a fresh one.
for stale_handler in [h for h in logger.handlers
                      if h.get_name() == NOTEBOOK_HANDLER_NAME]:
    logger.removeHandler(stale_handler)

handler = logging.StreamHandler()
handler.set_name(NOTEBOOK_HANDLER_NAME)
formatter = logging.Formatter(
        '%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(logging.INFO)

# Record which machine produced the outputs of this run.
logger.info('Running on {}'.format(socket.gethostname()))

2022-03-08 10:42:26,968 root         INFO     Running on pakhi


In [2]:
from ceciestunepipe.file import bcistructure as et
from ceciestunepipe.util.sound import boutsearch as bs
from ceciestunepipe.pipeline import searchbout as sb

### Get the file locations for a session (day) of recordings

In [3]:
# Re-import the file-structure module so edits made to it take effect.
reload(et)

# Bird and session (recording day) this notebook works on.
sess_par = dict(bird='s_b1585_22', sess='2022-02-23')

# Look up the experiment structure for this bird/session (ALSA recordings).
exp_struct = et.get_exp_struct(
    sess_par['bird'],
    sess_par['sess'],
    ephys_software='alsa',
)

# Folder registered under the 'alsa' key of the session's folders.
raw_folder = exp_struct['folders']['alsa']

In [4]:
# Display the folder paths stored in the experiment structure for this session.
exp_struct['folders']

{'bird': '/mnt/sphere/speech_bci/raw_data/s_b1585_22',
 'raw': '/mnt/sphere/speech_bci/raw_data/s_b1585_22/2022-02-23',
 'alsa': '/mnt/sphere/speech_bci/raw_data/s_b1585_22/2022-02-23/alsa',
 'kwik': '/scratch/earneodo/s_b1585_22/alsa/kwik/2022-02-23',
 'processed': '/mnt/sphere/speech_bci/processed_data/s_b1585_22/2022-02-23/alsa',
 'derived': '/mnt/sphere/speech_bci/derived_data/s_b1585_22/2022-02-23/alsa',
 'tmp': '/scratch/earneodo/tmp',
 'msort': '/scratch/earneodo/s_b1585_22/alsa/msort/2022-02-23',
 'ksort': '/scratch/earneodo/s_b1585_22/alsa/ksort/2022-02-23/',
 'sort': '/mnt/sphere/speech_bci/derived_data/s_b1585_22/2022-02-23/alsa/'}

#### search parameters
Spectrograms are computed using librosa

Additional parameters are for bout search criteria and functions to read the data

In [5]:
# Parameters for the spectrogram computation and for the bout search.
hparams = {
    # --- spectrogram ---
    'num_freq': 1024,  # how many channels to use in a spectrogram
    'preemphasis': 0.97,
    'frame_shift_ms': 5,  # step size for fft
    'frame_length_ms': 10,  # frame length for fft; FRAME SAMPLES must be < NUM_FREQ
    'min_level_db': -55,  # minimum threshold db for computing spec
    'ref_level_db': 110,  # reference db for computing spec
    'sample_rate': None,  # sample rate of your data

    # --- mel spectrogram ---
    'mel_filter': False,  # should a mel filter be used?
    'num_mels': 1024,  # how many channels to use in the mel-spectrogram
    'fmin': 500,  # low frequency cutoff for mel filter
    'fmax': 12000,  # high frequency cutoff for mel filter

    # --- spectrogram inversion ---
    'max_iters': 200,
    'griffin_lim_iters': 20,
    'power': 1.5,

    # --- added for the bout search ---
    'read_wav_fun': bs.read_wav_chan,  # function for loading the wav-like stream (has to return fs, ndarray)
    'file_order_fun': bs.sess_file_id,  # function for extracting the file id within the session
    'min_segment': 30,  # minimum length of supra-threshold signal to consider a 'syllable' (ms)
    'min_silence': 2000,  # minimum distance between groups of syllables to consider separate bouts (ms)
    'min_bout': 5000,  # minimum bout duration (ms)
    'peak_thresh_rms': 0.55,  # threshold (rms) for peak acceptance
    'thresh_rms': 0.25,  # threshold for detection of syllables
    'mean_syl_rms_thresh': 0.3,  # threshold for acceptance of mean rms across the syllable (relative to rms of the file)
    'max_bout': 120000,  # exclude bouts that are too long
    'l_p_r_thresh': 100,  # threshold for len_ms / number of peaks (typically about 2-3 syllable spans)

    'waveform_edges': 1000,  # ms to include before and after the edges of the bout in the waveform sample

    # --- output file names ---
    'bout_auto_file': 'bout_auto.pickle',  # file name for saving the automatically found bouts
    'bout_curated_file': 'bout_checked.pickle',  # file name for manually curated bouts (coming soon)
}

## Use high-level functions to get all bouts for a list of sessions
 - list the sessions
 - check if bouts exist for that session
     - if yes, do nothing (or summary)
     - if not, get the bouts
     

### search for an explicit list of sessions

In [6]:
# List the bird's sessions found in the raw ALSA section, sorted by name
# (the session names are ISO dates, so this is also chronological order).
all_sessions = sorted(
    et.list_sessions(sess_par['bird'], section='raw', ephys_software='alsa')
)
print(all_sessions)

['2022-02-16', '2022-02-17', '2022-02-18', '2022-02-19', '2022-02-20', '2022-02-21', '2022-02-22', '2022-02-23', '2022-02-24', '2022-02-26', '2022-02-27', '2022-02-28', '2022-03-01', '2022-03-02', '2022-03-03', '2022-03-04', '2022-03-05', '2022-03-06', '2022-03-07', '2022-03-08']


In [11]:
# Re-import the search module, then run the bout search over every listed
# session using 12 parallel jobs.
reload(sb)
sb.search_bird_bouts(
    sess_par['bird'],
    all_sessions,
    hparams,
    ephys_software='alsa',
    n_jobs=12,
)

2022-02-24 09:43:22,744 ceciestunepipe.pipeline.searchbout INFO     Getting all bouts in bird s_b1585_22 sessions ['2022-02-16', '2022-02-17', '2022-02-18', '2022-02-19', '2022-02-20', '2022-02-21', '2022-02-22', '2022-02-23', '2022-02-24']
2022-02-24 09:43:22,745 ceciestunepipe.pipeline.searchbout INFO     Search/bouts file not found in /mnt/sphere/speech_bci/derived_data/s_b1585_22/2022-02-16/alsa/bouts_ceciestunepipe
2022-02-24 09:43:22,746 ceciestunepipe.pipeline.searchbout INFO     Will search bouts for Bird s_b1585_22 - sess 2022-02-16
2022-02-24 09:43:22,746 ceciestunepipe.pipeline.searchbout INFO     Will search for bouts through all session s_b1585_22, 2022-02-16
2022-02-24 09:43:22,763 ceciestunepipe.pipeline.searchbout INFO     Found 10 files


[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   1 tasks      | elapsed:   10.2s
[Parallel(n_jobs=12)]: Done   2 out of  10 | elapsed:   10.2s remaining:   40.8s
[Parallel(n_jobs=12)]: Done   3 out of  10 | elapsed:   10.2s remaining:   23.8s
[Parallel(n_jobs=12)]: Done   4 out of  10 | elapsed:   10.2s remaining:   15.3s
[Parallel(n_jobs=12)]: Done   5 out of  10 | elapsed:   10.2s remaining:   10.2s
[Parallel(n_jobs=12)]: Done   6 out of  10 | elapsed:   10.2s remaining:    6.8s
[Parallel(n_jobs=12)]: Done   7 out of  10 | elapsed:   10.2s remaining:    4.4s
[Parallel(n_jobs=12)]: Done   8 out of  10 | elapsed:   10.2s remaining:    2.5s
[Parallel(n_jobs=12)]: Done  10 out of  10 | elapsed:   10.2s remaining:    0.0s


KeyboardInterrupt: 

### Search the sessions starting a given number of days back

In [12]:
# Re-import the search module, then search this bird's sessions with a
# look-back argument of 3 and 12 parallel jobs.
reload(sb)
sb.all_bird_bouts_search(
    sess_par['bird'],
    3,
    hparams,
    ephys_software='alsa',
    n_jobs=12,
)

2022-02-24 09:43:39,040 ceciestunepipe.pipeline.searchbout INFO     Getting all bouts for bird s_b1585_22 from date 2022-02-19 onward
2022-02-24 09:43:39,042 ceciestunepipe.pipeline.searchbout INFO     Getting all bouts in bird s_b1585_22 sessions ['2022-02-20', '2022-02-21', '2022-02-22', '2022-02-23', '2022-02-24']
2022-02-24 09:43:39,043 ceciestunepipe.pipeline.searchbout INFO     Bird s_b1585_22 already had a bouts file in sess 2022-02-20
2022-02-24 09:43:39,045 ceciestunepipe.pipeline.searchbout INFO     Bird s_b1585_22 already had a bouts file in sess 2022-02-21
2022-02-24 09:43:39,047 ceciestunepipe.pipeline.searchbout INFO     Bird s_b1585_22 already had a bouts file in sess 2022-02-22
2022-02-24 09:43:39,048 ceciestunepipe.pipeline.searchbout INFO     Bird s_b1585_22 already had a bouts file in sess 2022-02-23
2022-02-24 09:43:39,048 ceciestunepipe.pipeline.searchbout INFO     Search/bouts file not found in /mnt/sphere/speech_bci/derived_data/s_b1585_22/2022-02-24/alsa/bouts_c

[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   1 tasks      | elapsed:   29.9s
[Parallel(n_jobs=12)]: Done   2 out of   4 | elapsed:   30.1s remaining:   30.1s


2022-02-24 09:44:10,399 ceciestunepipe.pipeline.searchbout INFO     getting spectrograms


[Parallel(n_jobs=12)]: Done   4 out of   4 | elapsed:   31.3s remaining:    0.0s
[Parallel(n_jobs=12)]: Done   4 out of   4 | elapsed:   31.3s finished


2022-02-24 09:44:31,375 ceciestunepipe.pipeline.searchbout INFO     saving bouts pandas to /mnt/sphere/speech_bci/derived_data/s_b1585_22/2022-02-24/alsa/bouts_ceciestunepipe/bout_auto.pickle
2022-02-24 09:44:33,099 ceciestunepipe.pipeline.searchbout INFO     saving bout detect parameters dict to /mnt/sphere/speech_bci/derived_data/s_b1585_22/2022-02-24/alsa/bouts_ceciestunepipe/bout_search_params.pickle


0

In [12]:
# Load the automatically detected bouts saved for session 2022-02-24 and list
# the columns of the resulting table.
lpd = pd.read_pickle(
    '/mnt/sphere/speech_bci/derived_data/s_b1585_22/2022-02-24/alsa/bouts_ceciestunepipe/bout_auto.pickle'
)
lpd.columns

Index(['start_ms', 'end_ms', 'start_sample', 'end_sample', 'p_step', 'rms_p',
       'peak_p', 'bout_check', 'file', 'len_ms', 'syl_in', 'n_syl', 'peaks_p',
       'n_peaks', 'l_p_ratio', 'waveform', 'valid_waveform', 'valid',
       'spectrogram'],
      dtype='object')

In [10]:
# Scratch check: a freshly created, empty DataFrame has a zero-length index.
epd = pd.DataFrame()
len(epd.index)

0

In [28]:
# Scratch check: see what reset_index() returns for the empty DataFrame.
epd.reset_index()

Unnamed: 0,index


In [13]:
# Number of rows in the loaded bouts table.
len(lpd.index)

84

### example of search of one bird, one session with force=True/False

In [8]:
# Re-import the search module, then run the search for a single session with
# force=True.
reload(sb)
sb.search_bird_bouts(
    's_b1585_22',
    ['2022-03-07'],
    hparams,
    ephys_software='alsa',
    n_jobs=12,
    force=True,
)

2022-03-08 10:46:08,787 ceciestunepipe.pipeline.searchbout INFO     Getting all bouts in bird s_b1585_22 sessions ['2022-03-07']
2022-03-08 10:46:08,788 ceciestunepipe.pipeline.searchbout INFO     Will search bouts for Bird s_b1585_22 - sess 2022-03-07
2022-03-08 10:46:08,789 ceciestunepipe.pipeline.searchbout INFO     Will search for bouts through all session s_b1585_22, 2022-03-07
2022-03-08 10:46:08,793 ceciestunepipe.pipeline.searchbout INFO     Found 30 files


[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   1 tasks      | elapsed: 18.5min
[Parallel(n_jobs=12)]: Done   2 tasks      | elapsed: 19.0min
[Parallel(n_jobs=12)]: Done   3 tasks      | elapsed: 19.1min
[Parallel(n_jobs=12)]: Done   4 tasks      | elapsed: 19.1min
[Parallel(n_jobs=12)]: Done   5 tasks      | elapsed: 19.1min
[Parallel(n_jobs=12)]: Done   6 tasks      | elapsed: 19.1min
[Parallel(n_jobs=12)]: Done   7 tasks      | elapsed: 19.2min
[Parallel(n_jobs=12)]: Done   8 out of  30 | elapsed: 19.2min remaining: 52.8min
[Parallel(n_jobs=12)]: Done   9 out of  30 | elapsed: 19.2min remaining: 44.9min
[Parallel(n_jobs=12)]: Done  10 out of  30 | elapsed: 19.3min remaining: 38.5min
[Parallel(n_jobs=12)]: Done  11 out of  30 | elapsed: 19.3min remaining: 33.3min
[Parallel(n_jobs=12)]: Done  12 out of  30 | elapsed: 19.4min remaining: 29.1min
[Parallel(n_jobs=12)]: Done  13 out of  30 | elapsed: 19.5min remaining: 25.5min
[Pa

2022-03-08 11:10:36,845 ceciestunepipe.pipeline.searchbout INFO     getting spectrograms
2022-03-08 11:11:34,003 ceciestunepipe.pipeline.searchbout INFO     saving bouts pandas to /mnt/sphere/speech_bci/derived_data/s_b1585_22/2022-03-07/alsa/bouts_ceciestunepipe/bout_auto.pickle
2022-03-08 11:11:38,675 ceciestunepipe.pipeline.searchbout INFO     saving bout detect parameters dict to /mnt/sphere/speech_bci/derived_data/s_b1585_22/2022-03-07/alsa/bouts_ceciestunepipe/bout_search_params.pickle


0

In [31]:
# Scratch check: the part of the bird id before the first underscore is 's'.
bird = 's_b1238_22'
bird.partition('_')[0] == 's'

True

In [34]:
# Check that the loaded bouts table has a 'spectrogram' column.
'spectrogram' in lpd.columns

True

In [14]:
# Load the automatically detected bouts saved for session 2022-03-07 and list
# the columns of the resulting table.
lpd = pd.read_pickle(
    '/mnt/sphere/speech_bci/derived_data/s_b1585_22/2022-03-07/alsa/bouts_ceciestunepipe/bout_auto.pickle'
)
lpd.columns

Index(['start_ms', 'end_ms', 'start_sample', 'end_sample', 'p_step', 'rms_p',
       'peak_p', 'bout_check', 'file', 'len_ms', 'syl_in', 'n_syl', 'peaks_p',
       'n_peaks', 'l_p_ratio', 'waveform', 'valid_waveform', 'valid',
       'spectrogram'],
      dtype='object')

In [15]:
# Number of rows in the bouts table loaded for session 2022-03-07.
len(lpd.index)

157