## Look at and get metadata on parsed bouts (alsa/ephys)
- load the bouts for a bird, session
- make a summary of all the days that have manually curated bouts

In [1]:
import os
import glob
import socket
import logging
import pickle
import numpy as np
import pandas as pd
from scipy.io import wavfile
from scipy import signal

### Plotting is done with plotly instead of matplotlib
from plotly.subplots import make_subplots
import plotly.graph_objects as go

from importlib import reload

# Configure the root logger so that INFO messages are printed in the notebook.
logger = logging.getLogger()
formatter = logging.Formatter(
        '%(asctime)s %(name)-12s %(levelname)-8s %(message)s')

# Re-running this cell must not stack another StreamHandler on the root logger
# (every extra handler prints each message one more time), so the handler is
# tagged with a name and reused when it is already attached.
handler = next((h for h in logger.handlers if h.get_name() == 'notebook_stream'), None)
if handler is None:
    handler = logging.StreamHandler()
    handler.set_name('notebook_stream')
    logger.addHandler(handler)
handler.setFormatter(formatter)
logger.setLevel(logging.INFO)

logger.info('Running on {}'.format(socket.gethostname()))

2022-11-09 07:44:11,674 root         INFO     Running on pakhi.ucsd.edu


In [2]:
from ceciestunepipe.file import bcistructure as et

### Set the session file structure for a bird and day
- ephys_software:
                'alsa': the recordings using just the UMA-8 USB microphone
                'sglx': the recordings with the ephys system (npx) using an earthworks microphone
                'oe': the recordings with the ephys system (open-ephys/neuronexus) 
                        using an earthworks microphone

In [3]:
# Re-import the module so edits made to it on disk take effect without a kernel restart.
reload(et)

# Bird and recording day to inspect.
sess_par = dict(bird='s_b1555_22', sess='2022-11-06')

# Folder layout of the session, for the recordings made with 'alsa'.
exp_struct = et.get_exp_struct(
    sess_par['bird'],
    sess_par['sess'],
    ephys_software='alsa',
)

# Raw recordings, and the derived folder where the bout pickles are kept.
raw_folder = exp_struct['folders']['alsa']
bouts_folder = os.path.join(
    exp_struct['folders']['derived'],
    'bouts_ceciestunepipe',
)

In [6]:
ls $bouts_folder

[0m[01;32mbout_auto.pickle[0m*  bout_checked.pickle  [01;32mbout_search_params.pickle[0m*


### load the bouts for the session (day)
- recording software: (same as ephys software in the get_exp_struct function)

- curated:
    True: will load the bout_checked.pickle (with the manual curation checks)
    False: will load the bout_auto.pickle (just the auto bout detection)

In [8]:
from ceciestunepipe.pipeline import searchbout as sb

In [21]:
# Load the bouts of the session together with the parameters used to search for them;
# curated=True asks for the manually checked bouts rather than the auto-detected ones.
hparams, bout_pd = sb.read_session_bouts(
    sess_par['bird'],
    sess_par['sess'],
    recording_software='alsa',
    curated=True,
)

#### File details:
bout_pd: pd.DataFrame
    # start, end: start and end (in ms and in samples) of the detected bout in the raw file
    waveform: np.array. The chunk of raw data
    
    MANUAL CURATION CHECKS (bool)
    bout_check: bool. whether it was a bout (or noise)
    is_call: bool. whether it was a bout of calls
    confusing: bool. whether the curator was confused about what it was (noise, calls, bout of song)
    
    other fields to be documented next
    
hparams: dict
    # parameters for the spectrogram, amplitude, detection and extraction of the bouts
    sample_rate: sample rate of the raw file (and the waveform in the corresponding bouts pandas DataFrame)
    waveform_edges: how many samples before and after the onset/offset detection are included in the waveform in the corresponding bout pandas DataFrame

In [12]:
# show the parameters that were loaded together with the bouts
hparams

{'num_freq': 1024,
 'preemphasis': 0.97,
 'frame_shift_ms': 5,
 'frame_length_ms': 10,
 'min_level_db': -55,
 'ref_level_db': 110,
 'sample_rate': 48000,
 'mel_filter': False,
 'num_mels': 1024,
 'fmin': 500,
 'fmax': 12000,
 'max_iters': 200,
 'griffin_lim_iters': 20,
 'power': 1.5,
 'read_wav_fun': <function ceciestunepipe.util.sound.boutsearch.read_wav_chan(wav_path: str, chan_id: int = 0, return_int16=True) -> tuple>,
 'file_order_fun': <function ceciestunepipe.util.sound.boutsearch.sess_file_id(f_path)>,
 'min_segment': 30,
 'min_silence': 3000,
 'min_bout': 3000,
 'peak_thresh_rms': 0.55,
 'thresh_rms': 0.25,
 'mean_syl_rms_thresh': 0.3,
 'max_bout': 180000,
 'l_p_r_thresh': 100,
 'waveform_edges': 1000,
 'bout_auto_file': 'bout_auto.pickle',
 'bout_curated_file': 'bout_checked.pickle'}

In [22]:
# peek at the first two rows of the bouts table
bout_pd.head(2)

Unnamed: 0,start_ms,end_ms,start_sample,end_sample,p_step,rms_p,peak_p,bout_check,file,len_ms,...,n_syl,peaks_p,n_peaks,l_p_ratio,waveform,valid_waveform,valid,spectrogram,confusing,is_call
0,1064325,1106785,51087600,53125680,"[1.7835915647510105, 4.344083020852177, 4.3568...",1.546699,98.966014,True,/mnt/sphere/speech_bci/raw_data/s_b1555_22/202...,42460,...,96,"[2, 203, 213, 606, 614, 630, 637, 644, 656, 66...",338,125.621302,"[-5, 1, 3, 2, -2, -6, -3, 0, 0, -1, -1, 0, 2, ...",True,True,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",False,False
1,1080900,1117105,51883200,53621040,"[1.5666338818639305, 1.5581686644532489, 2.367...",2.365829,82.393821,True,/mnt/sphere/speech_bci/raw_data/s_b1555_22/202...,36205,...,67,"[7, 19, 69, 377, 393, 406, 418, 426, 434, 441,...",256,141.425781,"[-1, -1, 1, 0, -1, 0, 1, 0, -1, 1, 0, 0, -1, -...",True,True,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",False,False


#### Filter all bouts of song

In [23]:
# A bout is kept as song when it is valid, was confirmed as a bout during
# curation, and was flagged neither as confusing nor as a bout of calls.
# Explicit comparisons against True/False are kept on purpose (no `~`).
bout_sel = (
    bout_pd['valid'].eq(True)
    & bout_pd['bout_check'].eq(True)
    & bout_pd['confusing'].eq(False)
    & bout_pd['is_call'].eq(False)
)

# Rows keep their original index labels from bout_pd.
song_bout_pd = bout_pd.loc[bout_sel]

### plot one bout of song

In [24]:
import plotly.express as px
import plotly.graph_objects as go

from ipywidgets import widgets

In [25]:
def viz_one_bout(df: pd.Series, sub_sample=1):
    """Display the waveform and the spectrogram of a single bout, one above the other.

    Args:
        df: one row of the bouts table (a pd.Series, despite the name); only
            its 'spectrogram' and 'waveform' entries are read.
        sub_sample: stride applied to the waveform samples and to the second
            axis of the spectrogram before plotting; 1 keeps every point.

    Returns:
        None. Two plotly FigureWidgets are stacked in a VBox and shown with
        ``display``, which must be available in the notebook namespace.
    """
    stride = slice(None, None, sub_sample)
    spec = df['spectrogram'][:, stride]
    wave = df['waveform'][stride]

    # upper panel: the waveform as a line trace
    wave_widget = go.FigureWidget(
        data=[go.Scatter(y=wave)],
        layout=dict(height=300, width=1000),
    )

    # lower panel: the spectrogram as an image, without colorbar or tick labels
    spec_fig = px.imshow(
        spec,
        labels={},
        color_continuous_scale='Inferno',
        aspect='auto',
    )
    spec_fig.update_layout(width=1000, height=300, coloraxis_showscale=False)
    for update_axes in (spec_fig.update_xaxes, spec_fig.update_yaxes):
        update_axes(showticklabels=False)
    spec_widget = go.FigureWidget(spec_fig)

    display(widgets.VBox([wave_widget, spec_widget]))

In [26]:
# Plot the first song bout. song_bout_pd keeps the index labels of bout_pd, so
# the label 0 exists only if that bout passed the filter; .iloc[0] always
# picks the first remaining row.
viz_one_bout(song_bout_pd.iloc[0])

VBox(children=(FigureWidget({
    'data': [{'type': 'scatter',
              'uid': '4ed5bb45-9ea4-479a-81e9-0…

### to do:
- list all the sessions of a bird that have files of bouts (auto and curated)
- get number of good bouts of song for each
- make a dataframe of that metadata
- plot statistics of bouts per day?