## read lvd files and flatclust exported files

In [27]:
%matplotlib ipympl

In [2]:
import numpy as np
import logging
import socket
import glob
import os
import pandas as pd
import matplotlib.pyplot as plt
import datetime
from importlib import reload
import scipy.io as sio
from scipy import stats  

#from intan2kwik import kwd

#mountainsort imports (for sorting)
#import mountainlab_pytools.mlproc as mlp
from pipefinch.pipeline import sglxutil as sglu
from pipefinch.h5tools.kwik import event as ev
from pipefinch.pipeline import filestructure as et
from pipefinch.recorder.core import data as rd, setting as sd
from pipefinch.neural.postsort import session



# Setup the logger
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
logger.addHandler(ch)
        
logger.info('Logger set')
logger.info('Hostname {}'.format(socket.gethostname()))

2019-09-18 11:05:05,130 - root - INFO - Logger set
2019-09-18 11:05:05,132 - root - INFO - Hostname zpikezorter


In [3]:
reload(et)

sess_par = {'bird': 'b11k10',
            'sess': '20190715_02',
           'probe': 'probe_0', # probe to sort ('probe_0', 'probe_1') (to lookup in the rig_par which port to extract)
           'sort': 1, 
           'epoch': None, # for the subfolder in the neuropix data}
           }

exp_struct = et.get_exp_struct(sess_par['bird'], sess_par['sess'], sess_par['sort'])

# visualization default parameters
viz_par = { 'evt_name': 'motif',
           'evt_signal': 'trig_perceptron',
            'evt_edge': 1,
            'pre_ms': -500,
            'post_ms': 300,
            
            'pre_samples': 0,
            'post_samples': 0,
            'span': 0,
            }

# convenient paths
exp_struct['folders']['bird'] = os.path.abspath('/mnt/microdrive/song_recordings/g8r8')
data_folder = os.path.join(exp_struct['folders']['bird'], 'Data')
meta_folder = os.path.join(exp_struct['folders']['bird'], 'MetaData')
set_folder = os.path.join(exp_struct['folders']['bird'], 'Settings')
data_days = et.list_subfolders(data_folder)

In [4]:
exp_struct

{'folders': {'bird': '/mnt/microdrive/song_recordings/g8r8',
  'raw': '/mnt/microdrive/birds/b11k10/Ephys/raw/20190715_02',
  'kwik': '/data/experiment/microdrive/b11k10/Ephys/kwik/20190715_02',
  'msort': '/data/experiment/microdrive/b11k10/Ephys/msort/20190715_02',
  'ksort': '/data/experiment/microdrive/b11k10/Ephys/ksort/20190715_02'},
 'files': {'par': '/data/experiment/microdrive/b11k10/Ephys/ksort/20190715_02/params.json',
  'set': '/mnt/microdrive/birds/b11k10/Ephys/raw/20190715_02/settings.isf',
  'rig': '/mnt/microdrive/birds/b11k10/Ephys/raw/20190715_02/rig.json',
  'kwd': '/data/experiment/microdrive/b11k10/Ephys/kwik/20190715_02/stream.kwd',
  'kwik': '/data/experiment/microdrive/b11k10/Ephys/kwik/20190715_02/sort_1/spikes.kwik',
  'kwe': '/data/experiment/microdrive/b11k10/Ephys/kwik/20190715_02/events.kwe',
  'mda_raw': '/data/experiment/microdrive/b11k10/Ephys/msort/20190715_02/raw.mda',
  'bin_raw': '/data/experiment/microdrive/b11k10/Ephys/ksort/20190715_02/raw.bin'}}

In [5]:
data_days[:5]

['2019-09-15-0136',
 '2019-09-09-0130',
 '2019-09-10-0131',
 '2019-09-11-0132',
 '2019-09-12-0133']

In [6]:
def get_day_files(data_folder, data_day):
    logger.info('getting files from day {}'.format(data_day))
    files_list = glob.glob(os.path.join(data_folder, data_day, '*.lvd'))
    files_list.sort()
    return files_list

def read_all_day(data_folder: str, data_day: str) -> (pd.DataFrame, pd.DataFrame):
    day_files = get_day_files(data_folder, data_day)
    day_files.sort()
    n_files = len(day_files)
    logger.info('Reading {} files'.format(n_files))
    days_df_list = list(map(rd.file_as_data_frame, day_files))
    logger.info('Done')
    day_df = pd.concat(days_df_list)
    
    # make a pandas with the timestamps of the beginning of all files
    logger.info('Getting timestamps of file beginnings')
    file_stamp_df = day_df.loc[day_df['file_sample']==0, ['file', 'time_stamp']].set_index('file')
    file_stamp_df.head()
    return day_df, file_stamp_df

def get_ttl_evt(day_df, chan=1, ev_name='glass', threshold=1000):
    # get on and offs of a 'ttl' event read as an analog chan
    logger.info('getting on/off stamps of event {} in channel {}'.format(ev_name, chan))
    day_df['ttl_{}_on'.format(ev_name)] = day_df.loc[:, chan].diff().gt(threshold)
    day_df['ttl_{}_off'.format(ev_name)] = day_df.loc[:, chan].diff().le(-threshold)
    return day_df

data_day = 0
day_files = get_day_files(data_folder, data_days[data_day])
set_files = glob.glob(os.path.join(set_folder, '*.txt'))

day_df, file_stamp_df = read_all_day(data_folder, data_days[data_day])
day_df = get_ttl_evt(day_df, 1, 'glass')
# stamp_day_df = day_df.set_index('abs_tstamp')

day_df.head()

2019-09-18 11:05:05,164 - root - INFO - getting files from day 2019-09-15-0136
2019-09-18 11:05:05,238 - root - INFO - getting files from day 2019-09-15-0136
2019-09-18 11:05:05,297 - root - INFO - Reading 2733 files
2019-09-18 11:07:03,385 - root - INFO - Done
2019-09-18 11:07:48,597 - root - INFO - Getting timestamps of file beginnings
2019-09-18 11:08:17,680 - root - INFO - getting on/off stamps of event glass in channel 1


Unnamed: 0,0,1,2,3,file_type,time_stamp,file,file_sample,ttl_glass_on,ttl_glass_off
0,99,148,0,0,Undirected,2019-09-15 07:29:53.812500,g8r8U-f00001,0,False,False
1,113,144,0,0,Undirected,2019-09-15 07:29:53.812500,g8r8U-f00001,1,False,False
2,94,147,0,0,Undirected,2019-09-15 07:29:53.812500,g8r8U-f00001,2,False,False
3,92,146,0,0,Undirected,2019-09-15 07:29:53.812500,g8r8U-f00001,3,False,False
4,95,145,0,0,Undirected,2019-09-15 07:29:53.812500,g8r8U-f00001,4,False,False


In [7]:
day_df = day_df.set_index(['file', 'file_sample']).sort_values(['file', 'file_sample'])

KeyboardInterrupt: 

## Read masks/clusters

In [7]:
### reading the masks, clusters
def read_masks(meta_path, data_day) -> pd.DataFrame:
    masks_file = 'masks.mat'
    masks_f_path = os.path.join(meta_path, data_day, masks_file)
    masks_mat = sio.loadmat(masks_f_path)
    vars_to_load = {'file': 'files', 'masks': 'masks', 'song': 'song', 'spectrogram': 'spectrogram'}
    masks_df = pd.DataFrame({k: masks_mat[v].flatten() for k,v in vars_to_load.items()})
    # clean up files field
    masks_df['file'] = masks_df['file'].apply(lambda x: np.array2string(x.squeeze()).strip('\'').split('.')[0])

    #clean up masks arrays
    masks_df['masks'] = masks_df['masks'].apply(np.squeeze)
    return masks_df

masks_df = read_masks(meta_folder,  data_days[data_day])
masks_df.head()

Unnamed: 0,file,masks,song,spectrogram
0,g8r8U-f00001,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[[29517], [34129]]","[[-37, -39, -36, -29, -19, -42, -42, -28, -34,..."
1,g8r8U-f00002,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[[29984], [35325]]","[[-28, -35, -38, -36, -23, -39, -35, -24, -25,..."
2,g8r8U-f00003,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[[30132], [33567]]","[[-29, -42, -19, -28, -28, -43, -35, -16, -26,..."
3,g8r8U-f00004,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",[[]],"[[-35, -34, -29, -22, -15, -45, -22, -13, -24,..."
4,g8r8U-f00005,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[[30074], [33866]]","[[-24, -25, -38, -24, -35, -31, -19, -39, -31,..."


In [8]:
# read the clusters file
def read_clu_file(meta_folder, clu, data_day, prefix='song'):
    clu_file = '{}_C{}.txt'.format(prefix, clu)
    clu_pd = pd.read_csv(os.path.join(meta_folder, data_day, clu_file), sep='\t')
    clu_pd['filename'] = clu_pd['filename'].apply(lambda x: x.split('.')[0])
    clu_pd['clu'] = clu
    
    clu_pd.rename(columns={'filename':'file'}, inplace=True)
    return clu_pd

clu_list = [4, 11]
clu_pd = pd.concat([read_clu_file(meta_folder, clu, data_days[data_day]) for clu in clu_list])
clu_pd = clu_pd.sort_values(['file', 'clu', 'segon'])
clu_pd.head()

Unnamed: 0,file,segon,segoff,clu
1548,g8r8U-f00005,31098,1744,11
880,g8r8U-f00008,104739,4531,4
1204,g8r8U-f00008,150562,4572,4
121,g8r8U-f00008,212610,4371,4
1688,g8r8U-f00008,280996,4642,4


### get the bouts from the masks
- use the masks to get the segments with silences
- look for 'bouts' that are song (have a cluster in them)

In [9]:
def masks_to_bouts(a_mask, min_ibi=2000):
    try:
        onset = np.where(np.diff(a_mask.astype(np.int8))>0)[0]
        offset = np.where(np.diff(a_mask.astype(np.int8))<0)[0]
        
        # fix edges
        if offset[-1] < onset[-1]:
            offset = np.append(offset, a_mask.size)
            
        if onset[0] > offset[0]:
            onset = np.append(0, onset)

        on_lens = offset-onset
        off_lens = onset[1:] - offset[:-1]
        bout_break_idx = np.where(off_lens > min_ibi)

        bout_ends = np.append(offset[bout_break_idx], offset[-1])
        bout_starts = np.append(onset[0], onset[bout_break_idx[0] + 1])
        bouts_arr = np.vstack([bout_starts, bout_ends]).T
    except:
        logger.debug('something went wrong getting bouts')
        bouts_arr = None
    return bouts_arr

masks_df['bouts'] = masks_df['masks'].apply(masks_to_bouts)

## make a bouts dataframe
 - for every file, split the 'bouts' array into the bouts
 - find the ones that have a clu in them

In [10]:
f_list = []
b_list = []
for file, bouts in masks_df.dropna().loc[:,['file', 'bouts']].values:
    for b in bouts:
        b_list.append(b)
        f_list.append(file)
bouts_df = pd.DataFrame({'file': f_list, 'bout': b_list})
bouts_df.head(3)

Unnamed: 0,file,bout
0,g8r8U-f00001,"[234, 255]"
1,g8r8U-f00002,"[238, 264]"
2,g8r8U-f00003,"[239, 429]"


In [11]:
## lookup which bouts contain the 'song' cluster

In [12]:
def bout_lookup(file, bout, clu, clu_pd, time_scale=128):
    clu_on = clu_pd.loc[(clu_pd['file'] == file) & (clu_pd['clu'] == clu), 'segon'].values/time_scale
    has_clu = np.any((clu_on > bout[0]) & (clu_on < bout[1]))
    return clu_on, has_clu

song_clu = 4
clu_on, has_clu = bout_lookup('g8r8U-f00008', [241, 255], 4, clu_pd)
has_clu

bouts_df['is_song'] = bouts_df.apply(lambda x: bout_lookup(x['file'], x['bout'], song_clu, clu_pd)[1], axis=1)

In [13]:
bouts_df[bouts_df['is_song']]

Unnamed: 0,file,bout,is_song
7,g8r8U-f00008,"[241, 3003]",True
8,g8r8U-f00009,"[16, 1062]",True
9,g8r8U-f00010,"[239, 2973]",True
10,g8r8U-f00011,"[240, 2161]",True
11,g8r8U-f00012,"[238, 855]",True
...,...,...,...
2141,g8r8U-f02152,"[156, 1352]",True
2142,g8r8U-f02153,"[238, 974]",True
2143,g8r8U-f02154,"[238, 599]",True
2175,g8r8U-f02186,"[238, 772]",True


### get bouts timestamps and interbout intervals

In [14]:
file_stamp_df.head()

Unnamed: 0_level_0,time_stamp
file,Unnamed: 1_level_1
g8r8U-f00001,2019-09-15 07:29:53.812500
g8r8U-f00002,2019-09-15 07:31:51.023400
g8r8U-f00003,2019-09-15 07:31:59.921900
g8r8U-f00004,2019-09-15 07:33:00.496100
g8r8U-f00005,2019-09-15 07:34:51.011700


In [22]:
def tstamp_lookup(file, bout, files_df):
    file_tstamp = files_df.loc[file]['time_stamp']
    bout_stamp = [pd.Timedelta(x, unit='ms') + file_tstamp for x in bout]
    return bout_stamp
    
    
bouts_df['bout_stamp'] = bouts_df.apply(lambda x: tstamp_lookup(x['file'], x['bout'], file_stamp_df), axis=1)
bouts_df['abs_start'] = bouts_df['bout_stamp'].apply(lambda x: x[0])
bouts_df['abs_end'] = bouts_df['bout_stamp'].apply(lambda x: x[1])
bouts_df['len'] = bouts_df['abs_end'] - bouts_df['abs_start']
bouts_df['ibi'] = bouts_df['abs_start'] - bouts_df['abs_end'].shift(1)
song_df = bouts_df[bouts_df['is_song']]
song_df['ibi_ms'] = (song_df['ibi'].dropna()/pd.Timedelta(milliseconds=1)).astype(np.int)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]


In [23]:
song_df.head()

Unnamed: 0,file,bout,is_song,bout_stamp,abs_start,abs_end,len,ibi,ibi_ms
7,g8r8U-f00008,"[241, 3003]",True,"[2019-09-15 07:38:34.487100, 2019-09-15 07:38:...",2019-09-15 07:38:34.487100,2019-09-15 07:38:37.249100,00:00:02.762000,00:00:06.689400,6689
8,g8r8U-f00009,"[16, 1062]",True,"[2019-09-15 07:38:49.019900, 2019-09-15 07:38:...",2019-09-15 07:38:49.019900,2019-09-15 07:38:50.065900,00:00:01.046000,00:00:11.770800,11770
9,g8r8U-f00010,"[239, 2973]",True,"[2019-09-15 07:38:54.614000, 2019-09-15 07:38:...",2019-09-15 07:38:54.614000,2019-09-15 07:38:57.348000,00:00:02.734000,00:00:04.548100,4548
10,g8r8U-f00011,"[240, 2161]",True,"[2019-09-15 07:39:08.837700, 2019-09-15 07:39:...",2019-09-15 07:39:08.837700,2019-09-15 07:39:10.758700,00:00:01.921000,00:00:11.489700,11489
11,g8r8U-f00012,"[238, 855]",True,"[2019-09-15 07:39:21.206700, 2019-09-15 07:39:...",2019-09-15 07:39:21.206700,2019-09-15 07:39:21.823700,00:00:00.617000,00:00:10.448000,10448


In [24]:
song_df.describe()

Unnamed: 0,len,ibi,ibi_ms
count,882,882,882.0
mean,0 days 00:00:01.095218,0 days 00:00:12.032864,12032.414966
std,0 days 00:00:00.610551,0 days 00:00:18.862738,18862.737213
min,0 days 00:00:00.279000,0 days 00:00:01.901800,1901.0
25%,0 days 00:00:00.655250,0 days 00:00:06.241400,6241.0
50%,0 days 00:00:00.943500,0 days 00:00:08.565200,8564.5
75%,0 days 00:00:01.360500,0 days 00:00:11.991025,11990.75
max,0 days 00:00:05.900000,0 days 00:06:06.741800,366741.0


In [25]:
song_df['ibi'].dropna()/pd.Timedelta(seconds=1)

7        6.6894
8       11.7708
9        4.5481
10      11.4897
11      10.4480
         ...   
2141     9.5897
2142     7.4759
2143     7.4749
2175     2.8637
2176     5.9211
Name: ibi, Length: 882, dtype: float64

In [32]:
plt.close('all')

In [33]:
plt.figure()
song_df['ibi_ms'].hist(bins=np.arange(0, 50000, 500), density=True)


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.axes._subplots.AxesSubplot at 0x7f0da3cd9550>

### Plot the distribution of inberbout intervals

In [77]:
fig, ax = plt.subplots(nrows=1, ncols=1)
max_ms = 50000
bins=np.arange(0, max_ms, 500)
song_df['ibi_ms'].hist(bins=np.arange(0, max_ms, 500), density=True, ax=ax)
song_df['ibi_ms'].plot.density(ax=ax)
ax.set_xlim([0, 50000])

  """Entry point for launching an IPython kernel.


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

(0, 50000)

In [78]:
import seaborn as sns

In [140]:
def plot_hist(series, bins, distribution, ax=None):
    if ax is None:
        fig, ax = plt.subplots(nrows=1, ncols=1)
    sns.distplot(series, ax=ax, kde=False, rug=False, fit=distribution, bins=bins, norm_hist=False)
    ax.set_xlim(bins[0], bins[-1])
    return ax

max_ms = 50000
stat_try = stats.johnsonsu
stat_try = None
bins=np.arange(0, max_ms, 500)

fig, axes = plt.subplots(nrows=2, ncols=1)

#sns.distplot(song_df['ibi_ms'], ax=axes[0], kde=False, rug=False, fit=stat_try, bins=bins)
plot_hist(song_df['ibi_ms'], bins, stat_try, ax=axes[0])
#plot_hist(song_df['ibi_ms'], bins, stat_try, axes[0])
#axes[0].set_xlim([0, max_ms])


ax.set_title('Inter-bout distributions')
plt.tight_layout()

  del sys.path[0]


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [97]:
def fit_distribution(series, distribution):
    pass

hist, bins = np.histogram(song_df['ibi_ms'], bins=bins)
plt.figure()
plt.plot(bins[1:], hist)

  """


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.lines.Line2D at 0x7f0d9f814358>]

In [99]:
peak_bin = bins[np.argmax(hist)]
peak_bin

6000

In [None]:
## Simulation
 - pick a bout
 - silence 4000 - 6000
 - 

In [165]:
# get the next bout with a refractory period (inhibition)
# check for the bout, get the first one next with a refractory preiod
bouts_df = bouts_df.sort_values('abs_start').reset_index(drop=True)
ms_scale = pd.Timedelta(milliseconds=1)
def next_bout_delta(abs_end, bouts_df, stim_start: pd.Timedelta=pd.Timedelta(milliseconds=0), 
                    stim_end: pd.Timedelta=pd.Timedelta(milliseconds=0)):
    
    abs_stim_start = abs_end + stim_start
    abs_stim_end = abs_end + stim_end
    
    is_before_stim = (bouts_df['abs_start'] > abs_end) & (bouts_df['abs_start'] < abs_stim_start)
    is_after_stim = (bouts_df['abs_start'] > abs_stim_end)
    
    try:
        next_bout_df = bouts_df[is_before_stim | is_after_stim].iloc[0]
    
        delta_ms = (next_bout_df['abs_start'] - abs_end)/ms_scale
        return delta_ms
    except IndexError:
        return None
    
next_bout_delta(bouts_df.loc[2, 'abs_end'], bouts_df, pd.Timedelta(milliseconds=10000), pd.Timedelta(milliseconds=22000))

23858.7

In [169]:
bouts_df.index.values

array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
        65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
        78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
        91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
       104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
       117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
       130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
       143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
       156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
       169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 18

In [194]:
peak = 7000
stim_window = 2000

bouts_df['ibi_ms_prep_inhibit'] = [next_bout_delta(bouts_df.loc[i, 'abs_end'], 
                                                   bouts_df, 
                                                   pd.Timedelta(milliseconds=peak-stim_window), 
                                                   pd.Timedelta(milliseconds=peak)) for i in bouts_df.index.values]
def do_delay(t, start_win, end_win):
    if t < start_win:
        delayed_t = t
    elif t < end_win:
        delayed_t = end_win
    else:
        delayed_t = t
    return delayed_t

bouts_df['ibi_ms_prep_delay'] = bouts_df['abs_end'].apply(lambda x: next_bout_delta(x, bouts_df, pd.Timedelta(5000), pd.Timedelta(7000)))
bouts_df['ibi_ms_prep_delay'] = bouts_df['ibi_ms_prep_delay'].apply(lambda x: do_delay(x, peak-stim_window, peak))

In [190]:
bouts_df['ibi_ms_song_inhibit'] = bouts_df['abs_end'].apply(lambda x: next_bout_delta(x, bouts_df, pd.Timedelta(6000), pd.Timedelta(8000)))
bouts_df['ibi_ms_prep_inhibit'] = bouts_df['abs_end'].apply(lambda x: next_bout_delta(x, bouts_df, pd.Timedelta(5000), pd.Timedelta(7000)))

In [172]:
bouts_df['ibi_inh'] = ibi_inh

In [182]:
bouts_df.head(3)

Unnamed: 0,file,bout,is_song,bout_stamp,abs_start,abs_end,len,ibi,ibi_ms,ibi_ms_song_inhibit,ibi_ms_prep_inhibit,ibi_inh
0,g8r8U-f00008,"[241, 3003]",True,"[2019-09-15 07:38:34.487100, 2019-09-15 07:38:...",2019-09-15 07:38:34.487100,2019-09-15 07:38:37.249100,00:00:02.762000,00:00:06.689400,6689,11770.8,11770.8,31588.6
1,g8r8U-f00009,"[16, 1062]",True,"[2019-09-15 07:38:49.019900, 2019-09-15 07:38:...",2019-09-15 07:38:49.019900,2019-09-15 07:38:50.065900,00:00:01.046000,00:00:11.770800,11770,4548.1,4548.1,4548.1
2,g8r8U-f00010,"[239, 2973]",True,"[2019-09-15 07:38:54.614000, 2019-09-15 07:38:...",2019-09-15 07:38:54.614000,2019-09-15 07:38:57.348000,00:00:02.734000,00:00:04.548100,4548,11489.7,11489.7,23858.7


In [195]:
def plot_hist(series, bins, distribution, ax=None):
    if ax is None:
        fig, ax = plt.subplots(nrows=1, ncols=1)
    sns.distplot(series, ax=ax, kde=False, rug=False, fit=distribution, bins=bins, norm_hist=False)
    ax.set_xlim(bins[0], bins[-1])
    return ax

stats_dict = {}
for cond in ['ibi_ms_prep_inhibit']:
    x = bouts_df['ibi_ms_prep_inhibit'].dropna()
    y = song_df.dropna()['ibi_ms']
    stats_dict[cond] = stats.mannwhitneyu(x, y, alternative='greater')
    
max_ms = 50000
stat_try = stats.johnsonsu
stat_try = None
bins=np.arange(0, max_ms, 500)

fig, axes = plt.subplots(nrows=3, ncols=1)

#sns.distplot(song_df['ibi_ms'], ax=axes[0], kde=False, rug=False, fit=stat_try, bins=bins)
plot_hist(song_df['ibi_ms'], bins, stat_try, ax=axes[0])
plot_hist(bouts_df['ibi_ms_prep_delay'], bins, stat_try, ax=axes[1])
plot_hist(bouts_df['ibi_ms_prep_inhibit'], bins, stat_try, ax=axes[2])
#plot_hist(song_df['ibi_ms'], bins, stat_try, axes[0])
#axes[0].set_xlim([0, max_ms])


ax.set_title('Inter-bout distributions')
plt.tight_layout()



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [94]:
# Some details about the histogram
hist, bins = np.histogram(song_df['ibi_ms'], bins=bins)
bin_width = hist[1] - hist[0]
N = len(song_df['ibi_ms'])

dist = stats.johnsonsu

params = dist.fit(np.array(song_df['ibi_ms']))

x_mid = (x + np.roll(x, -1))[:-1] / 2.0 # go from bin edges to bin middles
arg = params[:-2]
loc = params[-2]
scale = params[-1]

pdf = dist.pdf(x_mid, loc=loc, scale=scale, *arg)
pdf_scaled = pdf * bin_width * N # to go from pdf back to counts need to un-normalise the pdf

        
# a1, b1, loc1, scale1 = stats.beta.fit(mean_hist)

# pdf_g = stats.beta.pdf(bins, a1, b1, loc1, scale1)
plt.figure()
plt.plot(x_mid, pdf_scaled)
# #pdf_g = stats.johnsonsu.pdf(x, a, b) # now get theoretical values in our interval  
# plt.plot(x, pdf_g) # plot it



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.lines.Line2D at 0x7f0d9ef13048>]

In [92]:
data = song_df['ibi_ms']
hist = np.histogram(data, bins=bins)
hist_dist = stats.rv_histogram(hist)

plt.figure()
x = np.arange(0, max_ms, 10)
plt.plot(x, hist_dist.pdf(x), label='CDF')

  """


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.lines.Line2D at 0x7f0d9ef1f6d8>]

In [None]:
# a simple experiment

### get a bout and plot it

In [21]:
def get_bout(file, bout, masks_df):
    data = masks_df[masks_df['file']==file]['spectrogram'].values[0][:, bout[0]:bout[1]]
    return data

bout_spec = get_bout('g8r8U-f00008', [241, 3003], masks_df)
plt.figure()
plt.imshow(bout_spec, aspect='auto')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.image.AxesImage at 0x7f0dcc9a9860>