# Read flatclust clusters and masks
- clu files: song_Cxx.txt, i.e. <prefix>_Cxx.txt with the default prefix 'song' (xx is syllable number + 1; 11 is the introductory note)
- masks file: masks.mat

In [1]:
%matplotlib ipympl
import scipy.io as sio
import numpy as np
import pandas as pd
import os
from matplotlib import pyplot as plt

In [2]:
# Metadata folder that holds the masks file and the cluster text files.
# Exactly one location is active. The alternative locations are kept as
# comments instead of assignments that are immediately overwritten.
# meta_path = os.path.abspath('/Volumes/Samsung_X5/microdrive/b11k10/MetaData/2019-06-12-7061')
# meta_path = os.path.abspath('/mnt/microdrive/birds/b11k10/MetaData/2019-06-12-7061')
meta_path = os.path.abspath('/mnt/microdrive/song_recordings/g8r8/MetaData/2019-09-15-0136')
# Name of the MATLAB masks file, looked up inside meta_path.
masks_file = 'masks.mat'

In [3]:
meta_path

'/mnt/microdrive/song_recordings/g8r8/MetaData/2019-09-15-0136'

### Read masks

In [4]:
# Load the MATLAB masks file found in the metadata folder.
masks_f_path = os.path.join(meta_path, masks_file)
masks_mat = sio.loadmat(masks_f_path)

# DataFrame column name -> variable name inside the .mat file.
# (The 'tags' and 'timestamp' variables are not loaded here.)
vars_to_load = dict(file='files', masks='masks', song='song', spectrogram='spectrogram')

# One column per selected variable, each flattened to 1-D.
masks_df = pd.DataFrame(
    {col: masks_mat[mat_var].flatten() for col, mat_var in vars_to_load.items()}
)

# File names: render the entry as text, strip the surrounding single quotes
# and keep only the part before the first '.'.
masks_df['file'] = masks_df['file'].apply(
    lambda x: np.array2string(x.squeeze()).strip('\'').split('.')[0]
)

# Masks: remove singleton dimensions from every entry.
masks_df['masks'] = masks_df['masks'].apply(np.squeeze)
masks_df.head()

Unnamed: 0,file,masks,song,spectrogram
0,g8r8U-f00001,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[[29517], [34129]]","[[-37, -39, -36, -29, -19, -42, -42, -28, -34,..."
1,g8r8U-f00002,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[[29984], [35325]]","[[-28, -35, -38, -36, -23, -39, -35, -24, -25,..."
2,g8r8U-f00003,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[[30132], [33567]]","[[-29, -42, -19, -28, -28, -43, -35, -16, -26,..."
3,g8r8U-f00004,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",[[]],"[[-35, -34, -29, -22, -15, -45, -22, -13, -24,..."
4,g8r8U-f00005,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[[30074], [33866]]","[[-24, -25, -38, -24, -35, -31, -19, -39, -31,..."


In [5]:
# Extended column map that also names the 'tags' and 'timestamp' variables.
# NOTE(review): nothing visible in this notebook reads vars_to_load after this
# cell, so masks_df is not rebuilt with the extra columns - confirm whether
# this cell is still needed.
vars_to_load = {'file': 'files', 'masks': 'masks', 'song': 'song', 'spectrogram': 'spectrogram', 'tags': 'tags', 'timestamp': 'timestamp'}

In [6]:
# Pick one example row of masks_df and pull out the fields used below.
ex_file = 9
a_mask, a_song, a_file, a_spectrogram = (
    masks_df.loc[ex_file][field] for field in ('masks', 'song', 'file', 'spectrogram')
)

# Spectrogram (row order reversed) with the mask, multiplied by 20, drawn on top.
plt.figure()
plt.imshow(a_spectrogram[::-1], aspect='auto', cmap='inferno')
plt.plot(a_mask*20)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.lines.Line2D at 0x7f6a9bb3f6d8>]

In [7]:
# Ratio of 414848 to 3238; 3238 equals the length reported by a_mask.shape.
# NOTE(review): 414848 is presumably the number of audio samples in the
# example file - confirm. The ratio (~128) matches the t_scale used later on.
414848 /3238

128.118591723286

In [8]:
a_file

'g8r8U-f00010'

In [27]:
a_mask.shape

(3238,)

In [7]:
a_song/32

array([[  744.53125,  6957.     , 11672.75   ],
       [ 3890.46875, 10914.625  , 14604.96875]])

In [8]:
np.repeat(np.array([0, 1]), a_song.shape[1]).reshape(a_song.shape)

array([[0, 0, 0],
       [1, 1, 1]])

In [9]:
# No new figure is created here, so the mask is drawn on the current one.
plt.plot(a_mask)
# Array shaped like a_song: the first half of the repeated values is 0 and
# the second half is 1 (row 0 -> 0, row 1 -> 1 for a two-row a_song).
n_cols = a_song.shape[1]
song_values = np.repeat([0, 1], n_cols).reshape(a_song.shape)
#plt.plot(a_song, song_values)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

### Read clusters

In [30]:
# read the clusters file
def read_clu_file(meta_path, clu, prefix='song'):
    """Load one tab-separated cluster file into a DataFrame.

    The file read is ``<meta_path>/<prefix>_C<clu>.txt``. It must provide the
    columns ``filename``, ``segon`` and ``segoff``.

    Args:
        meta_path: folder that contains the cluster file.
        clu: cluster identifier, used in the file name and stored in the
            ``clu`` column of the result.
        prefix: leading part of the file name.

    Returns:
        DataFrame in which
        - ``filename`` is cut at the first '.',
        - ``len`` holds the values read from the raw ``segoff`` column,
        - ``segoff`` is replaced by ``segon + len``,
        - ``clu`` is set to ``clu`` on every row.
    """
    clu_path = os.path.join(meta_path, '{}_C{}.txt'.format(prefix, clu))
    table = pd.read_csv(clu_path, sep='\t')

    # The raw 'segoff' column is treated as a length, not as an end position.
    seg_len = table['segoff']
    return table.assign(
        filename=table['filename'].apply(lambda name: name.partition('.')[0]),
        len=seg_len,
        segoff=table['segon'] + seg_len,
        clu=clu,
    )

# Clusters to load (per the notebook header, 11 is the introductory note).
clu_list = [4, 11]
# One table per cluster, stacked row-wise; each table keeps its own row index.
clu_frames = [read_clu_file(meta_path, clu_id) for clu_id in clu_list]
clu_pd = pd.concat(clu_frames)

In [31]:
clu_pd.head()

Unnamed: 0,filename,segon,segoff,len,clu
0,g8r8U-f00462,151387,155103,3716,4
1,g8r8U-f00516,98137,101953,3816,4
2,g8r8U-f00013,64004,67925,3921,4
3,g8r8U-f01704,112979,117113,4134,4
4,g8r8U-f01364,58678,62855,4177,4


In [32]:
# Index the segments by (filename, clu) and order them by file, then by onset.
# The set_index step is guarded so that re-running this cell does not raise a
# KeyError once 'filename' and 'clu' have already been moved into the index.
if 'filename' in clu_pd.columns:
    clu_pd = clu_pd.set_index(['filename', 'clu'])
# 'filename' is an index level at this point; sort_values accepts index level
# names alongside column labels.
clu_pd = clu_pd.sort_values(['filename', 'segon'])
clu_pd.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,segon,segoff,len
filename,clu,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
g8r8U-f00005,11,31098,32842,1744
g8r8U-f00008,11,31363,33081,1718
g8r8U-f00008,11,56943,58925,1982
g8r8U-f00008,11,64782,66634,1852
g8r8U-f00008,11,70146,72099,1953


### look at bouts and masks
see the masks together with the syllables (clusters)

In [33]:
# Example row whose mask and song boundaries are drawn together.
ex_file = 22
a_mask, a_song, a_file = (
    masks_df.loc[ex_file][field] for field in ('masks', 'song', 'file')
)

plt.figure()
plt.plot(a_mask)
# All-ones y-values shaped like a_song, so every song value is drawn at height 1.
song_values = np.repeat([1, 1], a_song.shape[1]).reshape(a_song.shape)
# NOTE(review): dividing by 128 presumably converts audio samples to mask
# bins (cf. 414848 / 3238 above) - confirm.
plt.plot(a_song/128, song_values)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.lines.Line2D at 0x7fe1c4fee048>,
 <matplotlib.lines.Line2D at 0x7fe1c4fee400>]

In [34]:
a_file

'g8r8U-f00023'

In [37]:
# Divisor applied to sample positions, and the shift added to the markers.
# NOTE(review): t_scale is presumably audio samples per mask bin - confirm.
t_scale = 128
offset = 32

# Segments belonging to the example file, ordered by onset.
file_clu_pd = clu_pd.loc[a_file].sort_values('segon')
seg_on_scaled = file_clu_pd['segon']/t_scale + offset
seg_off_scaled = file_clu_pd['segoff']/t_scale + offset

plt.figure()
# Segment onsets (red stars) and offsets (black dots), all at height 1.
plt.plot(seg_on_scaled, np.ones_like(file_clu_pd['segon']), 'r*')
plt.plot(seg_off_scaled, np.ones_like(file_clu_pd['segoff']), 'k.')
# Song values drawn slightly above the markers (height 1.005).
# NOTE(review): segments are shifted by +offset while the song values are
# shifted by -offset - confirm that the opposite signs are intended.
song_values = np.repeat([1, 1], a_song.shape[1]).reshape(a_song.shape)
plt.plot(a_song/t_scale - offset, song_values*1.005, 'b')
plt.plot(a_mask, 'c')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.lines.Line2D at 0x7fe1c4efcac8>]

In [55]:
file_clu_pd

Unnamed: 0_level_0,segon,segoff
clu,Unnamed: 1_level_1,Unnamed: 2_level_1
11,24849,1494
11,28293,1487
2,30731,4052
2,53723,4269
2,77585,4477
11,97907,1579
2,107341,4144
11,190141,2024
11,223648,1282
11,226720,1475


### get bouts

In [None]:
# use the 