Skip to content

Commit

Permalink
.py and .mat files from ./doc/notebooks necessary for tutorials/how-tos copied to ./notebooks
Browse files Browse the repository at this point in the history
  • Loading branch information
NickleDave committed Dec 30, 2018
1 parent 73dc7d2 commit ecff545
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 0 deletions.
Binary file added notebooks/bat1_annotation.mat
Binary file not shown.
63 changes: 63 additions & 0 deletions notebooks/batlab2seq.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import numpy as np
from scipy.io import loadmat

from crowsetta.classes import Sequence


def batlab2seq(mat_file):
    """unpack BatLAB annotation into list of Sequence objects

    example of a function that unpacks annotation from
    a complicated data structure and returns the necessary
    data as a Sequence object

    Parameters
    ----------
    mat_file : str
        filename of .mat file created by BatLAB

    Returns
    -------
    seq_list : list
        of Sequence objects, one for each filename in the .mat file

    Raises
    ------
    ValueError
        if the 'segType' field of an annotation loads as something
        other than an integer or a numpy array
    """
    mat = loadmat(mat_file, squeeze_me=True)
    # annotation structure loads as a Python dictionary with two keys:
    # one maps to the filenames, and the other to a Numpy array where
    # each element is the annotation corresponding to the filename at
    # the same index.
    filenames = mat['filenames']
    annotations = mat['annotations']
    # squeeze_me=True collapses one-element containers to a bare value,
    # so a .mat file that annotates a single file would not be iterable
    # in the way the loop below expects. Wrap the bare values in lists
    # (instead of using np.atleast_1d) so that each annotation keeps the
    # same zero-dimensional form it has in the multi-file case.
    if np.ndim(filenames) == 0:
        filenames = [filenames]
    if np.ndim(annotations) == 0:
        annotations = [annotations]

    seq_list = []
    # We can iterate over both by using the zip() function.
    for filename, annotation in zip(filenames, annotations):
        # below, .tolist() does not actually create a list,
        # instead gets the value out of a zero-dimensional ndarray
        # of dtype=object.
        # This is just weirdness that results from loading complicated data
        # structure in .mat file.
        seg_start_times = annotation['segFileStartTimes'].tolist()
        seg_end_times = annotation['segFileEndTimes'].tolist()
        seg_types = annotation['segType'].tolist()
        if isinstance(seg_types, (int, np.integer)):
            # this happens when there's only one syllable in the file
            # with only one corresponding label,
            # so make it a one-element array
            seg_types = np.asarray([seg_types])
        elif not isinstance(seg_types, np.ndarray):
            # an ndarray is what we expect whenever there's more than
            # one label; anything else means something unexpected happened
            raise ValueError("Unable to load labels from {}, because "
                             "the segType parsed as type {} which is "
                             "not recognized.".format(filename,
                                                      type(seg_types)))
        samp_freq = annotation['fs'].tolist()
        # convert times to sample numbers by scaling with the value of 'fs'
        seg_start_times_Hz = np.round(seg_start_times * samp_freq).astype(int)
        seg_end_times_Hz = np.round(seg_end_times * samp_freq).astype(int)

        seq = Sequence.from_keyword(file=filename,
                                    labels=seg_types,
                                    onsets_s=seg_start_times,
                                    offsets_s=seg_end_times,
                                    onsets_Hz=seg_start_times_Hz,
                                    offsets_Hz=seg_end_times_Hz)
        seq_list.append(seq)
    return seq_list
48 changes: 48 additions & 0 deletions notebooks/parsebat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import numpy as np
from scipy.io import loadmat


def parse_batlab_mat(mat_file):
    """parse batlab annotation.mat file

    Parameters
    ----------
    mat_file : str
        filename of .mat file created by BatLAB

    Returns
    -------
    annot_list : list
        of dicts, one for each filename in the .mat file, with keys
        'audio_file', 'seg_types', 'seg_start_times', 'seg_end_times',
        'seg_start_times_Hz', and 'seg_end_times_Hz'

    Raises
    ------
    ValueError
        if the 'segType' field of an annotation loads as something
        other than an integer or a numpy array
    """
    mat = loadmat(mat_file, squeeze_me=True)
    # annotation structure loads as a Python dictionary with two keys:
    # one maps to the filenames, and the other to a Numpy array where
    # each element is the annotation corresponding to the filename at
    # the same index.
    filenames = mat['filenames']
    annotations = mat['annotations']
    # squeeze_me=True collapses one-element containers to a bare value,
    # so a .mat file that annotates a single file would not be iterable
    # in the way the loop below expects. Wrap the bare values in lists
    # (instead of using np.atleast_1d) so that each annotation keeps the
    # same zero-dimensional form it has in the multi-file case.
    if np.ndim(filenames) == 0:
        filenames = [filenames]
    if np.ndim(annotations) == 0:
        annotations = [annotations]

    annot_list = []
    # We can iterate over both by using the zip() function.
    for filename, annotation in zip(filenames, annotations):
        # below, .tolist() does not actually create a list,
        # instead gets the value out of a zero-dimensional ndarray
        # of dtype=object.
        # This is just weirdness that results from loading complicated data
        # structure in .mat file.
        seg_start_times = annotation['segFileStartTimes'].tolist()
        seg_end_times = annotation['segFileEndTimes'].tolist()
        seg_types = annotation['segType'].tolist()
        if isinstance(seg_types, (int, np.integer)):
            # this happens when there's only one syllable in the file
            # with only one corresponding label,
            # so make it a one-element array
            seg_types = np.asarray([seg_types])
        elif not isinstance(seg_types, np.ndarray):
            # an ndarray is what we expect whenever there's more than
            # one label; anything else means something unexpected happened
            raise ValueError("Unable to load labels from {}, because "
                             "the segType parsed as type {} which is "
                             "not recognized.".format(filename,
                                                      type(seg_types)))
        samp_freq = annotation['fs'].tolist()
        # convert times to sample numbers by scaling with the value of 'fs'
        seg_start_times_Hz = np.round(seg_start_times * samp_freq).astype(int)
        seg_end_times_Hz = np.round(seg_end_times * samp_freq).astype(int)
        annot_dict = {
            'audio_file': filename,
            'seg_types': seg_types,
            'seg_start_times': seg_start_times,
            'seg_end_times': seg_end_times,
            'seg_start_times_Hz': seg_start_times_Hz,
            'seg_end_times_Hz': seg_end_times_Hz,
        }
        annot_list.append(annot_dict)

    return annot_list

0 comments on commit ecff545

Please sign in to comment.