Skip to content

Commit

Permalink
Merge pull request #174 from vocalpy/rename-on/offset-ind-to-on/offset-sample
Browse files Browse the repository at this point in the history

Rename on/offset ind to on/offset sample
  • Loading branch information
NickleDave committed May 15, 2022
2 parents 611de55 + 4e3c96f commit ef04fa9
Show file tree
Hide file tree
Showing 18 changed files with 226 additions and 226 deletions.
12 changes: 6 additions & 6 deletions src/crowsetta/formats/seq/birdsongrec.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,8 @@ def to_seq(self,
"""
seqs = []
for birdsongrec_seq in self.sequences:
onset_inds = np.array([syl.position for syl in birdsongrec_seq.syls])
offset_inds = np.array([syl.position + syl.length for syl in birdsongrec_seq.syls])
onset_samples = np.array([syl.position for syl in birdsongrec_seq.syls])
offset_samples = np.array([syl.position + syl.length for syl in birdsongrec_seq.syls])
labels = np.array(
# NOTE we convert syl.label to string so dtype is consistent across formats
# and to adhere to schema for `'generic-seq'`
Expand All @@ -211,17 +211,17 @@ def to_seq(self,
samplerate_this_wav = samplerate

if samplerate_this_wav:
onsets_s = onset_inds / samplerate_this_wav
offsets_s = offset_inds / samplerate_this_wav
onsets_s = onset_samples / samplerate_this_wav
offsets_s = offset_samples / samplerate_this_wav
if round_times:
onsets_s = np.round(onsets_s, decimals=decimals)
offsets_s = np.round(offsets_s , decimals=decimals)
else:
onsets_s = None
offsets_s = None

seq = crowsetta.Sequence.from_keyword(onset_inds=onset_inds,
offset_inds=offset_inds,
seq = crowsetta.Sequence.from_keyword(onset_samples=onset_samples,
offset_samples=offset_samples,
onsets_s=onsets_s,
offsets_s=offsets_s,
labels=labels
Expand Down
32 changes: 16 additions & 16 deletions src/crowsetta/formats/seq/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
ONSET_OFFSET_COLS_ERR = """For onset times and offset times,
all values must be specified in at least one unit:
seconds (float), or sample number (integer). All rows must be non-null for either
'onset_s' and 'offset_s' or 'onset_ind' and 'offset_ind'.
'onset_s' and 'offset_s' or 'onset_sample' and 'offset_sample'.
Both units can also be specified. Conversion between units is not validated.
"""

Expand All @@ -44,8 +44,8 @@ class GenericSeqSchema(pandera.SchemaModel):
label: Series[pd.StringDtype] = pandera.Field(coerce=True)
onset_s: Optional[Series[float]] = pandera.Field()
offset_s: Optional[Series[float]] = pandera.Field()
onset_ind: Optional[Series[int]] = pandera.Field()
offset_ind: Optional[Series[int]] = pandera.Field()
onset_sample: Optional[Series[int]] = pandera.Field()
offset_sample: Optional[Series[int]] = pandera.Field()

notated_path: Series[str] = pandera.Field()
annot_path: Series[str] = pandera.Field()
Expand All @@ -62,23 +62,23 @@ def both_onset_s_and_offset_s_if_either(cls, df: pd.DataFrame) -> bool:
return True

@pandera.dataframe_check(error=ONSET_OFFSET_COLS_ERR)
def both_onset_ind_and_offset_ind_if_either(cls, df: pd.DataFrame) -> bool:
"""check that, if one of {'onset_ind', 'offset_ind'} column is present,
def both_onset_sample_and_offset_sample_if_either(cls, df: pd.DataFrame) -> bool:
"""check that, if one of {'onset_sample', 'offset_sample'} column is present,
then both are present"""
if any([col in df for col in ('onset_ind', 'offset_ind')]):
return all([col in df for col in ('onset_ind', 'offset_ind')])
if any([col in df for col in ('onset_sample', 'offset_sample')]):
return all([col in df for col in ('onset_sample', 'offset_sample')])
else:
return True

@pandera.dataframe_check(error=ONSET_OFFSET_COLS_ERR)
def onset_offset_s_and_ind_are_not_both_missing(cls, df: pd.DataFrame) -> bool:
"""check that at least one of the on/offset column pairs is present:
either {'onset_s', 'offset_s'} or {'onset_ind', 'offset_ind'}"""
either {'onset_s', 'offset_s'} or {'onset_sample', 'offset_sample'}"""
if 'onset_s' not in df and 'offset_s' not in df:
return 'onset_ind' in df and 'offset_ind' in df
elif 'onset_ind' not in df and 'offset_ind' not in df:
return 'onset_sample' in df and 'offset_sample' in df
elif 'onset_sample' not in df and 'offset_sample' not in df:
return 'onset_s' in df and 'offset_s' in df
elif all([col in df for col in ('onset_s', 'offset_s', 'onset_ind', 'offset_ind')]):
elif all([col in df for col in ('onset_s', 'offset_s', 'onset_sample', 'offset_sample')]):
# i.e., else return True, but extra verbose for clarity
return True

Expand Down Expand Up @@ -225,18 +225,18 @@ def csv2annot(csv_path: PathLike) -> List[crowsetta.Annotation]:
else:
onsets_s = None
offsets_s = None
if 'onset_ind' and 'offset_ind' in df_annot:
onsets_inds = df_annot.onset_ind.values
offsets_inds = df_annot.offset_ind.values
if 'onset_sample' and 'offset_sample' in df_annot:
onsets_inds = df_annot.onset_sample.values
offsets_inds = df_annot.offset_sample.values
else:
onsets_inds = None
offsets_inds = None
seq = crowsetta.Sequence.from_keyword(
labels=labels,
onsets_s=onsets_s,
offsets_s=offsets_s,
onset_inds=onsets_inds,
offset_inds=offsets_inds,
onset_samples=onsets_inds,
offset_samples=offsets_inds,
)
annot = crowsetta.Annotation(
annot_path=annot_path,
Expand Down
12 changes: 6 additions & 6 deletions src/crowsetta/formats/seq/timit.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,8 @@ def to_seq(self,
and then sending them to a csv file,
the result should be the same on Windows and Linux.
"""
onset_inds = self.begin_samples
offset_inds = self.end_samples
onset_samples = self.begin_samples
offset_samples = self.end_samples
labels = self.text

if samplerate is None:
Expand All @@ -169,16 +169,16 @@ def to_seq(self,
)
samplerate = None

onsets_s = onset_inds / samplerate
offsets_s = offset_inds / samplerate
onsets_s = onset_samples / samplerate
offsets_s = offset_samples / samplerate

if round_times:
onsets_s = np.around(onsets_s, decimals=decimals)
offsets_s = np.around(offsets_s, decimals=decimals)

phn_seq = crowsetta.Sequence.from_keyword(labels=labels,
onset_inds=onset_inds,
offset_inds=offset_inds,
onset_samples=onset_samples,
offset_samples=offset_samples,
onsets_s=onsets_s,
offsets_s=offsets_s)
return phn_seq
Expand Down
4 changes: 2 additions & 2 deletions src/crowsetta/formats/seq/yarden.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,8 @@ def to_seq(self,
)

samp_freq = annotation['fs'].tolist()
seq_dict['onset_inds'] = np.round(seq_dict['onsets_s'] * samp_freq).astype(int)
seq_dict['offset_inds'] = np.round(seq_dict['offsets_s'] * samp_freq).astype(int)
seq_dict['onset_samples'] = np.round(seq_dict['onsets_s'] * samp_freq).astype(int)
seq_dict['offset_samples'] = np.round(seq_dict['offsets_s'] * samp_freq).astype(int)

if round_times:
seq_dict['onsets_s'] = np.around(seq_dict['onsets_s'], decimals=decimals)
Expand Down
24 changes: 12 additions & 12 deletions src/crowsetta/segment.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@ class Segment(object):
used to annotate animal communication.
Typically, a single unit such as a syllable in human speech
or a "syllable" in birdsong."""
_FIELDS = ('label', 'onset_s', 'offset_s', 'onset_ind', 'offset_ind')
_FIELDS = ('label', 'onset_s', 'offset_s', 'onset_sample', 'offset_sample')

label = attr.ib(converter=str)
onset_s = attr.ib(converter=attr.converters.optional(float_or_None))
offset_s = attr.ib(converter=attr.converters.optional(float_or_None))
onset_ind = attr.ib(converter=attr.converters.optional(int_or_None))
offset_ind = attr.ib(converter=attr.converters.optional(int_or_None))
onset_sample = attr.ib(converter=attr.converters.optional(int_or_None))
offset_sample = attr.ib(converter=attr.converters.optional(int_or_None))
asdict = attr.asdict

@classmethod
Expand Down Expand Up @@ -66,20 +66,20 @@ def from_row(cls, row, header=None):

@classmethod
def from_keyword(cls, label, onset_s=None, offset_s=None,
onset_ind=None, offset_ind=None):
if ((onset_ind is None and offset_ind is None) and
onset_sample=None, offset_sample=None):
if ((onset_sample is None and offset_sample is None) and
(onset_s is None and offset_s is None)):
raise ValueError('must provide either onset_ind and offset_ind, or '
raise ValueError('must provide either onset_sample and offset_sample, or '
'onsets_s and offsets_s')

if onset_ind and offset_ind is None:
raise ValueError(f'onset_ind specified as {onset_ind} but offset_ind is None')
if onset_ind is None and offset_ind:
raise ValueError(f'offset_ind specified as {offset_ind} but onset_ind is None')
if onset_sample and offset_sample is None:
raise ValueError(f'onset_sample specified as {onset_sample} but offset_sample is None')
if onset_sample is None and offset_sample:
raise ValueError(f'offset_sample specified as {offset_sample} but onset_sample is None')
if onset_s and offset_s is None:
raise ValueError(f'onset_s specified as {onset_s} but offset_s is None')
if onset_s is None and offset_s:
raise ValueError(f'offset_s specified as {offset_ind} but onset_s is None')
raise ValueError(f'offset_s specified as {offset_sample} but onset_s is None')

return cls(label=label, onset_s=onset_s, offset_s=offset_s,
onset_ind=onset_ind, offset_ind=offset_ind)
onset_sample=onset_sample, offset_sample=offset_sample)

0 comments on commit ef04fa9

Please sign in to comment.