Skip to content

Commit

Permalink
MRG: #169 from vocalpy/rename-annot-notated-path
Browse files Browse the repository at this point in the history
ENH: Rename `Annotation.audio_path` -> `Annotation.notated_path`
  • Loading branch information
NickleDave committed May 14, 2022
2 parents de1bfba + 474da86 commit d80715b
Show file tree
Hide file tree
Showing 20 changed files with 83 additions and 74 deletions.
18 changes: 10 additions & 8 deletions src/crowsetta/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@ class Annotation:
----------
annot_path : str, pathlib.Path
path to file from which annotations were loaded
audio_path : str, pathlib.Path
path to audio file that ``annot_path`` annotates.
notated_path : str, pathlib.Path
path to file that ``annot_path`` annotates.
E.g., an audio file, or an array file
that contains a spectrogram generated from audio.
Optional, default is None.
seq : crowsetta.Sequence
a sequence of annotated segments,
Expand All @@ -38,7 +40,7 @@ class Annotation:
"""
def __init__(self,
annot_path: PathLike,
audio_path: Optional[PathLike] = None,
notated_path: Optional[PathLike] = None,
seq: Optional[Sequence] = None,
bboxes: Optional[List[BBox]] = None):
if seq is None and bboxes is None:
Expand Down Expand Up @@ -73,13 +75,13 @@ def __init__(self,
self.bboxes = bboxes

self.annot_path = Path(annot_path)
if audio_path:
self.audio_path = Path(audio_path)
if notated_path:
self.notated_path = Path(notated_path)
else:
self.audio_path = audio_path
self.notated_path = notated_path

def __repr__(self):
repr_ = f'Annotation(annot_path={repr(self.annot_path)}, audio_path={repr(self.audio_path)}, '
repr_ = f'Annotation(annot_path={repr(self.annot_path)}, notated_path={repr(self.notated_path)}, '
if hasattr(self, 'seq'):
repr_ += f'seq={self.seq})'
elif hasattr(self, 'bboxes'):
Expand All @@ -88,7 +90,7 @@ def __repr__(self):

def __eq__(self, other):
is_annot_and_audio_eq = (self.annot_path == other.annot_path and
self.audio_path == other.audio_path)
self.notated_path == other.notated_path)
if hasattr(self, 'seq') and hasattr(other, 'seq'):
return is_annot_and_audio_eq and self.seq == other.seq
elif hasattr(self, 'bboxes') and hasattr(other, 'bboxes'):
Expand Down
2 changes: 1 addition & 1 deletion src/crowsetta/formats/bbox/raven.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def to_annot(self) -> crowsetta.Annotation:
"""
bboxes = self.to_bbox()
return crowsetta.Annotation(annot_path=self.annot_path,
audio_path=self.audio_path,
notated_path=self.audio_path,
bboxes=bboxes)

def to_file(self,
Expand Down
2 changes: 1 addition & 1 deletion src/crowsetta/formats/seq/birdsongrec.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,6 @@ def to_annot(self,
annot_list = []
for seq, wav_filename in zip(seqs, wav_filenames):
annot_list.append(
crowsetta.Annotation(seq=seq, annot_path=self.xml_path, audio_path=wav_filename)
crowsetta.Annotation(seq=seq, annot_path=self.xml_path, notated_path=wav_filename)
)
return annot_list
34 changes: 17 additions & 17 deletions src/crowsetta/formats/seq/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class GenericSeqSchema(pandera.SchemaModel):
onset_ind: Optional[Series[int]] = pandera.Field()
offset_ind: Optional[Series[int]] = pandera.Field()

audio_path: Series[str] = pandera.Field()
notated_path: Series[str] = pandera.Field()
annot_path: Series[str] = pandera.Field()
sequence: Series[int] = pandera.Field()
annotation: Series[int] = pandera.Field()
Expand Down Expand Up @@ -144,20 +144,20 @@ def annot2csv(annot: Union[crowsetta.Annotation, List[crowsetta.Annotation]],
if not(val is None and any([key.startswith(prefix) for prefix in ('onset', 'offset')]))
}) # OrderedDict is default; being extra explicit here
annot_path = annot_.annot_path
audio_path = annot_.audio_path
notated_path = annot_.notated_path
if abspath:
annot_path = os.path.abspath(annot_path)
if audio_path is not None:
audio_path = os.path.abspath(audio_path)
if notated_path is not None:
notated_path = os.path.abspath(notated_path)
elif basename:
annot_path = os.path.basename(annot_path)
if audio_path is not None:
audio_path = os.path.basename(audio_path)
# need to put in audio_path before annot_path
if audio_path is not None:
row['audio_path'] = audio_path
if notated_path is not None:
notated_path = os.path.basename(notated_path)
# need to put in notated_path before annot_path
if notated_path is not None:
row['notated_path'] = notated_path
else:
row['audio_path'] = 'None'
row['notated_path'] = 'None'
row['annot_path'] = annot_path
# we use 'sequence' and 'annotation' fields when we are
# loading back into Annotations
Expand Down Expand Up @@ -203,14 +203,14 @@ def csv2annot(csv_path: PathLike) -> List[crowsetta.Annotation]:
f"\n{annot_path}"
)
annot_path = annot_path[0]
# 2. audio_path
audio_path = df_annot.audio_path.unique()
if len(audio_path) > 1:
# 2. notated_path
notated_path = df_annot.notated_path.unique()
if len(notated_path) > 1:
raise ValueError(
f"found multiple values for 'audio_path' for annotation #{annotation_ind}:"
f"\n{audio_path}"
f"found multiple values for 'notated_path' for annotation #{annotation_ind}:"
f"\n{notated_path}"
)
audio_path = audio_path[0]
notated_path = notated_path[0]
# 3. Sequence
seq_uniq = df_annot.sequence.unique()
assert len(seq_uniq) > 0
Expand Down Expand Up @@ -240,7 +240,7 @@ def csv2annot(csv_path: PathLike) -> List[crowsetta.Annotation]:
)
annot = crowsetta.Annotation(
annot_path=annot_path,
audio_path=audio_path,
notated_path=notated_path,
seq=seq
)
annot_list.append(annot)
Expand Down
2 changes: 1 addition & 1 deletion src/crowsetta/formats/seq/notmat.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def to_annot(self,
seq = self.to_seq(round_times=round_times, decimals=decimals)

return crowsetta.Annotation(annot_path=self.notmat_path,
audio_path=self.audio_path,
notated_path=self.audio_path,
seq=seq)

def to_file(self,
Expand Down
29 changes: 18 additions & 11 deletions src/crowsetta/formats/seq/simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,11 @@ class SimpleSeq:
or a single phonetic transcription code.
annot_path : str, pathlib.Path
Path to file from which annotations were loaded.
audio_path : str. pathlib.Path
Path to audio file that the ``annot_path`` annotates.
notated_path : str, pathlib.Path
path to file that ``annot_path`` annotates.
E.g., an audio file, or an array file
that contains a spectrogram generated from audio.
Optional, default is None.
"""
name: ClassVar[str] = 'simple-seq'
ext: ClassVar[str] = ('.csv', '.txt')
Expand All @@ -97,13 +100,13 @@ class SimpleSeq:
offsets_s: np.ndarray = attr.field(eq=attr.cmp_using(eq=np.array_equal))
labels: np.ndarray = attr.field(eq=attr.cmp_using(eq=np.array_equal))
annot_path: pathlib.Path
audio_path: Optional[pathlib.Path] = attr.field(default=None,
converter=attr.converters.optional(pathlib.Path))
notated_path: Optional[pathlib.Path] = attr.field(default=None,
converter=attr.converters.optional(pathlib.Path))

@classmethod
def from_file(cls,
annot_path: PathLike,
audio_path: Optional[PathLike] = None,
notated_path: Optional[PathLike] = None,
columns_map: Optional[Mapping] = None,
read_csv_kwargs: Optional[Mapping] = None
) -> 'Self':
Expand All @@ -114,14 +117,18 @@ def from_file(cls,
annot_path : str, pathlib.Path
Path to an annotation file,
with one of the extensions {'.csv', '.txt'}.
audio_path : str, pathlib.Path
Optional, path to audio file
that ``annot_path`` annotates.
notated_path : str, pathlib.Path
path to file that ``annot_path`` annotates.
E.g., an audio file, or an array file
that contains a spectrogram generated from audio.
Optional, default is None.
columns_map : dict-like
Maps column names in header of ``annot_path``
to the standardized names
used by this format.
E.g., ``{'begin_time': 'onset_s', 'end_time': 'offset_s', 'text': 'label'}``
E.g., ``{'begin_time': 'onset_s', 'end_time': 'offset_s', 'text': 'label'}``.
Optional, default is None--assumes that
columns have the standardized names.
read_csv_kwargs : dict
keyword arguments passed to
``pandas.read_csv``. Default is None,
Expand All @@ -147,7 +154,7 @@ def from_file(cls,
offsets_s=df['offset_s'].values,
labels=df['label'].values,
annot_path=annot_path,
audio_path=audio_path,
notated_path=notated_path,
)

def to_seq(self,
Expand Down Expand Up @@ -217,7 +224,7 @@ def to_annot(self,
the result should be the same on Windows and Linux.
"""
seq = self.to_seq(round_times, decimals)
return crowsetta.Annotation(annot_path=self.annot_path, audio_path=self.audio_path, seq=seq)
return crowsetta.Annotation(annot_path=self.annot_path, notated_path=self.notated_path, seq=seq)

def to_file(self,
annot_path: PathLike,
Expand Down
2 changes: 1 addition & 1 deletion src/crowsetta/formats/seq/textgrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,5 +189,5 @@ def to_annot(self,
decimals=decimals)

return crowsetta.Annotation(annot_path=self.textgrid_path,
audio_path=self.audio_path,
notated_path=self.audio_path,
seq=seq)
2 changes: 1 addition & 1 deletion src/crowsetta/formats/seq/timit.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ def to_annot(self,
the result should be the same on Windows and Linux.
"""
phn_seq = self.to_seq(round_times, decimals, samplerate)
return crowsetta.Annotation(annot_path=self.transcript_path, audio_path=self.audio_path, seq=phn_seq)
return crowsetta.Annotation(annot_path=self.transcript_path, notated_path=self.audio_path, seq=phn_seq)

def to_file(self,
transcript_path: PathLike) -> None:
Expand Down
2 changes: 1 addition & 1 deletion src/crowsetta/formats/seq/yarden.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ def to_annot(self,
for audio_path, seq in zip(self.audio_paths, seqs):
annots.append(
crowsetta.Annotation(annot_path=self.annot_path,
audio_path=audio_path,
notated_path=audio_path,
seq=seq)
)
return annots
2 changes: 1 addition & 1 deletion tests/data_for_tests/csv/birdsongrec_Bird0_Annotation.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
label,onset_s,offset_s,onset_ind,offset_ind,audio_path,annot_path,sequence,annotation
label,onset_s,offset_s,onset_ind,offset_ind,notated_path,annot_path,sequence,annotation
0,1.07,1.154,34240,36928,0.wav,Annotation.xml,0,0
0,1.258,1.345,40256,43040,0.wav,Annotation.xml,0,0
0,1.467,1.555,46944,49760,0.wav,Annotation.xml,0,0
Expand Down
2 changes: 1 addition & 1 deletion tests/data_for_tests/csv/example_user_annotation.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
label,onset_s,offset_s,audio_path,annot_path,sequence,annotation
label,onset_s,offset_s,notated_path,annot_path,sequence,annotation
1,0.0029761904761904,0.141504329004329,lbr3009_0005_2017_04_27_06_14_46.wav,../test_data/example_user_format/bird1_annotation.mat,0,0
1,0.279125,0.504625,lbr3009_0005_2017_04_27_06_14_46.wav,../test_data/example_user_format/bird1_annotation.mat,0,0
5,0.5556472915365209,0.5962916666666667,lbr3009_0005_2017_04_27_06_14_46.wav,../test_data/example_user_format/bird1_annotation.mat,0,0
Expand Down
2 changes: 1 addition & 1 deletion tests/data_for_tests/csv/invalid_fields_in_header.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
label,onset_s,offset_s,audio_path,annot_path,sequence,annotation,invalid
label,onset_s,offset_s,notated_path,annot_path,sequence,annotation,invalid
i,1.278,1.351,gy6or6_baseline_230312_0808.138.cbin,gy6or6_baseline_230312_0808.138.cbin.not.mat,0,0,i
i,1.452,1.536,gy6or6_baseline_230312_0808.138.cbin,gy6or6_baseline_230312_0808.138.cbin.not.mat,0,0,i
i,1.605,1.712,gy6or6_baseline_230312_0808.138.cbin,gy6or6_baseline_230312_0808.138.cbin.not.mat,0,0,i
Expand Down
2 changes: 1 addition & 1 deletion tests/data_for_tests/csv/missing_fields_in_header.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
onset_s,offset_s,audio_path,annot_path,sequence,annotation
onset_s,offset_s,notated_path,annot_path,sequence,annotation
1.278,1.351,gy6or6_baseline_230312_0808.138.cbin,gy6or6_baseline_230312_0808.138.cbin.not.mat,0,0
1.452,1.536,gy6or6_baseline_230312_0808.138.cbin,gy6or6_baseline_230312_0808.138.cbin.not.mat,0,0
1.605,1.712,gy6or6_baseline_230312_0808.138.cbin,gy6or6_baseline_230312_0808.138.cbin.not.mat,0,0
Expand Down
2 changes: 1 addition & 1 deletion tests/data_for_tests/csv/no_onset_or_offset_column.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
label,audio_path,annot_path,sequence,annotation
label,notated_path,annot_path,sequence,annotation
i,gy6or6_baseline_230312_0808.138.cbin,gy6or6_baseline_230312_0808.138.cbin.not.mat,0,0
i,gy6or6_baseline_230312_0808.138.cbin,gy6or6_baseline_230312_0808.138.cbin.not.mat,0,0
i,gy6or6_baseline_230312_0808.138.cbin,gy6or6_baseline_230312_0808.138.cbin.not.mat,0,0
Expand Down
2 changes: 1 addition & 1 deletion tests/data_for_tests/csv/notmat_gy6or6_032312.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
label,onset_s,offset_s,audio_path,annot_path,sequence,annotation
label,onset_s,offset_s,notated_path,annot_path,sequence,annotation
i,1.278,1.351,gy6or6_baseline_230312_0808.138.cbin,gy6or6_baseline_230312_0808.138.cbin.not.mat,0,0
i,1.452,1.536,gy6or6_baseline_230312_0808.138.cbin,gy6or6_baseline_230312_0808.138.cbin.not.mat,0,0
i,1.605,1.712,gy6or6_baseline_230312_0808.138.cbin,gy6or6_baseline_230312_0808.138.cbin.not.mat,0,0
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
label,onset_s,offset_s,onset_ind,audio_path,annot_path,sequence,annotation
label,onset_s,offset_s,onset_ind,notated_path,annot_path,sequence,annotation
h#,0.0,0.488,0,sa1.wav,sa1.phn,0,0
sh,0.488,0.594,7812,sa1.wav,sa1.phn,0,0
iy,0.594,0.663,9507,sa1.wav,sa1.phn,0,0
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
label,onset_s,audio_path,annot_path,sequence,annotation
label,onset_s,notated_path,annot_path,sequence,annotation
i,1.278,gy6or6_baseline_230312_0808.138.cbin,gy6or6_baseline_230312_0808.138.cbin.not.mat,0,0
i,1.452,gy6or6_baseline_230312_0808.138.cbin,gy6or6_baseline_230312_0808.138.cbin.not.mat,0,0
i,1.605,gy6or6_baseline_230312_0808.138.cbin,gy6or6_baseline_230312_0808.138.cbin.not.mat,0,0
Expand Down
2 changes: 1 addition & 1 deletion tests/data_for_tests/csv/timit-dr1-fvmh0-phn.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
label,onset_s,offset_s,onset_ind,offset_ind,audio_path,annot_path,sequence,annotation
label,onset_s,offset_s,onset_ind,offset_ind,notated_path,annot_path,sequence,annotation
h#,0.0,0.488,0,7812,sa1.wav,sa1.phn,0,0
sh,0.488,0.594,7812,9507,sa1.wav,sa1.phn,0,0
iy,0.594,0.663,9507,10610,sa1.wav,sa1.phn,0,0
Expand Down
22 changes: 11 additions & 11 deletions tests/scripts/remake_test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,43 +14,43 @@
def remake_notmat_as_generic_seq_csv():
cbin_dir = TEST_DATA.joinpath('cbins/gy6or6/032312/')
notmat_paths = sorted(cbin_dir.glob('*.not.mat'))
annots = [crowsetta.formats.NotMat.from_file(notmat_path).to_annot()
annots = [crowsetta.formats.seq.NotMat.from_file(notmat_path).to_annot()
for notmat_path in notmat_paths]
notmat_generic_seq = crowsetta.formats.GenericSeq(annots=annots)
notmat_generic_seq = crowsetta.formats.seq.GenericSeq(annots=annots)
csv_path = TEST_DATA / 'csv' / 'notmat_gy6or6_032312.csv'
print(
f'saving csv: {csv_path}'
)
notmat_generic_seq.to_csv(csv_path=csv_path, basename=True)
notmat_generic_seq.to_file(csv_path=csv_path, basename=True)


def remake_birdsongrec_as_generic_seq_csv():
birdsongrec_dir = TEST_DATA / 'birdsongrec' / 'Bird0'
birdsongrec_xml_file = birdsongrec_dir / 'Annotation.xml'
birdsongrec_wavpath = birdsongrec_dir / 'Wave'
birdsongrec = crowsetta.formats.BirdsongRec.from_file(xml_path=birdsongrec_xml_file,
wav_path=birdsongrec_wavpath,
concat_seqs_into_songs=True)
birdsongrec = crowsetta.formats.seq.BirdsongRec.from_file(xml_path=birdsongrec_xml_file,
wav_path=birdsongrec_wavpath,
concat_seqs_into_songs=True)
annots = birdsongrec.to_annot()
generic_seq = crowsetta.formats.GenericSeq(annots=annots)
generic_seq = crowsetta.formats.seq.GenericSeq(annots=annots)
csv_path = TEST_DATA / 'csv' / 'birdsongrec_Bird0_Annotation.csv'
print(
f'saving csv: {csv_path}'
)
generic_seq.to_csv(csv_path=csv_path, basename=True)
generic_seq.to_file(csv_path=csv_path, basename=True)


def remake_timit_phn_as_generic_seq_csv():
timit_kaggle_dir = TEST_DATA / 'timit_kaggle' / 'dr1-fvmh0'
phn_paths = sorted(timit_kaggle_dir.glob('*.phn'))
annots = [crowsetta.formats.Timit.from_file(phn_path).to_annot()
annots = [crowsetta.formats.seq.Timit.from_file(phn_path).to_annot()
for phn_path in phn_paths]
timit_generic_seq = crowsetta.formats.GenericSeq(annots=annots)
timit_generic_seq = crowsetta.formats.seq.GenericSeq(annots=annots)
csv_path = TEST_DATA / 'csv' / 'timit-dr1-fvmh0-phn.csv'
print(
f'saving csv: {csv_path}'
)
timit_generic_seq.to_csv(csv_path=csv_path, basename=True)
timit_generic_seq.to_file(csv_path=csv_path, basename=True)


def remake_invalid_fields_in_header_csv(source_csv_path):
Expand Down

0 comments on commit d80715b

Please sign in to comment.