Skip to content

Commit

Permalink
Merge pull request #164 from vocalpy/add-raven-format
Browse files Browse the repository at this point in the history
Add raven format
  • Loading branch information
NickleDave committed May 11, 2022
2 parents 36f11a3 + 8965906 commit 2cb75c1
Show file tree
Hide file tree
Showing 49 changed files with 6,791 additions and 9 deletions.
1 change: 1 addition & 0 deletions src/crowsetta/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
validation
)

from .bbox import BBox
from .transcriber import Transcriber
from .segment import Segment
from .sequence import Sequence
Expand Down
103 changes: 94 additions & 9 deletions src/crowsetta/annotation.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,100 @@
"""class that represents annotations from a single file"""
from pathlib import Path
from typing import List, Optional

import attr
from attr import validators, converters
from attr.validators import instance_of

import crowsetta
from .bbox import BBox
from .sequence import Sequence
from .typing import PathLike


class Annotation:
    """A class to represent annotations for a single file.

    The annotations can be one of two types:
    a single sequence, or a list of bounding boxes.

    Attributes
    ----------
    annot_path : str, pathlib.Path
        path to file from which annotations were loaded
    audio_path : str, pathlib.Path
        path to audio file that ``annot_path`` annotates.
        Optional, default is None.
    seq : crowsetta.Sequence
        a sequence of annotated segments,
        each having an onset time, offset time,
        and label.
    bboxes : list
        of ``crowsetta.BBox``,
        annotated bounding boxes,
        each having an onset time, offset time,
        lowest frequency, highest frequency,
        and label.

    Notes
    -----
    A ``crowsetta.Annotation`` can have a ``seq``
    or ``bboxes``, but not both. Only the attribute that
    was passed in is set on the instance; the other one
    does not exist, which is why ``__repr__`` and ``__eq__``
    use ``hasattr`` to find out which kind of annotation this is.
    """

    def __init__(self,
                 annot_path: PathLike,
                 audio_path: Optional[PathLike] = None,
                 seq: Optional[Sequence] = None,
                 bboxes: Optional[List[BBox]] = None):
        """Validate arguments and set attributes.

        Raises
        ------
        ValueError
            If neither or both of ``seq`` and ``bboxes`` are given,
            or if ``bboxes`` is not a list of ``crowsetta.BBox``.
        TypeError
            If ``seq`` is not a ``crowsetta.Sequence``.
        """
        if seq is None and bboxes is None:
            raise ValueError(
                'an Annotation must have either a ``seq`` or ``bboxes``'
            )

        if seq is not None and bboxes is not None:
            raise ValueError(
                'an Annotation can have either a ``seq`` '
                'or ``bboxes``, but not both.'
            )

        # Test against None, not truthiness: an empty list of bboxes
        # (or any other falsy argument) must still be validated and stored,
        # otherwise the instance ends up with neither attribute.
        if seq is not None:
            if not isinstance(seq, crowsetta.Sequence):
                raise TypeError(
                    f'``seq`` should be a ``crowsetta.Sequence`` but was: {type(seq)}'
                )
            self.seq = seq
        else:
            if not isinstance(bboxes, list):
                raise ValueError(
                    '``bboxes`` should be a list'
                )
            if not all(isinstance(bbox, BBox) for bbox in bboxes):
                raise ValueError(
                    '``bboxes`` should be a list of ``crowsetta.BBox`` instances'
                )
            self.bboxes = bboxes

        self.annot_path = Path(annot_path)
        if audio_path:
            self.audio_path = Path(audio_path)
        else:
            # falsy values (e.g. None) are stored unchanged
            self.audio_path = audio_path

    def __repr__(self):
        repr_ = f'Annotation(annot_path={repr(self.annot_path)}, audio_path={repr(self.audio_path)}, '
        if hasattr(self, 'seq'):
            repr_ += f'seq={self.seq})'
        elif hasattr(self, 'bboxes'):
            repr_ += f'bboxes={self.bboxes})'
        return repr_

    def __eq__(self, other):
        # Let Python fall back to its default handling for foreign types
        # instead of failing with AttributeError on ``other.annot_path``.
        if not isinstance(other, Annotation):
            return NotImplemented

        is_annot_and_audio_eq = (self.annot_path == other.annot_path and
                                 self.audio_path == other.audio_path)
        if hasattr(self, 'seq') and hasattr(other, 'seq'):
            return is_annot_and_audio_eq and self.seq == other.seq
        elif hasattr(self, 'bboxes') and hasattr(other, 'bboxes'):
            return is_annot_and_audio_eq and self.bboxes == other.bboxes
        else:
            # one annotation holds a sequence, the other bounding boxes
            return False

    def __ne__(self, other):
        return not self == other
51 changes: 51 additions & 0 deletions src/crowsetta/bbox.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import attrs
from attrs import field


def is_positive(self, attribute, value):
    """Validator that rejects negative values.

    Has the ``(instance, attribute, value)`` signature,
    so it can be passed as ``validator=`` to a field.
    Zero is accepted; only values strictly less than zero are rejected.

    Parameters
    ----------
    self : object
        instance being validated. Not used.
    attribute : object
        attribute being validated. Not used.
    value : float
        value to check.

    Raises
    ------
    ValueError
        If ``value`` is less than zero.
    """
    if value < 0.:
        raise ValueError(
            'All input values must be non-negative'
        )


@attrs.define
class BBox:
    """Class that represents a bounding box
    on a spectrogram,
    drawn around an animal vocalization
    or other sound.

    Attributes
    ----------
    onset : float
        Time of sound onset, typically in seconds.
    offset : float
        Time of sound offset, typically in seconds.
    low_freq : float
        Lowest frequency bounding sound, typically in Hz.
    high_freq : float
        Highest frequency bounding sound, typically in Hz.
    label : str
        string label that annotates bounding box

    Notes
    -----
    ``onset``, ``offset``, ``low_freq`` and ``high_freq``
    are each checked with ``is_positive``. In addition,
    ``onset`` must be less than ``offset`` and
    ``low_freq`` must be less than ``high_freq``;
    a ``ValueError`` is raised otherwise.
    """
    onset: float = field(validator=is_positive)

    # The cross-field validators below read a field that is declared later.
    # NOTE(review): this relies on attrs running validators only after
    # all attributes have been assigned in ``__init__`` -- confirm against
    # the attrs version in use.
    @onset.validator
    def lt_offset(self, attribute, value):
        if not value < self.offset:
            raise ValueError(
                'Bounding box onset must be less than offset. '
                f'Onset was {value}, offset was {self.offset}'
            )

    offset: float = field(validator=is_positive)
    low_freq: float = field(validator=is_positive)

    @low_freq.validator
    def lt_high_freq(self, attribute, value):
        if not value < self.high_freq:
            raise ValueError(
                'Low frequency of bounding box must be less than high frequency. '
                f'Low frequency was {value}, high frequency was {self.high_freq}'
            )

    high_freq: float = field(validator=is_positive)
    label: str
5 changes: 5 additions & 0 deletions src/crowsetta/formats/bbox/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from .raven import Raven

__all__ = [
'Raven',
]
169 changes: 169 additions & 0 deletions src/crowsetta/formats/bbox/raven.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
"""module with functions that handle .txt annotation files
from Raven (https://ravensoundsoftware.com/software/).
Adapted in part from ``opensoundscape``
https://github.com/kitzeslab/opensoundscape/blob/master/opensoundscape/annotations.py
under MIT license
"""
import pathlib
from typing import ClassVar, List, Optional

import attr
import pandas as pd
import pandera
from pandera.typing import Series

import crowsetta
from crowsetta.typing import PathLike


class RavenSchema(pandera.SchemaModel):
    """A ``pandera.SchemaModel`` that validates ``pandas`` dataframes
    loaded from a .txt file, created by exporting a Selection Table
    from Raven.

    The column names are the ones the dataframe has
    after the Raven column headers have been renamed
    to ``begin_time_s``, ``end_time_s``, ``low_freq_hz``,
    ``high_freq_hz`` and ``annotation``.
    """
    # ``coerce=True`` on the numeric columns: a column whose values
    # are all whole numbers is parsed by pandas as integers,
    # which would otherwise fail the float dtype check.
    begin_time_s: Series[float] = pandera.Field(coerce=True)
    end_time_s: Series[float] = pandera.Field(coerce=True)
    low_freq_hz: Series[float] = pandera.Field(coerce=True)
    high_freq_hz: Series[float] = pandera.Field(coerce=True)
    annotation: Series[pd.StringDtype] = pandera.Field(coerce=True)

    class Config:
        # we set strict to False
        # because we just ignore any other columns, e.g. 'Selection',
        # and we don't want to throw an error
        # just because those extra columns are present
        strict = False


# Maps column headers as they appear in a Raven selection table
# to the column names declared by ``RavenSchema``.
# The annotation column is not listed here because its header is
# supplied by the caller (``annot_col``) and added to a copy of this map.
COLUMNS_MAP = {
    "Begin Time (s)": "begin_time_s",
    "End Time (s)": "end_time_s",
    "Low Freq (Hz)": "low_freq_hz",
    "High Freq (Hz)": "high_freq_hz",
}


@crowsetta.interface.BBoxLike.register
@attr.define
class Raven:
    """Class that represents .txt annotation files
    from Raven (https://ravensoundsoftware.com/software/),
    created by exporting a Selection Table.

    Attributes
    ----------
    name: str
        Shorthand name for annotation format: 'raven'.
    ext: tuple
        Extensions of files in annotation format: ``('.txt',)``
    df : pandas.DataFrame
        with annotations loaded into it
    annot_path : str, pathlib.Path
        Path to Raven .txt file from which annotations were loaded.
    annot_col : str
        Name of the column in the Raven .txt file
        that contains the annotations.
    audio_path : str, pathlib.Path
        Path to audio file that the Raven .txt file annotates.
    """
    name: ClassVar[str] = 'raven'
    # a tuple, not a single string
    ext: ClassVar[tuple] = ('.txt',)

    df: pd.DataFrame
    annot_path: pathlib.Path
    annot_col: str
    audio_path: Optional[pathlib.Path] = attr.field(default=None,
                                                    converter=attr.converters.optional(pathlib.Path))

    @classmethod
    def from_file(cls,
                  annot_path: PathLike,
                  annot_col: str = 'Annotation',
                  audio_path: Optional[PathLike] = None) -> 'Self':
        """Load annotations from a Raven annotation file,
        created by exporting a Selection Table.

        Parameters
        ----------
        annot_path : str, pathlib.Path
            Path to a .txt file exported from Raven.
        annot_col : str
            name of column that contains annotations
        audio_path : str, pathlib.Path
            Path to audio file that the Raven .txt file annotates.
            Optional, defaults to None.

        Returns
        -------
        raven : Raven
            instance holding the validated dataframe.

        Raises
        ------
        ValueError
            If the file contains no rows.
        """
        annot_path = pathlib.Path(annot_path)
        crowsetta.validation.validate_ext(annot_path, extension=cls.ext)

        # file is tab-separated; the first line is the header row
        df = pd.read_csv(annot_path, sep='\t')
        if len(df) < 1:
            raise ValueError(
                f'Cannot load annotations, '
                f'there are no rows in Raven .txt file:\n{df}'
            )
        # copy, so the module-level mapping is not mutated
        # when the caller-specified annotation column is added
        columns_map = dict(COLUMNS_MAP)
        columns_map.update({annot_col: 'annotation'})
        df.rename(columns=columns_map, inplace=True)
        df = RavenSchema.validate(df)

        return cls(
            df=df,
            annot_path=annot_path,
            annot_col=annot_col,
            audio_path=audio_path,
        )

    def to_bbox(self) -> List[crowsetta.BBox]:
        """Convert this Raven annotation to a ``list`` of ``crowsetta.BBox``.

        Returns
        -------
        bboxes : list
            of ``crowsetta.BBox``, one per row of ``self.df``
        """
        return [
            crowsetta.BBox(onset=begin_time,
                           offset=end_time,
                           low_freq=low_freq,
                           high_freq=high_freq,
                           label=label)
            for begin_time, end_time, low_freq, high_freq, label in zip(
                self.df.begin_time_s.values,
                self.df.end_time_s.values,
                self.df.low_freq_hz.values,
                self.df.high_freq_hz.values,
                self.df['annotation'].values,
            )
        ]

    def to_annot(self) -> crowsetta.Annotation:
        """Convert this Raven annotation to a ``crowsetta.Annotation``.

        Returns
        -------
        annot : crowsetta.Annotation
            with its ``bboxes`` set to the result of ``self.to_bbox()``
        """
        bboxes = self.to_bbox()
        return crowsetta.Annotation(annot_path=self.annot_path,
                                    audio_path=self.audio_path,
                                    bboxes=bboxes)

    def to_file(self,
                annot_path: PathLike) -> None:
        """Make a .txt file that can be read by Raven
        from this annotation.

        Parameters
        ----------
        annot_path : str, pathlib.Path
            path including filename where file should be saved.
            Must have extension '.txt'
        """
        crowsetta.validation.validate_ext(annot_path, extension=self.ext)

        # invert the mapping to restore the original Raven column headers,
        # including the annotation column name given when loading
        columns_map = {v: k for k, v in COLUMNS_MAP.items()}
        columns_map.update({'annotation': self.annot_col})
        df_out = self.df.rename(columns=columns_map)
        df_out.to_csv(annot_path, sep='\t', index=False)
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Selection View Channel Begin Time (s) End Time (s) Low Freq (Hz) High Freq (Hz) Species
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Selection View Channel Begin Time (s) End Time (s) Low Freq (Hz) High Freq (Hz) Species
1 Spectrogram 1 1 154.387792767 154.911598217 2878.2 4049.0 EATO
2 Spectrogram 1 1 167.526598245 168.17302044 2731.9 3902.7 EATO
3 Spectrogram 1 1 183.609636834 184.097751553 2878.2 3975.8 EATO
4 Spectrogram 1 1 250.527480604 251.160710509 2756.2 3951.4 EATO
5 Spectrogram 1 1 277.88724277 278.480895806 2707.5 3975.8 EATO
6 Spectrogram 1 1 295.52970757 296.110168316 2951.4 3975.8 EATO

0 comments on commit 2cb75c1

Please sign in to comment.