Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions darwin/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,9 @@ class Annotation:
#: Authorship of the annotation (reviewers)
reviewers: Optional[List[AnnotationAuthor]] = None

#: The Darwin ID of this annotation (``None`` when not set).
id: Optional[str] = None

def get_sub(self, annotation_type: str) -> Optional[SubAnnotation]:
"""
Returns the first SubAnnotation that matches the given type.
Expand Down Expand Up @@ -219,6 +222,9 @@ class VideoAnnotation:
#: Authorship of the annotation (reviewers)
reviewers: Optional[List[AnnotationAuthor]] = None

#: The Darwin ID of this annotation (``None`` when not set).
id: Optional[str] = None

def get_data(
self,
only_keyframes: bool = True,
Expand Down Expand Up @@ -388,6 +394,9 @@ class AnnotationFile:
# e.g. (1, 0, 'a')
version: AnnotationFileVersion = field(default_factory=AnnotationFileVersion)

#: The Darwin ID of the item that these annotations belong to (``None`` when not set).
item_id: Optional[str] = None

@property
def full_path(self) -> str:
"""
Expand Down
12 changes: 11 additions & 1 deletion darwin/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,7 @@ def _parse_darwin_v2(path: Path, data: Dict[str, Any]) -> dt.AnnotationFile:
version=_parse_version(data),
path=path,
filename=item["name"],
item_id=item.get("source_info", {}).get("item_id", None),
dataset_name=item.get("source_info", {}).get("dataset", {}).get("name", None),
annotation_classes=annotation_classes,
annotations=annotations,
Expand All @@ -456,6 +457,7 @@ def _parse_darwin_v2(path: Path, data: Dict[str, Any]) -> dt.AnnotationFile:
version=_parse_version(data),
path=path,
filename=item["name"],
item_id=item.get("source_info", {}).get("item_id", None),
dataset_name=item.get("source_info", {}).get("dataset", {}).get("name", None),
annotation_classes=annotation_classes,
annotations=annotations,
Expand Down Expand Up @@ -632,6 +634,8 @@ def _parse_darwin_annotation(annotation: Dict[str, Any]) -> Optional[dt.Annotati
print(f"[WARNING] Unsupported annotation type: '{annotation.keys()}'")
return None

if "id" in annotation:
main_annotation.id = annotation["id"]
if "instance_id" in annotation:
main_annotation.subs.append(dt.make_instance_id(annotation["instance_id"]["value"]))
if "attributes" in annotation:
Expand Down Expand Up @@ -662,7 +666,9 @@ def _parse_darwin_video_annotation(annotation: dict) -> Optional[dt.VideoAnnotat
keyframes: Dict[int, bool] = {}
frames = {**annotation.get("frames", {}), **annotation.get("sections", {})}
for f, frame in frames.items():
frame_annotations[int(f)] = _parse_darwin_annotation({**frame, **{"name": name}})
frame_annotations[int(f)] = _parse_darwin_annotation(
{**frame, **{"name": name, "id": annotation.get("id", None)}}
)
keyframes[int(f)] = frame.get("keyframe", False)

if not frame_annotations:
Expand All @@ -675,6 +681,9 @@ def _parse_darwin_video_annotation(annotation: dict) -> Optional[dt.VideoAnnotat
slot_names=parse_slot_names(annotation),
)

if "id" in annotation:
main_annotation.id = annotation["id"]

if "annotators" in annotation:
main_annotation.annotators = _parse_annotators(annotation["annotators"])

Expand Down Expand Up @@ -734,6 +743,7 @@ def split_video_annotation(annotation: dt.AnnotationFile) -> List[dt.AnnotationF
frame_url,
annotation.workview_url,
annotation.seq,
item_id=annotation.item_id,
slots=annotation.slots,
)
)
Expand Down
128 changes: 122 additions & 6 deletions tests/darwin/utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def it_parses_darwin_images_correctly(tmp_path):
assert annotation_file.path == import_file
assert annotation_file.filename == "P49-RediPad-ProPlayLEFTY_442.jpg"
assert annotation_file.dataset_name == None
assert annotation_file.version == dt.AnnotationFileVersion(major=1, minor=0, suffix='')
assert annotation_file.version == dt.AnnotationFileVersion(major=1, minor=0, suffix="")

assert len(annotation_file.annotations) == 2
assert len(annotation_file.annotation_classes) == 2
Expand Down Expand Up @@ -236,7 +236,7 @@ def it_parses_darwin_videos_correctly(tmp_path):
assert annotation_file.path == import_file
assert annotation_file.filename == "above tractor.mp4"
assert annotation_file.dataset_name == None
assert annotation_file.version == dt.AnnotationFileVersion(major=1, minor=0, suffix='')
assert annotation_file.version == dt.AnnotationFileVersion(major=1, minor=0, suffix="")

assert len(annotation_file.annotations) == 1
assert len(annotation_file.annotation_classes) == 1
Expand Down Expand Up @@ -272,7 +272,7 @@ def it_parses_darwin_videos_correctly(tmp_path):
)
]

def it_parses_darwin_v2_correctly(tmp_path):
def it_parses_darwin_v2_images_correctly(tmp_path):
content = """
{
"version": "2.0",
Expand Down Expand Up @@ -355,19 +355,135 @@ def it_parses_darwin_v2_correctly(tmp_path):
assert annotation_file.path == import_file
assert annotation_file.filename == "item-0.jpg"
assert annotation_file.dataset_name == "Dataset 0"
assert annotation_file.version == dt.AnnotationFileVersion(major=2, minor=0, suffix='')
assert annotation_file.item_id == "0185c280-bbad-6117-71a7-a6853a6e3f2e"
assert annotation_file.version == dt.AnnotationFileVersion(major=2, minor=0, suffix="")

assert len(annotation_file.annotations) == 1
assert len(annotation_file.annotation_classes) == 1
assert annotation_file.annotations[0].id == "f8f5f235-bd47-47be-b4fe-07d49e0177a7"
assert not annotation_file.is_video
assert annotation_file.image_width == 123
assert annotation_file.image_height == 456
assert annotation_file.image_url == "http://example.com/fake-api-url/v2/teams/v7/uploads/43a83276-1abf-483b-877e-6e61349f2d1f"
assert annotation_file.workview_url == "http://example.com/workview?dataset=545&item=0185c280-bbad-6117-71a7-a6853a6e3f2e"
assert (
annotation_file.image_url
== "http://example.com/fake-api-url/v2/teams/v7/uploads/43a83276-1abf-483b-877e-6e61349f2d1f"
)
assert (
annotation_file.workview_url
== "http://example.com/workview?dataset=545&item=0185c280-bbad-6117-71a7-a6853a6e3f2e"
)
assert not annotation_file.seq
assert not annotation_file.frame_urls
assert annotation_file.remote_path == "/path-0/folder"

def it_parses_darwin_v2_videos_correctly(tmp_path):
    """
    Parse a Darwin JSON 2.0 export of a video item and verify the result.

    The fixture is written to a temporary ``imports`` directory and read back
    through ``parse_darwin_json``. The checks cover the file-level metadata
    (including ``item_id``), the single video annotation with its ``id`` and
    the ``id`` carried by its frame annotation, and the slot-derived fields
    (dimensions, URLs, frame URLs, remote path).
    """
    payload = """
{
"version": "2.0",
"schema_ref": "https://darwin-public.s3.eu-west-1.amazonaws.com/darwin_json_2_0.schema.json",
"item": {
"name": "item-0.mp4",
"path": "/path-0/folder",
"source_info": {
"dataset": {
"name": "Dataset 0",
"slug": "dataset-0",
"dataset_management_url": "http://example.com/datasets/545/dataset-management"
},
"item_id": "0185c280-bbad-6117-71a7-a6853a6e3f2e",
"team": {
"name": "Team 0",
"slug": "team-0"
},
"workview_url": "http://example.com/workview?dataset=545&item=0185c280-bbad-6117-71a7-a6853a6e3f2e"
},
"slots": [
{
"type": "video",
"slot_name": "0",
"width": 123,
"height": 456,
"thumbnail_url": "http://example.com/fake-api-url/v2/teams/v7/files/71857eb3-6feb-428a-8fc6-0c8a895ea611/thumbnail",
"frame_urls": [
"http://example.com/fake-api-url/v2/teams/v7/files/71857eb3-6feb-428a-8fc6-0c8a895ea611/frames/1",
"http://example.com/fake-api-url/v2/teams/v7/files/71857eb3-6feb-428a-8fc6-0c8a895ea611/frames/2"
],
"frame_count": 2,
"source_files": [
{
"file_name": "file-0",
"url": "http://example.com/fake-api-url/v2/teams/v7/uploads/43a83276-1abf-483b-877e-6e61349f2d1f"
}
]
}
]
},
"annotations": [
{
"frames": {
"3": {
"bounding_box": {
"h": 2,
"w": 1,
"x": 1,
"y": 1
},
"polygon": {
"paths": [
[
{ "x": 1, "y": 1 },
{ "x": 2, "y": 2 },
{ "x": 1, "y": 3 }
]
]
}
}
},
"id": "f8f5f235-bd47-47be-b4fe-07d49e0177a7",
"interpolate_algorithm": "linear-1.1",
"interpolated": true,
"name": "polygon",
"ranges": [ [ 0, 1 ] ],
"slot_names": [
"1"
]
}
]
}
"""

    # Expected values, named once so the assertions below stay short.
    expected_item_id = "0185c280-bbad-6117-71a7-a6853a6e3f2e"
    expected_annotation_id = "f8f5f235-bd47-47be-b4fe-07d49e0177a7"
    expected_image_url = "http://example.com/fake-api-url/v2/teams/v7/uploads/43a83276-1abf-483b-877e-6e61349f2d1f"
    expected_workview_url = (
        "http://example.com/workview?dataset=545&item=0185c280-bbad-6117-71a7-a6853a6e3f2e"
    )

    # Materialise the fixture on disk; the parser is given a file path.
    imports_dir = tmp_path / "imports"
    imports_dir.mkdir()
    import_file = imports_dir / "darwin-file.json"
    import_file.write_text(payload)

    parsed: dt.AnnotationFile = parse_darwin_json(import_file, None)

    # File-level metadata.
    assert parsed.path == import_file
    assert parsed.filename == "item-0.mp4"
    assert parsed.dataset_name == "Dataset 0"
    assert parsed.item_id == expected_item_id
    assert parsed.version == dt.AnnotationFileVersion(major=2, minor=0, suffix="")

    # Exactly one annotation of one class; the ID is present both on the video
    # annotation and on the annotation of its only frame.
    assert len(parsed.annotations) == 1
    assert len(parsed.annotation_classes) == 1
    video_annotation = parsed.annotations[0]
    assert video_annotation.id == expected_annotation_id
    assert list(video_annotation.frames.keys()) == [3]
    assert video_annotation.frames[3].id == expected_annotation_id

    # Fields derived from the item's slot description.
    assert parsed.is_video
    assert parsed.image_width == 123
    assert parsed.image_height == 456
    assert parsed.image_url == expected_image_url
    assert parsed.workview_url == expected_workview_url
    assert not parsed.seq
    assert len(parsed.frame_urls) == 2
    assert parsed.remote_path == "/path-0/folder"

def it_returns_None_if_no_annotations_exist(tmp_path):
content = """
Expand Down