From 18508d423468dbd67847aa1927476af98bb1bff5 Mon Sep 17 00:00:00 2001 From: Steve Anderson Date: Thu, 23 Feb 2023 15:56:08 +0000 Subject: [PATCH 1/4] [MOD-603][external] Add annotation file item_id and annotation id. These fields are present in the darwin JSON v2 export, but they are not exposed by the relevant python types. We need these for some upcoming work which reads darwin-py exports. --- darwin/datatypes.py | 6 ++++++ darwin/utils.py | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/darwin/datatypes.py b/darwin/datatypes.py index 93dfe315a..f0119c589 100644 --- a/darwin/datatypes.py +++ b/darwin/datatypes.py @@ -168,6 +168,9 @@ class Annotation: #: Authorship of the annotation (reviewers) reviewers: Optional[List[AnnotationAuthor]] = None + #: The darwin ID of this annotation. + id: Optional[str] = None + def get_sub(self, annotation_type: str) -> Optional[SubAnnotation]: """ Returns the first SubAnnotation that matches the given type. @@ -346,6 +349,9 @@ class AnnotationFile: #: List of ``VideoAnnotation``\s or ``Annotation``\s. annotations: Sequence[Union[Annotation, VideoAnnotation]] + # The darwin ID of the item that these annotations belong to. + item_id: Optional[str] = None + # Deprecated #: Whether the annotations in the ``annotations`` attribute are ``VideoAnnotation`` or not. 
is_video: bool = False diff --git a/darwin/utils.py b/darwin/utils.py index efd2bd641..48a696ec9 100644 --- a/darwin/utils.py +++ b/darwin/utils.py @@ -436,6 +436,7 @@ def _parse_darwin_v2(path: Path, data: Dict[str, Any]) -> dt.AnnotationFile: version=_parse_version(data), path=path, filename=item["name"], + item_id=item.get("source_info", {}).get("item_id", None), dataset_name=item.get("source_info", {}).get("dataset", {}).get("name", None), annotation_classes=annotation_classes, annotations=annotations, @@ -456,6 +457,7 @@ def _parse_darwin_v2(path: Path, data: Dict[str, Any]) -> dt.AnnotationFile: version=_parse_version(data), path=path, filename=item["name"], + item_id=item.get("source_info", {}).get("item_id", None), dataset_name=item.get("source_info", {}).get("dataset", {}).get("name", None), annotation_classes=annotation_classes, annotations=annotations, @@ -632,6 +634,8 @@ def _parse_darwin_annotation(annotation: Dict[str, Any]) -> Optional[dt.Annotati print(f"[WARNING] Unsupported annotation type: '{annotation.keys()}'") return None + if "id" in annotation: + main_annotation.id = annotation["id"] if "instance_id" in annotation: main_annotation.subs.append(dt.make_instance_id(annotation["instance_id"]["value"])) if "attributes" in annotation: @@ -734,6 +738,7 @@ def split_video_annotation(annotation: dt.AnnotationFile) -> List[dt.AnnotationF frame_url, annotation.workview_url, annotation.seq, + item_id=annotation.item_id, slots=annotation.slots, ) ) From 5867eeea7c6853099674a84e06f6bbf581ec0814 Mon Sep 17 00:00:00 2001 From: Steve Anderson Date: Thu, 23 Feb 2023 16:35:14 +0000 Subject: [PATCH 2/4] [MOD-603][internal] Pass annotation ID through video annotations --- darwin/datatypes.py | 3 +++ darwin/utils.py | 7 ++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/darwin/datatypes.py b/darwin/datatypes.py index f0119c589..b5b8674e4 100644 --- a/darwin/datatypes.py +++ b/darwin/datatypes.py @@ -222,6 +222,9 @@ class VideoAnnotation: #: 
Authorship of the annotation (reviewers) reviewers: Optional[List[AnnotationAuthor]] = None + #: The darwin ID of this annotation. + id: Optional[str] = None + def get_data( self, only_keyframes: bool = True, diff --git a/darwin/utils.py b/darwin/utils.py index 48a696ec9..ec5a2fdeb 100644 --- a/darwin/utils.py +++ b/darwin/utils.py @@ -666,7 +666,9 @@ def _parse_darwin_video_annotation(annotation: dict) -> Optional[dt.VideoAnnotat keyframes: Dict[int, bool] = {} frames = {**annotation.get("frames", {}), **annotation.get("sections", {})} for f, frame in frames.items(): - frame_annotations[int(f)] = _parse_darwin_annotation({**frame, **{"name": name}}) + frame_annotations[int(f)] = _parse_darwin_annotation( + {**frame, **{"name": name, "id": annotation.get("id", None)}} + ) keyframes[int(f)] = frame.get("keyframe", False) if not frame_annotations: @@ -679,6 +681,9 @@ def _parse_darwin_video_annotation(annotation: dict) -> Optional[dt.VideoAnnotat slot_names=parse_slot_names(annotation), ) + if "id" in annotation: + main_annotation.id = annotation["id"] + if "annotators" in annotation: main_annotation.annotators = _parse_annotators(annotation["annotators"]) From 4878ef98b644d35d2e8d520cc0d21930c11e498c Mon Sep 17 00:00:00 2001 From: Steve Anderson Date: Thu, 23 Feb 2023 16:39:02 +0000 Subject: [PATCH 3/4] [MOD-603][internal] Move id to bottom for backwards compatibility --- darwin/datatypes.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/darwin/datatypes.py b/darwin/datatypes.py index b5b8674e4..4f0aa2cd3 100644 --- a/darwin/datatypes.py +++ b/darwin/datatypes.py @@ -352,9 +352,6 @@ class AnnotationFile: #: List of ``VideoAnnotation``\s or ``Annotation``\s. annotations: Sequence[Union[Annotation, VideoAnnotation]] - # The darwin ID of the item that these annotations belong to. - item_id: Optional[str] = None - # Deprecated #: Whether the annotations in the ``annotations`` attribute are ``VideoAnnotation`` or not. 
is_video: bool = False @@ -397,6 +394,9 @@ class AnnotationFile: # e.g. (1, 0, 'a') version: AnnotationFileVersion = field(default_factory=AnnotationFileVersion) + #: The darwin ID of the item that these annotations belong to. + item_id: Optional[str] = None + @property def full_path(self) -> str: """ From fe46906135147af191391c2056510431f8c61214 Mon Sep 17 00:00:00 2001 From: Steve Anderson Date: Mon, 27 Feb 2023 10:50:00 +0000 Subject: [PATCH 4/4] [MOD-603][internal] Add tests for new IDs --- tests/darwin/utils_test.py | 128 +++++++++++++++++++++++++++++++++++-- 1 file changed, 122 insertions(+), 6 deletions(-) diff --git a/tests/darwin/utils_test.py b/tests/darwin/utils_test.py index b51ebfd07..db878dab1 100644 --- a/tests/darwin/utils_test.py +++ b/tests/darwin/utils_test.py @@ -149,7 +149,7 @@ def it_parses_darwin_images_correctly(tmp_path): assert annotation_file.path == import_file assert annotation_file.filename == "P49-RediPad-ProPlayLEFTY_442.jpg" assert annotation_file.dataset_name == None - assert annotation_file.version == dt.AnnotationFileVersion(major=1, minor=0, suffix='') + assert annotation_file.version == dt.AnnotationFileVersion(major=1, minor=0, suffix="") assert len(annotation_file.annotations) == 2 assert len(annotation_file.annotation_classes) == 2 @@ -236,7 +236,7 @@ def it_parses_darwin_videos_correctly(tmp_path): assert annotation_file.path == import_file assert annotation_file.filename == "above tractor.mp4" assert annotation_file.dataset_name == None - assert annotation_file.version == dt.AnnotationFileVersion(major=1, minor=0, suffix='') + assert annotation_file.version == dt.AnnotationFileVersion(major=1, minor=0, suffix="") assert len(annotation_file.annotations) == 1 assert len(annotation_file.annotation_classes) == 1 @@ -272,7 +272,7 @@ def it_parses_darwin_videos_correctly(tmp_path): ) ] - def it_parses_darwin_v2_correctly(tmp_path): + def it_parses_darwin_v2_images_correctly(tmp_path): content = """ { "version": "2.0", @@ -355,19 
+355,135 @@ def it_parses_darwin_v2_correctly(tmp_path): assert annotation_file.path == import_file assert annotation_file.filename == "item-0.jpg" assert annotation_file.dataset_name == "Dataset 0" - assert annotation_file.version == dt.AnnotationFileVersion(major=2, minor=0, suffix='') + assert annotation_file.item_id == "0185c280-bbad-6117-71a7-a6853a6e3f2e" + assert annotation_file.version == dt.AnnotationFileVersion(major=2, minor=0, suffix="") assert len(annotation_file.annotations) == 1 assert len(annotation_file.annotation_classes) == 1 + assert annotation_file.annotations[0].id == "f8f5f235-bd47-47be-b4fe-07d49e0177a7" assert not annotation_file.is_video assert annotation_file.image_width == 123 assert annotation_file.image_height == 456 - assert annotation_file.image_url == "http://example.com/fake-api-url/v2/teams/v7/uploads/43a83276-1abf-483b-877e-6e61349f2d1f" - assert annotation_file.workview_url == "http://example.com/workview?dataset=545&item=0185c280-bbad-6117-71a7-a6853a6e3f2e" + assert ( + annotation_file.image_url + == "http://example.com/fake-api-url/v2/teams/v7/uploads/43a83276-1abf-483b-877e-6e61349f2d1f" + ) + assert ( + annotation_file.workview_url + == "http://example.com/workview?dataset=545&item=0185c280-bbad-6117-71a7-a6853a6e3f2e" + ) assert not annotation_file.seq assert not annotation_file.frame_urls assert annotation_file.remote_path == "/path-0/folder" + def it_parses_darwin_v2_videos_correctly(tmp_path): + content = """ + { + "version": "2.0", + "schema_ref": "https://darwin-public.s3.eu-west-1.amazonaws.com/darwin_json_2_0.schema.json", + "item": { + "name": "item-0.mp4", + "path": "/path-0/folder", + "source_info": { + "dataset": { + "name": "Dataset 0", + "slug": "dataset-0", + "dataset_management_url": "http://example.com/datasets/545/dataset-management" + }, + "item_id": "0185c280-bbad-6117-71a7-a6853a6e3f2e", + "team": { + "name": "Team 0", + "slug": "team-0" + }, + "workview_url": 
"http://example.com/workview?dataset=545&item=0185c280-bbad-6117-71a7-a6853a6e3f2e" + }, + "slots": [ + { + "type": "video", + "slot_name": "0", + "width": 123, + "height": 456, + "thumbnail_url": "http://example.com/fake-api-url/v2/teams/v7/files/71857eb3-6feb-428a-8fc6-0c8a895ea611/thumbnail", + "frame_urls": [ + "http://example.com/fake-api-url/v2/teams/v7/files/71857eb3-6feb-428a-8fc6-0c8a895ea611/frames/1", + "http://example.com/fake-api-url/v2/teams/v7/files/71857eb3-6feb-428a-8fc6-0c8a895ea611/frames/2" + ], + "frame_count": 2, + "source_files": [ + { + "file_name": "file-0", + "url": "http://example.com/fake-api-url/v2/teams/v7/uploads/43a83276-1abf-483b-877e-6e61349f2d1f" + } + ] + } + ] + }, + "annotations": [ + { + "frames": { + "3": { + "bounding_box": { + "h": 2, + "w": 1, + "x": 1, + "y": 1 + }, + "polygon": { + "paths": [ + [ + { "x": 1, "y": 1 }, + { "x": 2, "y": 2 }, + { "x": 1, "y": 3 } + ] + ] + } + } + }, + "id": "f8f5f235-bd47-47be-b4fe-07d49e0177a7", + "interpolate_algorithm": "linear-1.1", + "interpolated": true, + "name": "polygon", + "ranges": [ [ 0, 1 ] ], + "slot_names": [ + "1" + ] + } + ] + } + """ + + directory = tmp_path / "imports" + directory.mkdir() + import_file = directory / "darwin-file.json" + import_file.write_text(content) + + annotation_file: dt.AnnotationFile = parse_darwin_json(import_file, None) + + assert annotation_file.path == import_file + assert annotation_file.filename == "item-0.mp4" + assert annotation_file.dataset_name == "Dataset 0" + assert annotation_file.item_id == "0185c280-bbad-6117-71a7-a6853a6e3f2e" + assert annotation_file.version == dt.AnnotationFileVersion(major=2, minor=0, suffix="") + + assert len(annotation_file.annotations) == 1 + assert len(annotation_file.annotation_classes) == 1 + assert annotation_file.annotations[0].id == "f8f5f235-bd47-47be-b4fe-07d49e0177a7" + assert list(annotation_file.annotations[0].frames.keys()) == [3] + assert annotation_file.annotations[0].frames[3].id == 
"f8f5f235-bd47-47be-b4fe-07d49e0177a7" + assert annotation_file.is_video + assert annotation_file.image_width == 123 + assert annotation_file.image_height == 456 + assert ( + annotation_file.image_url + == "http://example.com/fake-api-url/v2/teams/v7/uploads/43a83276-1abf-483b-877e-6e61349f2d1f" + ) + assert ( + annotation_file.workview_url + == "http://example.com/workview?dataset=545&item=0185c280-bbad-6117-71a7-a6853a6e3f2e" + ) + assert not annotation_file.seq + assert len(annotation_file.frame_urls) == 2 + assert annotation_file.remote_path == "/path-0/folder" def it_returns_None_if_no_annotations_exist(tmp_path): content = """