v7labs · whilefalse · Feb 27, 2023 · Feb 23, 2023 · Feb 23, 2023 · Feb 23, 2023
diff --git a/darwin/datatypes.py b/darwin/datatypes.py
@@ -168,6 +168,9 @@ class Annotation:
     #: Authorship of the annotation (reviewers)
     reviewers: Optional[List[AnnotationAuthor]] = None
 
+    #: The darwin ID of this annotation (``None`` when the parsed data provides no ID).
+    id: Optional[str] = None
+
     def get_sub(self, annotation_type: str) -> Optional[SubAnnotation]:
         """
         Returns the first SubAnnotation that matches the given type.
@@ -219,6 +222,9 @@ class VideoAnnotation:
     #: Authorship of the annotation (reviewers)
     reviewers: Optional[List[AnnotationAuthor]] = None
 
+    #: The darwin ID of this annotation (``None`` when the parsed data provides no ID).
+    id: Optional[str] = None
+
     def get_data(
         self,
         only_keyframes: bool = True,
@@ -388,6 +394,9 @@ class AnnotationFile:
     # e.g. (1, 0, 'a')
     version: AnnotationFileVersion = field(default_factory=AnnotationFileVersion)
 
+    # The darwin ID of the item that these annotations belong to.
+    item_id: Optional[str] = None
+
     @property
     def full_path(self) -> str:
         """

diff --git a/darwin/utils.py b/darwin/utils.py
@@ -436,6 +436,7 @@ def _parse_darwin_v2(path: Path, data: Dict[str, Any]) -> dt.AnnotationFile:
             version=_parse_version(data),
             path=path,
             filename=item["name"],
+            item_id=item.get("source_info", {}).get("item_id", None),
             dataset_name=item.get("source_info", {}).get("dataset", {}).get("name", None),
             annotation_classes=annotation_classes,
             annotations=annotations,
@@ -456,6 +457,7 @@ def _parse_darwin_v2(path: Path, data: Dict[str, Any]) -> dt.AnnotationFile:
             version=_parse_version(data),
             path=path,
             filename=item["name"],
+            item_id=item.get("source_info", {}).get("item_id", None),
             dataset_name=item.get("source_info", {}).get("dataset", {}).get("name", None),
             annotation_classes=annotation_classes,
             annotations=annotations,
@@ -632,6 +634,8 @@ def _parse_darwin_annotation(annotation: Dict[str, Any]) -> Optional[dt.Annotati
         print(f"[WARNING] Unsupported annotation type: '{annotation.keys()}'")
         return None
 
+    if "id" in annotation:
+        main_annotation.id = annotation["id"]
     if "instance_id" in annotation:
         main_annotation.subs.append(dt.make_instance_id(annotation["instance_id"]["value"]))
     if "attributes" in annotation:
@@ -662,7 +666,9 @@ def _parse_darwin_video_annotation(annotation: dict) -> Optional[dt.VideoAnnotat
     keyframes: Dict[int, bool] = {}
     frames = {**annotation.get("frames", {}), **annotation.get("sections", {})}
     for f, frame in frames.items():
-        frame_annotations[int(f)] = _parse_darwin_annotation({**frame, **{"name": name}})
+        frame_annotations[int(f)] = _parse_darwin_annotation(
+            {**frame, **{"name": name, "id": annotation.get("id", None)}}
+        )
         keyframes[int(f)] = frame.get("keyframe", False)
 
     if not frame_annotations:
@@ -675,6 +681,9 @@ def _parse_darwin_video_annotation(annotation: dict) -> Optional[dt.VideoAnnotat
         slot_names=parse_slot_names(annotation),
     )
 
+    if "id" in annotation:
+        main_annotation.id = annotation["id"]
+
     if "annotators" in annotation:
         main_annotation.annotators = _parse_annotators(annotation["annotators"])
 
@@ -734,6 +743,7 @@ def split_video_annotation(annotation: dt.AnnotationFile) -> List[dt.AnnotationF
                 frame_url,
                 annotation.workview_url,
                 annotation.seq,
+                item_id=annotation.item_id,
                 slots=annotation.slots,
             )
         )

diff --git a/tests/darwin/utils_test.py b/tests/darwin/utils_test.py
@@ -149,7 +149,7 @@ def it_parses_darwin_images_correctly(tmp_path):
         assert annotation_file.path == import_file
         assert annotation_file.filename == "P49-RediPad-ProPlayLEFTY_442.jpg"
         assert annotation_file.dataset_name == None
-        assert annotation_file.version == dt.AnnotationFileVersion(major=1, minor=0, suffix='')
+        assert annotation_file.version == dt.AnnotationFileVersion(major=1, minor=0, suffix="")
 
         assert len(annotation_file.annotations) == 2
         assert len(annotation_file.annotation_classes) == 2
@@ -236,7 +236,7 @@ def it_parses_darwin_videos_correctly(tmp_path):
         assert annotation_file.path == import_file
         assert annotation_file.filename == "above tractor.mp4"
         assert annotation_file.dataset_name == None
-        assert annotation_file.version == dt.AnnotationFileVersion(major=1, minor=0, suffix='')
+        assert annotation_file.version == dt.AnnotationFileVersion(major=1, minor=0, suffix="")
 
         assert len(annotation_file.annotations) == 1
         assert len(annotation_file.annotation_classes) == 1
@@ -272,7 +272,7 @@ def it_parses_darwin_videos_correctly(tmp_path):
             )
         ]
 
-    def it_parses_darwin_v2_correctly(tmp_path):
+    def it_parses_darwin_v2_images_correctly(tmp_path):
         content = """
         {
           "version": "2.0",
@@ -355,19 +355,135 @@ def it_parses_darwin_v2_correctly(tmp_path):
         assert annotation_file.path == import_file
         assert annotation_file.filename == "item-0.jpg"
         assert annotation_file.dataset_name == "Dataset 0"
-        assert annotation_file.version == dt.AnnotationFileVersion(major=2, minor=0, suffix='')
+        assert annotation_file.item_id == "0185c280-bbad-6117-71a7-a6853a6e3f2e"
+        assert annotation_file.version == dt.AnnotationFileVersion(major=2, minor=0, suffix="")
 
         assert len(annotation_file.annotations) == 1
         assert len(annotation_file.annotation_classes) == 1
+        assert annotation_file.annotations[0].id == "f8f5f235-bd47-47be-b4fe-07d49e0177a7"
         assert not annotation_file.is_video
         assert annotation_file.image_width == 123
         assert annotation_file.image_height == 456
-        assert annotation_file.image_url == "http://example.com/fake-api-url/v2/teams/v7/uploads/43a83276-1abf-483b-877e-6e61349f2d1f"
-        assert annotation_file.workview_url == "http://example.com/workview?dataset=545&item=0185c280-bbad-6117-71a7-a6853a6e3f2e"
+        assert (
+            annotation_file.image_url
+            == "http://example.com/fake-api-url/v2/teams/v7/uploads/43a83276-1abf-483b-877e-6e61349f2d1f"
+        )
+        assert (
+            annotation_file.workview_url
+            == "http://example.com/workview?dataset=545&item=0185c280-bbad-6117-71a7-a6853a6e3f2e"
+        )
         assert not annotation_file.seq
         assert not annotation_file.frame_urls
         assert annotation_file.remote_path == "/path-0/folder"
 
+    def it_parses_darwin_v2_videos_correctly(tmp_path):
+        content = """
+        {
+          "version": "2.0",
+          "schema_ref": "https://darwin-public.s3.eu-west-1.amazonaws.com/darwin_json_2_0.schema.json",
+          "item": {
+            "name": "item-0.mp4",
+            "path": "/path-0/folder",
+            "source_info": {
+              "dataset": {
+                "name": "Dataset 0",
+                "slug": "dataset-0",
+                "dataset_management_url": "http://example.com/datasets/545/dataset-management"
+              },
+              "item_id": "0185c280-bbad-6117-71a7-a6853a6e3f2e",
+              "team": {
+                "name": "Team 0",
+                "slug": "team-0"
+              },
+              "workview_url": "http://example.com/workview?dataset=545&item=0185c280-bbad-6117-71a7-a6853a6e3f2e"
+            },
+            "slots": [
+              {
+                "type": "video",
+                "slot_name": "0",
+                "width": 123,
+                "height": 456,
+                "thumbnail_url": "http://example.com/fake-api-url/v2/teams/v7/files/71857eb3-6feb-428a-8fc6-0c8a895ea611/thumbnail",
+                "frame_urls": [
+                  "http://example.com/fake-api-url/v2/teams/v7/files/71857eb3-6feb-428a-8fc6-0c8a895ea611/frames/1",
+                  "http://example.com/fake-api-url/v2/teams/v7/files/71857eb3-6feb-428a-8fc6-0c8a895ea611/frames/2"
+                ],
+                "frame_count": 2,
+                "source_files": [
+                  {
+                    "file_name": "file-0",
+                    "url": "http://example.com/fake-api-url/v2/teams/v7/uploads/43a83276-1abf-483b-877e-6e61349f2d1f"
+                  }
+                ]
+              }
+            ]
+          },
+          "annotations": [
+            {
+              "frames": {
+                "3": {
+                  "bounding_box": {
+                    "h": 2,
+                    "w": 1,
+                    "x": 1,
+                    "y": 1
+                  },
+                  "polygon": {
+                    "paths": [
+                      [
+                        { "x": 1, "y": 1 },
+                        { "x": 2, "y": 2 },
+                        { "x": 1, "y": 3 }
+                      ]
+                    ]
+                  }
+                }
+              },
+              "id": "f8f5f235-bd47-47be-b4fe-07d49e0177a7",
+              "interpolate_algorithm": "linear-1.1",
+              "interpolated": true,
+              "name": "polygon",
+              "ranges": [ [ 0, 1 ] ],
+              "slot_names": [
+                "1"
+              ]
+            }
+          ]
+        }
+        """
+
+        directory = tmp_path / "imports"
+        directory.mkdir()
+        import_file = directory / "darwin-file.json"
+        import_file.write_text(content)
+
+        annotation_file: dt.AnnotationFile = parse_darwin_json(import_file, None)
+
+        assert annotation_file.path == import_file
+        assert annotation_file.filename == "item-0.mp4"
+        assert annotation_file.dataset_name == "Dataset 0"
+        assert annotation_file.item_id == "0185c280-bbad-6117-71a7-a6853a6e3f2e"
+        assert annotation_file.version == dt.AnnotationFileVersion(major=2, minor=0, suffix="")
+
+        assert len(annotation_file.annotations) == 1
+        assert len(annotation_file.annotation_classes) == 1
+        assert annotation_file.annotations[0].id == "f8f5f235-bd47-47be-b4fe-07d49e0177a7"
+        assert list(annotation_file.annotations[0].frames.keys()) == [3]
+        assert annotation_file.annotations[0].frames[3].id == "f8f5f235-bd47-47be-b4fe-07d49e0177a7"
+        assert annotation_file.is_video
+        assert annotation_file.image_width == 123
+        assert annotation_file.image_height == 456
+        assert (
+            annotation_file.image_url
+            == "http://example.com/fake-api-url/v2/teams/v7/uploads/43a83276-1abf-483b-877e-6e61349f2d1f"
+        )
+        assert (
+            annotation_file.workview_url
+            == "http://example.com/workview?dataset=545&item=0185c280-bbad-6117-71a7-a6853a6e3f2e"
+        )
+        assert not annotation_file.seq
+        assert len(annotation_file.frame_urls) == 2
+        assert annotation_file.remote_path == "/path-0/folder"
 
     def it_returns_None_if_no_annotations_exist(tmp_path):
         content = """