v7labs · vvihorev · May 9, 2025 · May 7, 2025 · May 7, 2025 · May 7, 2025
diff --git a/darwin/datatypes.py b/darwin/datatypes.py
@@ -25,9 +25,9 @@
     NDArray = Any  # type:ignore
 
 from darwin.future.data_objects.properties import (
+    PropertyGranularity,
     PropertyType,
     SelectedProperty,
-    PropertyGranularity,
 )
 from darwin.path_utils import construct_full_path, is_properties_enabled, parse_metadata
 
@@ -90,6 +90,15 @@ def from_dict(cls, json: JSONFreeForm) -> "JSONType":
         return cls(**json)
 
 
+def sorted_nested_lists(obj: Any) -> Any:
+    """Canonicalise ``obj`` recursively (lists and dict keys ordered by ``repr``)."""
+    if isinstance(obj, dict):
+        return {k: sorted_nested_lists(obj[k]) for k in sorted(obj, key=repr)}
+    if isinstance(obj, list):  # repr key: dicts/None/mixed types are unorderable
+        return sorted((sorted_nested_lists(x) for x in obj), key=repr)
+    return obj
+
+
 AnnotationType = Literal[  # NB: Some of these are not supported yet
     "bounding_box",
     "polygon",
@@ -108,6 +117,15 @@ def from_dict(cls, json: JSONFreeForm) -> "JSONType":
 ]
 
 
+class SubAnnotationType(str, Enum):  # str mixin: members equal their raw string values
+    TEXT = "text"  # each value is also the key read from a frame's data when de-duplicating
+    ATTRIBUTES = "attributes"
+    INSTANCE_ID = "instance_id"
+    INFERENCE = "inference"
+    DIRECTIONAL_VECTOR = "directional_vector"
+    MEASURES = "measures"
+
+
 @dataclass
 class Team:
     """
@@ -169,7 +187,7 @@ class SubAnnotation:
     """
 
     #: The type of this ``SubAnnotation``.
-    annotation_type: str
+    annotation_type: SubAnnotationType
 
     #: Any external data, in any format, relevant to this ``SubAnnotation``.
     #: Used for compatibility purposes with external formats.
@@ -346,6 +364,28 @@ def post_processing(
             "hidden_areas": self.hidden_areas,
         }
 
+        # Sub-annotations are compared per frame as one group: keep all or drop all
+        last_frame_subannotations: Dict[str, Any] = {}
+
+        for idx in sorted(output["frames"], key=int):
+            frame_data = output["frames"][idx]
+            current_frame_subannotations: Dict[str, Any] = {}
+
+            for subannotation_name in SubAnnotationType:
+                value = frame_data.get(subannotation_name.value)
+                if value is None:
+                    continue
+                current_frame_subannotations[subannotation_name.value] = (
+                    sorted_nested_lists(value)
+                )
+
+            if current_frame_subannotations == last_frame_subannotations:
+                for subannotation in current_frame_subannotations:
+                    frame_data.pop(subannotation)
+            else:
+                last_frame_subannotations.clear()
+                last_frame_subannotations.update(current_frame_subannotations)
+
         return output
 
 
@@ -1315,7 +1355,7 @@ def make_instance_id(value: int) -> SubAnnotation:
     SubAnnotation
         An instance id ``SubAnnotation``.
     """
-    return SubAnnotation("instance_id", value)
+    return SubAnnotation(SubAnnotationType.INSTANCE_ID, value)
 
 
 def make_attributes(attributes: List[str]) -> SubAnnotation:
@@ -1332,7 +1372,7 @@ def make_attributes(attributes: List[str]) -> SubAnnotation:
     SubAnnotation
         An attributes ``SubAnnotation``.
     """
-    return SubAnnotation("attributes", attributes)
+    return SubAnnotation(SubAnnotationType.ATTRIBUTES, attributes)
 
 
 def make_text(text: str) -> SubAnnotation:
@@ -1349,7 +1389,7 @@ def make_text(text: str) -> SubAnnotation:
     SubAnnotation
         A text ``SubAnnotation``.
     """
-    return SubAnnotation("text", text)
+    return SubAnnotation(SubAnnotationType.TEXT, text)
 
 
 def make_opaque_sub(type: str, data: UnknownType) -> SubAnnotation:
@@ -1369,7 +1409,7 @@ def make_opaque_sub(type: str, data: UnknownType) -> SubAnnotation:
     SubAnnotation
         A text ``SubAnnotation``.
     """
-    return SubAnnotation(type, data)
+    return SubAnnotation(SubAnnotationType(type), data)
 
 
 KeyFrame = Dict[str, Union[int, Annotation]]

diff --git a/darwin/utils/utils.py b/darwin/utils/utils.py
@@ -889,10 +889,6 @@ def _parse_darwin_annotation(
         main_annotation.subs.append(
             dt.make_opaque_sub("measures", annotation["measures"])
         )
-    if "auto_annotate" in annotation:
-        main_annotation.subs.append(
-            dt.make_opaque_sub("auto_annotate", annotation["auto_annotate"])
-        )
 
     if annotation.get("annotators") is not None:
         main_annotation.annotators = _parse_annotators(annotation["annotators"])

diff --git a/tests/darwin/datatypes_test.py b/tests/darwin/datatypes_test.py
@@ -2,16 +2,19 @@
 import shutil
 import tempfile
 from pathlib import Path
-from typing import Dict, List
+from typing import Dict, List, Any
 
 import pytest
 
 from darwin.client import Client
 from darwin.config import Config
 from darwin.dataset.remote_dataset_v2 import RemoteDatasetV2
 from darwin.datatypes import (
+    AnnotationClass,
+    Annotation,
     ObjectStore,
     Point,
+    VideoAnnotation,
     make_polygon,
     parse_property_classes,
     split_paths_by_metadata,
@@ -188,3 +191,186 @@ def test_repr(self, object_store):
             repr(object_store)
             == "ObjectStore(name=test, prefix=test_prefix, readonly=False, provider=aws)"
         )
+
+
+class TestVideoAnnotationGetData:  # sub-annotation de-duplication in get_data()
+    def test_frames_sorted_numerically_for_duplicate_attribute_removal(self):
+        annotation_class = AnnotationClass("test", "polygon")
+        annotation1 = Annotation(annotation_class, {"data": "frame_1"})
+        annotation10 = Annotation(annotation_class, {"data": "frame_10"})
+        annotation2 = Annotation(annotation_class, {"data": "frame_2"})
+        keyframes = {1: True, 10: True, 2: True}
+        segments = [[1, 10]]
+        interpolated = True
+        slot_names = ["main"]
+
+        # Frame keys are deliberately inserted out of numeric order (1, 10, 2)
+        frames = {
+            1: annotation1,
+            10: annotation10,
+            2: annotation2,
+        }
+
+        video_annotation = VideoAnnotation(
+            annotation_class,
+            frames,
+            keyframes,
+            segments,
+            interpolated,
+            slot_names,
+        )
+
+        def mock_post_processing(
+            annotation: Any, data: Dict[str, Any]
+        ) -> Dict[str, Any]:
+            if annotation == annotation1:
+                data["attributes"] = ["attr1"]
+            elif annotation == annotation2:
+                data["attributes"] = ["attr1"]  # Same as frame 1, should be removed
+            elif annotation == annotation10:
+                data["attributes"] = ["attr10"]  # Different from previous frames
+            return data
+
+        result = video_annotation.get_data(post_processing=mock_post_processing)
+        # Frame 2 only loses its attributes when compared with frame 1, not frame 10
+        assert "attributes" in result["frames"][1]
+        assert result["frames"][1]["attributes"] == ["attr1"]
+
+        assert (
+            "attributes" not in result["frames"][2]
+        ), "Duplicate attributes should be removed"
+
+        assert "attributes" in result["frames"][10]
+        assert result["frames"][10]["attributes"] == ["attr10"]
+
+    def test_attributes_equality_for_lists_ignores_order(self):
+        annotation1 = Annotation(
+            AnnotationClass("test", "polygon"), {"data": "frame_1"}
+        )
+        annotation2 = Annotation(
+            AnnotationClass("test", "polygon"), {"data": "frame_2"}
+        )
+        annotation_class = AnnotationClass("test", "polygon")
+        keyframes = {1: True, 2: True}
+        segments = [[1, 2]]
+        interpolated = True
+        slot_names = ["main"]
+
+        frames = {
+            1: annotation1,
+            2: annotation2,
+        }
+
+        video_annotation = VideoAnnotation(
+            annotation_class,
+            frames,
+            keyframes,
+            segments,
+            interpolated,
+            slot_names,
+        )
+
+        def mock_post_processing(
+            annotation: Any, data: Dict[str, Any]
+        ) -> Dict[str, Any]:
+            if annotation == annotation1:
+                data["attributes"] = ["attr1", "attr2"]
+            elif annotation == annotation2:
+                data["attributes"] = [
+                    "attr2",
+                    "attr1",
+                ]  # Same elements as frame 1, listed in a different order
+            return data
+
+        result = video_annotation.get_data(post_processing=mock_post_processing)
+
+        assert "attributes" in result["frames"][1]
+        assert result["frames"][1]["attributes"] == ["attr1", "attr2"]
+
+        assert (
+            "attributes" not in result["frames"][2]
+        ), "Different order lists should be considered the same set of attributes"
+
+    def test_all_subannotation_present_if_any_are_changed_none_present_otherwise(self):
+        """If any sub-annotation changes, a frame keeps them all; if none change, it keeps none."""
+        annotation_class = AnnotationClass("test", "polygon")
+        annotation1 = Annotation(annotation_class, {"data": "frame_1"})
+        annotation2 = Annotation(annotation_class, {"data": "frame_2"})
+        annotation3 = Annotation(annotation_class, {"data": "frame_3"})
+        annotation4 = Annotation(annotation_class, {"data": "frame_4"})
+
+        keyframes = {1: True, 2: True, 3: True, 4: True}
+        segments = [[1, 4]]
+        interpolated = True
+        slot_names = ["main"]
+
+        frames = {
+            1: annotation1,
+            2: annotation2,
+            3: annotation3,
+            4: annotation4,
+        }
+
+        video_annotation = VideoAnnotation(
+            annotation_class,
+            frames,
+            keyframes,
+            segments,
+            interpolated,
+            slot_names,
+        )
+
+        def mock_post_processing(
+            annotation: Any, data: Dict[str, Any]
+        ) -> Dict[str, Any]:
+            # Frame 1: Set initial values for all three sub-annotations
+            if annotation == annotation1:
+                data["text"] = "Initial text"
+                data["attributes"] = ["attr1", "attr2"]
+                data["instance_id"] = 123
+
+            # Frame 2: Keep the same values (should be removed in output)
+            elif annotation == annotation2:
+                data["text"] = "Initial text"
+                data["attributes"] = ["attr1", "attr2"]
+                data["instance_id"] = 123
+
+            # Frame 3: Change only one sub-annotation (text)
+            elif annotation == annotation3:
+                data["text"] = "Updated text"  # Changed from frame 2
+                data["attributes"] = ["attr1", "attr2"]
+                data["instance_id"] = 123
+
+            # Frame 4: Keep the same values from frame 3 (should be removed in output)
+            elif annotation == annotation4:
+                data["text"] = "Updated text"
+                data["attributes"] = ["attr1", "attr2"]
+                data["instance_id"] = 123
+
+            return data
+
+        result = video_annotation.get_data(post_processing=mock_post_processing)
+
+        # Frame 1: All sub-annotations should be present
+        frame1 = result["frames"][1]
+        assert frame1["text"] == "Initial text"
+        assert frame1["attributes"] == ["attr1", "attr2"]
+        assert frame1["instance_id"] == 123
+
+        # Frame 2: All sub-annotations should be removed (unchanged from frame 1)
+        frame2 = result["frames"][2]
+        assert "text" not in frame2
+        assert "attributes" not in frame2
+        assert "instance_id" not in frame2
+
+        # Frame 3: All sub-annotations should be present (text changed from frame 2)
+        frame3 = result["frames"][3]
+        assert frame3["text"] == "Updated text"
+        assert frame3["attributes"] == ["attr1", "attr2"]
+        assert frame3["instance_id"] == 123
+
+        # Frame 4: All sub-annotations should be removed (unchanged from frame 3)
+        frame4 = result["frames"][4]
+        assert "text" not in frame4
+        assert "attributes" not in frame4
+        assert "instance_id" not in frame4
diff --git a/tests/darwin/importer/importer_test.py b/tests/darwin/importer/importer_test.py
@@ -739,7 +739,7 @@ def test__get_annotation_data_video_annotation_with_attributes_that_become_empty
     assert result["frames"][4]["attributes"] == {"attributes": []}
 
 
-def test__get_annotation_data_video_annotation_does_not_wipe_sub_annotations_when_keyframe_is_true() -> (
+def test__get_annotation_data_video_annotation_only_stores_updates_to_sub_annotations_when_keyframe_is_true() -> (
     None
 ):
     from darwin.importer.importer import _get_annotation_data
@@ -785,7 +785,8 @@ def test__get_annotation_data_video_annotation_does_not_wipe_sub_annotations_whe
     attributes = {"video_class_id": {"attribute_1": "id_1", "attribute_2": "id_2"}}
     result = _get_annotation_data(video_annotation, "video_class_id", attributes)
     assert result["frames"][1]["attributes"] == {"attributes": ["id_1", "id_2"]}
-    assert result["frames"][3]["attributes"] == {"attributes": ["id_1", "id_2"]}
+    assert 2 not in result["frames"]
+    assert result["frames"][3].get("attributes") is None
 
 
 def __expectation_factory(i: int, slot_names: List[str]) -> dt.Annotation: