From af740a1fa008a5ba70e3145dda3325c1b8ec8603 Mon Sep 17 00:00:00 2001 From: Valentin Vikhorev Date: Wed, 7 May 2025 12:04:09 +0200 Subject: [PATCH 1/5] Only put attributes in the payload if the attributes have changed --- darwin/datatypes.py | 8 ++++++++ tests/darwin/importer/importer_test.py | 5 +++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/darwin/datatypes.py b/darwin/datatypes.py index f931a8ca1..a5cd94015 100644 --- a/darwin/datatypes.py +++ b/darwin/datatypes.py @@ -346,6 +346,14 @@ def post_processing( "hidden_areas": self.hidden_areas, } + # Only put attributes in the payload if the attributes have changed this frame + last_attributes = None + for idx, frame in output["frames"].items(): + attributes = frame.get("attributes") + if attributes is not None and attributes == last_attributes: + output["frames"][idx].pop("attributes") + last_attributes = attributes + return output diff --git a/tests/darwin/importer/importer_test.py b/tests/darwin/importer/importer_test.py index 5840f87f0..68789328c 100644 --- a/tests/darwin/importer/importer_test.py +++ b/tests/darwin/importer/importer_test.py @@ -739,7 +739,7 @@ def test__get_annotation_data_video_annotation_with_attributes_that_become_empty assert result["frames"][4]["attributes"] == {"attributes": []} -def test__get_annotation_data_video_annotation_does_not_wipe_sub_annotations_when_keyframe_is_true() -> ( +def test__get_annotation_data_video_annotation_only_stores_updates_to_sub_annotations_when_keyframe_is_true() -> ( None ): from darwin.importer.importer import _get_annotation_data @@ -785,7 +785,8 @@ def test__get_annotation_data_video_annotation_does_not_wipe_sub_annotations_whe attributes = {"video_class_id": {"attribute_1": "id_1", "attribute_2": "id_2"}} result = _get_annotation_data(video_annotation, "video_class_id", attributes) assert result["frames"][1]["attributes"] == {"attributes": ["id_1", "id_2"]} - assert result["frames"][3]["attributes"] == {"attributes": ["id_1", 
"id_2"]} + assert result["frames"].get(2) == None + assert result["frames"][3].get("attributes") == None def __expectation_factory(i: int, slot_names: List[str]) -> dt.Annotation: From f07f9590df1cbd0086e7e8e88d507be20fdae300 Mon Sep 17 00:00:00 2001 From: Valentin Vikhorev <33204359+vvihorev@users.noreply.github.com> Date: Wed, 7 May 2025 12:14:30 +0200 Subject: [PATCH 2/5] Update tests/darwin/importer/importer_test.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/darwin/importer/importer_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/darwin/importer/importer_test.py b/tests/darwin/importer/importer_test.py index 68789328c..bd1231631 100644 --- a/tests/darwin/importer/importer_test.py +++ b/tests/darwin/importer/importer_test.py @@ -785,7 +785,7 @@ def test__get_annotation_data_video_annotation_only_stores_updates_to_sub_annota attributes = {"video_class_id": {"attribute_1": "id_1", "attribute_2": "id_2"}} result = _get_annotation_data(video_annotation, "video_class_id", attributes) assert result["frames"][1]["attributes"] == {"attributes": ["id_1", "id_2"]} - assert result["frames"].get(2) == None + assert 2 not in result["frames"] assert result["frames"][3].get("attributes") == None From 6d5e98e6b0d87acb6bf322caa481347ad2316aaa Mon Sep 17 00:00:00 2001 From: Valentin Vikhorev Date: Wed, 7 May 2025 12:20:31 +0200 Subject: [PATCH 3/5] Ensure frames are sorted, fix None comparison --- darwin/datatypes.py | 4 ++-- tests/darwin/importer/importer_test.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/darwin/datatypes.py b/darwin/datatypes.py index a5cd94015..576af26f8 100644 --- a/darwin/datatypes.py +++ b/darwin/datatypes.py @@ -348,8 +348,8 @@ def post_processing( # Only put attributes in the payload if the attributes have changed this frame last_attributes = None - for idx, frame in output["frames"].items(): - attributes = frame.get("attributes") + for idx in 
sorted(output["frames"]): + attributes = output["frames"][idx].get("attributes") if attributes is not None and attributes == last_attributes: output["frames"][idx].pop("attributes") last_attributes = attributes diff --git a/tests/darwin/importer/importer_test.py b/tests/darwin/importer/importer_test.py index bd1231631..8725b7d50 100644 --- a/tests/darwin/importer/importer_test.py +++ b/tests/darwin/importer/importer_test.py @@ -786,7 +786,7 @@ def test__get_annotation_data_video_annotation_only_stores_updates_to_sub_annota result = _get_annotation_data(video_annotation, "video_class_id", attributes) assert result["frames"][1]["attributes"] == {"attributes": ["id_1", "id_2"]} assert 2 not in result["frames"] - assert result["frames"][3].get("attributes") == None + assert result["frames"][3].get("attributes") is None def __expectation_factory(i: int, slot_names: List[str]) -> dt.Annotation: From 24807d3194e8b8b2db6555bedc3edbe8db2bcbc0 Mon Sep 17 00:00:00 2001 From: Valentin Vikhorev Date: Thu, 8 May 2025 18:05:45 +0200 Subject: [PATCH 4/5] Treat all subannotations as a set, include in keyframe if any changed, drop all otherwise --- darwin/datatypes.py | 55 ++++++++-- tests/darwin/datatypes_test.py | 188 ++++++++++++++++++++++++++++++++- 2 files changed, 231 insertions(+), 12 deletions(-) diff --git a/darwin/datatypes.py b/darwin/datatypes.py index 576af26f8..a6ca107f3 100644 --- a/darwin/datatypes.py +++ b/darwin/datatypes.py @@ -90,6 +90,15 @@ def from_dict(cls, json: JSONFreeForm) -> "JSONType": return cls(**json) +def sorted_nested_lists(obj: Any) -> Any: + if isinstance(obj, dict): + return {k: sorted_nested_lists(v) for k, v in obj.items()} + elif isinstance(obj, list): + return sorted(sorted_nested_lists(x) for x in obj) + else: + return obj + + AnnotationType = Literal[ # NB: Some of these are not supported yet "bounding_box", "polygon", @@ -108,6 +117,16 @@ def from_dict(cls, json: JSONFreeForm) -> "JSONType": ] +class SubAnnotationType(str, Enum): + TEXT = 
"text" + ATTRIBUTES = "attributes" + INSTANCE_ID = "instance_id" + INFERENCE = "inference" + DIRECTIONAL_VECTOR = "directional_vector" + MEASURES = "measures" + AUTO_ANNOTATE = "auto_annotate" + + @dataclass class Team: """ @@ -169,7 +188,7 @@ class SubAnnotation: """ #: The type of this ``SubAnnotation``. - annotation_type: str + annotation_type: SubAnnotationType #: Any external data, in any format, relevant to this ``SubAnnotation``. #: Used for compatibility purposes with external formats. @@ -346,13 +365,27 @@ def post_processing( "hidden_areas": self.hidden_areas, } - # Only put attributes in the payload if the attributes have changed this frame - last_attributes = None - for idx in sorted(output["frames"]): - attributes = output["frames"][idx].get("attributes") - if attributes is not None and attributes == last_attributes: - output["frames"][idx].pop("attributes") - last_attributes = attributes + # Track all subannotation attributes as a set for each frame + last_frame_subannotations: Dict[str, Any] = {} + + for idx in sorted(output["frames"], key=int): + frame_data = output["frames"][idx] + current_frame_subannotations: Dict[str, Any] = {} + + for subannotation_name in SubAnnotationType: + value = frame_data.get(subannotation_name.value) + if value is None: + continue + current_frame_subannotations[subannotation_name.value] = ( + sorted_nested_lists(value) + ) + + if current_frame_subannotations == last_frame_subannotations: + for subannotation in current_frame_subannotations: + frame_data.pop(subannotation) + else: + last_frame_subannotations.clear() + last_frame_subannotations.update(current_frame_subannotations) return output @@ -1323,7 +1356,7 @@ def make_instance_id(value: int) -> SubAnnotation: SubAnnotation An instance id ``SubAnnotation``. 
""" - return SubAnnotation("instance_id", value) + return SubAnnotation(SubAnnotationType.INSTANCE_ID, value) def make_attributes(attributes: List[str]) -> SubAnnotation: @@ -1340,7 +1373,7 @@ def make_attributes(attributes: List[str]) -> SubAnnotation: SubAnnotation An attributes ``SubAnnotation``. """ - return SubAnnotation("attributes", attributes) + return SubAnnotation(SubAnnotationType.ATTRIBUTES, attributes) def make_text(text: str) -> SubAnnotation: @@ -1357,7 +1390,7 @@ def make_text(text: str) -> SubAnnotation: SubAnnotation A text ``SubAnnotation``. """ - return SubAnnotation("text", text) + return SubAnnotation(SubAnnotationType.TEXT, text) def make_opaque_sub(type: str, data: UnknownType) -> SubAnnotation: diff --git a/tests/darwin/datatypes_test.py b/tests/darwin/datatypes_test.py index e2fb6d662..2d3d37e34 100644 --- a/tests/darwin/datatypes_test.py +++ b/tests/darwin/datatypes_test.py @@ -2,7 +2,7 @@ import shutil import tempfile from pathlib import Path -from typing import Dict, List +from typing import Dict, List, Any import pytest @@ -10,8 +10,11 @@ from darwin.config import Config from darwin.dataset.remote_dataset_v2 import RemoteDatasetV2 from darwin.datatypes import ( + AnnotationClass, + Annotation, ObjectStore, Point, + VideoAnnotation, make_polygon, parse_property_classes, split_paths_by_metadata, @@ -188,3 +191,186 @@ def test_repr(self, object_store): repr(object_store) == "ObjectStore(name=test, prefix=test_prefix, readonly=False, provider=aws)" ) + + +class TestVideoAnnotationGetData: + def test_frames_sorted_numerically_for_duplicate_attribute_removal(self): + annotation_class = AnnotationClass("test", "polygon") + annotation1 = Annotation(annotation_class, {"data": "frame_1"}) + annotation10 = Annotation(annotation_class, {"data": "frame_10"}) + annotation2 = Annotation(annotation_class, {"data": "frame_2"}) + keyframes = {1: True, 10: True, 2: True} + segments = [[1, 10]] + interpolated = True + slot_names = ["main"] + + # Source 
frames are out of order + frames = { + 1: annotation1, + 10: annotation10, + 2: annotation2, + } + + video_annotation = VideoAnnotation( + annotation_class, + frames, + keyframes, + segments, + interpolated, + slot_names, + ) + + def mock_post_processing( + annotation: Any, data: Dict[str, Any] + ) -> Dict[str, Any]: + if annotation == annotation1: + data["attributes"] = ["attr1"] + elif annotation == annotation2: + data["attributes"] = ["attr1"] # Same as frame 1, should be removed + elif annotation == annotation10: + data["attributes"] = ["attr10"] # Different from previous frames + return data + + result = video_annotation.get_data(post_processing=mock_post_processing) + + assert "attributes" in result["frames"][1] + assert result["frames"][1]["attributes"] == ["attr1"] + + assert ( + "attributes" not in result["frames"][2] + ), "Duplicate attributes should be removed" + + assert "attributes" in result["frames"][10] + assert result["frames"][10]["attributes"] == ["attr10"] + + def test_attributes_equality_for_lists_ignores_order(self): + annotation1 = Annotation( + AnnotationClass("test", "polygon"), {"data": "frame_1"} + ) + annotation2 = Annotation( + AnnotationClass("test", "polygon"), {"data": "frame_2"} + ) + annotation_class = AnnotationClass("test", "polygon") + keyframes = {1: True, 2: True} + segments = [[1, 2]] + interpolated = True + slot_names = ["main"] + + frames = { + 1: annotation1, + 2: annotation2, + } + + video_annotation = VideoAnnotation( + annotation_class, + frames, + keyframes, + segments, + interpolated, + slot_names, + ) + + def mock_post_processing( + annotation: Any, data: Dict[str, Any] + ) -> Dict[str, Any]: + if annotation == annotation1: + data["attributes"] = ["attr1", "attr2"] + elif annotation == annotation2: + data["attributes"] = [ + "attr2", + "attr1", + ] # Same elements, different order + return data + + result = video_annotation.get_data(post_processing=mock_post_processing) + + assert "attributes" in result["frames"][1] 
+ assert result["frames"][1]["attributes"] == ["attr1", "attr2"] + + assert ( + "attributes" not in result["frames"][2] + ), "Different order lists should be considered the same set of attributes" + + def test_all_subannotation_present_if_any_are_changed_none_present_otherwise(self): + """Test all subannotation attributes are correctly processed for changes between frames.""" + annotation_class = AnnotationClass("test", "polygon") + annotation1 = Annotation(annotation_class, {"data": "frame_1"}) + annotation2 = Annotation(annotation_class, {"data": "frame_2"}) + annotation3 = Annotation(annotation_class, {"data": "frame_3"}) + annotation4 = Annotation(annotation_class, {"data": "frame_4"}) + + keyframes = {1: True, 2: True, 3: True, 4: True} + segments = [[1, 4]] + interpolated = True + slot_names = ["main"] + + frames = { + 1: annotation1, + 2: annotation2, + 3: annotation3, + 4: annotation4, + } + + video_annotation = VideoAnnotation( + annotation_class, + frames, + keyframes, + segments, + interpolated, + slot_names, + ) + + def mock_post_processing( + annotation: Any, data: Dict[str, Any] + ) -> Dict[str, Any]: + # Frame 1: Set initial values for all attributes + if annotation == annotation1: + data["text"] = "Initial text" + data["attributes"] = ["attr1", "attr2"] + data["instance_id"] = 123 + + # Frame 2: Keep the same values (should be removed in output) + elif annotation == annotation2: + data["text"] = "Initial text" + data["attributes"] = ["attr1", "attr2"] + data["instance_id"] = 123 + + # Frame 3: Change only one attribute (text) + elif annotation == annotation3: + data["text"] = "Updated text" # Changed from frame 2 + data["attributes"] = ["attr1", "attr2"] + data["instance_id"] = 123 + + # Frame 4: Keep the same values from frame 3 (should be removed in output) + elif annotation == annotation4: + data["text"] = "Updated text" + data["attributes"] = ["attr1", "attr2"] + data["instance_id"] = 123 + + return data + + result = 
video_annotation.get_data(post_processing=mock_post_processing) + + # Frame 1: All attributes should be present + frame1 = result["frames"][1] + assert frame1["text"] == "Initial text" + assert frame1["attributes"] == ["attr1", "attr2"] + assert frame1["instance_id"] == 123 + + # Frame 2: All attributes should be removed (unchanged from frame 1) + frame2 = result["frames"][2] + assert "text" not in frame2 + assert "attributes" not in frame2 + assert "instance_id" not in frame2 + + # Frame 3: All attributes should be present (text changed from frame 2) + frame3 = result["frames"][3] + assert frame3["text"] == "Updated text" + assert frame3["attributes"] == ["attr1", "attr2"] + assert frame3["instance_id"] == 123 + + # Frame 4: All attributes should be removed (unchanged from frame 3) + frame4 = result["frames"][4] + assert "text" not in frame4 + assert "attributes" not in frame4 + assert "instance_id" not in frame4 From 0c67e330269ade85090851514afe526ffd26a656 Mon Sep 17 00:00:00 2001 From: Valentin Vikhorev Date: Fri, 9 May 2025 14:49:57 +0200 Subject: [PATCH 5/5] Remove auto_annotate subannotation type as never present in the exports --- darwin/datatypes.py | 5 ++--- darwin/utils/utils.py | 4 ---- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/darwin/datatypes.py b/darwin/datatypes.py index a6ca107f3..8eeb0f877 100644 --- a/darwin/datatypes.py +++ b/darwin/datatypes.py @@ -25,9 +25,9 @@ NDArray = Any # type:ignore from darwin.future.data_objects.properties import ( + PropertyGranularity, PropertyType, SelectedProperty, - PropertyGranularity, ) from darwin.path_utils import construct_full_path, is_properties_enabled, parse_metadata @@ -124,7 +124,6 @@ class SubAnnotationType(str, Enum): INFERENCE = "inference" DIRECTIONAL_VECTOR = "directional_vector" MEASURES = "measures" - AUTO_ANNOTATE = "auto_annotate" @dataclass @@ -1410,7 +1409,7 @@ def make_opaque_sub(type: str, data: UnknownType) -> SubAnnotation: SubAnnotation A text ``SubAnnotation``. 
""" - return SubAnnotation(type, data) + return SubAnnotation(SubAnnotationType(type), data) KeyFrame = Dict[str, Union[int, Annotation]] diff --git a/darwin/utils/utils.py b/darwin/utils/utils.py index 7d0549635..2053a295e 100644 --- a/darwin/utils/utils.py +++ b/darwin/utils/utils.py @@ -889,10 +889,6 @@ def _parse_darwin_annotation( main_annotation.subs.append( dt.make_opaque_sub("measures", annotation["measures"]) ) - if "auto_annotate" in annotation: - main_annotation.subs.append( - dt.make_opaque_sub("auto_annotate", annotation["auto_annotate"]) - ) if annotation.get("annotators") is not None: main_annotation.annotators = _parse_annotators(annotation["annotators"])