Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 46 additions & 6 deletions darwin/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@
NDArray = Any # type:ignore

from darwin.future.data_objects.properties import (
PropertyGranularity,
PropertyType,
SelectedProperty,
PropertyGranularity,
)
from darwin.path_utils import construct_full_path, is_properties_enabled, parse_metadata

Expand Down Expand Up @@ -90,6 +90,15 @@ def from_dict(cls, json: JSONFreeForm) -> "JSONType":
return cls(**json)


def sorted_nested_lists(obj: Any) -> Any:
    """
    Returns a copy of ``obj`` in which every nested list has been sorted.

    Dictionaries are rebuilt with each value normalised recursively, lists are
    normalised element-wise and then sorted, and any other value is returned
    unchanged. The result is meant for order-insensitive equality checks
    between two JSON-like structures.

    Parameters
    ----------
    obj : Any
        A value made of dictionaries, lists and scalars.

    Returns
    -------
    Any
        The normalised structure. Lists whose elements cannot be ordered against
        each other (e.g. dictionaries, ``None`` or mixed types) are sorted by a
        deterministic canonical key instead of raising ``TypeError``.
    """
    # Local import so the helper does not depend on the module's import block.
    import json

    def _canonical_key(item: Any) -> Any:
        # Serialise with sorted keys so equal dictionaries map to the same key
        # regardless of insertion order; the type name keeps values of different
        # types from being compared through look-alike encodings.
        try:
            encoded = json.dumps(item, sort_keys=True, default=repr)
        except (TypeError, ValueError):
            encoded = repr(item)
        return (type(item).__name__, encoded)

    if isinstance(obj, dict):
        return {key: sorted_nested_lists(value) for key, value in obj.items()}

    if isinstance(obj, list):
        normalised = [sorted_nested_lists(item) for item in obj]
        try:
            # Natural ordering is kept whenever the elements support it.
            return sorted(normalised)
        except TypeError:
            return sorted(normalised, key=_canonical_key)

    return obj


AnnotationType = Literal[ # NB: Some of these are not supported yet
"bounding_box",
"polygon",
Expand All @@ -108,6 +117,15 @@ def from_dict(cls, json: JSONFreeForm) -> "JSONType":
]


class SubAnnotationType(str, Enum):
    """
    The closed set of sub-annotation kinds.

    Members are also ``str`` instances, so they compare equal to (and hash like)
    their plain string value, e.g. ``SubAnnotationType.TEXT == "text"``.
    Looking up a value that is not listed here, e.g.
    ``SubAnnotationType("unknown")``, raises ``ValueError``.
    """

    TEXT = "text"
    ATTRIBUTES = "attributes"
    INSTANCE_ID = "instance_id"
    INFERENCE = "inference"
    DIRECTIONAL_VECTOR = "directional_vector"
    MEASURES = "measures"

    def __str__(self) -> str:
        # Without this, ``str()`` (and, from Python 3.11, f-strings/``format``)
        # would render "SubAnnotationType.TEXT" rather than "text". Returning
        # the value keeps rendering identical to the plain strings this enum
        # replaces, on every Python version.
        return self.value


@dataclass
class Team:
"""
Expand Down Expand Up @@ -169,7 +187,7 @@ class SubAnnotation:
"""

#: The type of this ``SubAnnotation``.
annotation_type: str
annotation_type: SubAnnotationType

#: Any external data, in any format, relevant to this ``SubAnnotation``.
#: Used for compatibility purposes with external formats.
Expand Down Expand Up @@ -346,6 +364,28 @@ def post_processing(
"hidden_areas": self.hidden_areas,
}

# Track all subannotation attributes as a set for each frame
last_frame_subannotations: Dict[str, Any] = {}

for idx in sorted(output["frames"], key=int):
frame_data = output["frames"][idx]
current_frame_subannotations: Dict[str, Any] = {}

for subannotation_name in SubAnnotationType:
value = frame_data.get(subannotation_name.value)
if value is None:
continue
current_frame_subannotations[subannotation_name.value] = (
sorted_nested_lists(value)
)

if current_frame_subannotations == last_frame_subannotations:
for subannotation in current_frame_subannotations:
frame_data.pop(subannotation)
else:
last_frame_subannotations.clear()
last_frame_subannotations.update(current_frame_subannotations)

return output


Expand Down Expand Up @@ -1315,7 +1355,7 @@ def make_instance_id(value: int) -> SubAnnotation:
SubAnnotation
An instance id ``SubAnnotation``.
"""
return SubAnnotation("instance_id", value)
return SubAnnotation(SubAnnotationType.INSTANCE_ID, value)


def make_attributes(attributes: List[str]) -> SubAnnotation:
Expand All @@ -1332,7 +1372,7 @@ def make_attributes(attributes: List[str]) -> SubAnnotation:
SubAnnotation
An attributes ``SubAnnotation``.
"""
return SubAnnotation("attributes", attributes)
return SubAnnotation(SubAnnotationType.ATTRIBUTES, attributes)


def make_text(text: str) -> SubAnnotation:
Expand All @@ -1349,7 +1389,7 @@ def make_text(text: str) -> SubAnnotation:
SubAnnotation
A text ``SubAnnotation``.
"""
return SubAnnotation("text", text)
return SubAnnotation(SubAnnotationType.TEXT, text)


def make_opaque_sub(type: str, data: UnknownType) -> SubAnnotation:
Expand All @@ -1369,7 +1409,7 @@ def make_opaque_sub(type: str, data: UnknownType) -> SubAnnotation:
SubAnnotation
A text ``SubAnnotation``.
"""
return SubAnnotation(type, data)
return SubAnnotation(SubAnnotationType(type), data)


KeyFrame = Dict[str, Union[int, Annotation]]
Expand Down
4 changes: 0 additions & 4 deletions darwin/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -889,10 +889,6 @@ def _parse_darwin_annotation(
main_annotation.subs.append(
dt.make_opaque_sub("measures", annotation["measures"])
)
if "auto_annotate" in annotation:
main_annotation.subs.append(
dt.make_opaque_sub("auto_annotate", annotation["auto_annotate"])
)

if annotation.get("annotators") is not None:
main_annotation.annotators = _parse_annotators(annotation["annotators"])
Expand Down
188 changes: 187 additions & 1 deletion tests/darwin/datatypes_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,19 @@
import shutil
import tempfile
from pathlib import Path
from typing import Dict, List
from typing import Dict, List, Any

import pytest

from darwin.client import Client
from darwin.config import Config
from darwin.dataset.remote_dataset_v2 import RemoteDatasetV2
from darwin.datatypes import (
AnnotationClass,
Annotation,
ObjectStore,
Point,
VideoAnnotation,
make_polygon,
parse_property_classes,
split_paths_by_metadata,
Expand Down Expand Up @@ -188,3 +191,186 @@ def test_repr(self, object_store):
repr(object_store)
== "ObjectStore(name=test, prefix=test_prefix, readonly=False, provider=aws)"
)


class TestVideoAnnotationGetData:
    """Checks how ``VideoAnnotation.get_data`` handles repeated sub-annotations."""

    @staticmethod
    def _video_annotation(
        annotation_class: AnnotationClass, frames: Dict[int, Annotation]
    ) -> VideoAnnotation:
        """
        Builds an interpolated ``VideoAnnotation`` on the ``main`` slot in which
        every frame is a keyframe and a single segment spans the lowest to the
        highest frame index. ``frames`` is passed through untouched so its
        insertion order is preserved.
        """
        frame_indices = list(frames)
        return VideoAnnotation(
            annotation_class,
            frames,
            {index: True for index in frame_indices},
            [[min(frame_indices), max(frame_indices)]],
            True,
            ["main"],
        )

    def test_frames_sorted_numerically_for_duplicate_attribute_removal(self):
        """Frame 2 must be compared against frame 1, not against frame 10."""
        annotation_class = AnnotationClass("test", "polygon")
        first = Annotation(annotation_class, {"data": "frame_1"})
        tenth = Annotation(annotation_class, {"data": "frame_10"})
        second = Annotation(annotation_class, {"data": "frame_2"})

        # The frames are deliberately inserted out of numeric order.
        video_annotation = self._video_annotation(
            annotation_class, {1: first, 10: tenth, 2: second}
        )

        def mock_post_processing(
            annotation: Any, data: Dict[str, Any]
        ) -> Dict[str, Any]:
            if annotation == first or annotation == second:
                # Frame 2 repeats frame 1, so its copy should be dropped.
                data["attributes"] = ["attr1"]
            elif annotation == tenth:
                # Differs from the preceding frames, so it should be kept.
                data["attributes"] = ["attr10"]
            return data

        result = video_annotation.get_data(post_processing=mock_post_processing)
        frames_out = result["frames"]

        assert "attributes" in frames_out[1]
        assert frames_out[1]["attributes"] == ["attr1"]

        assert (
            "attributes" not in frames_out[2]
        ), "Duplicate attributes should be removed"

        assert "attributes" in frames_out[10]
        assert frames_out[10]["attributes"] == ["attr10"]

    def test_attributes_equality_for_lists_ignores_order(self):
        """Two frames listing the same attributes in a different order are duplicates."""
        first = Annotation(AnnotationClass("test", "polygon"), {"data": "frame_1"})
        second = Annotation(AnnotationClass("test", "polygon"), {"data": "frame_2"})
        annotation_class = AnnotationClass("test", "polygon")

        video_annotation = self._video_annotation(
            annotation_class, {1: first, 2: second}
        )

        def mock_post_processing(
            annotation: Any, data: Dict[str, Any]
        ) -> Dict[str, Any]:
            if annotation == first:
                data["attributes"] = ["attr1", "attr2"]
            elif annotation == second:
                # Same elements as frame 1, listed in the opposite order.
                data["attributes"] = ["attr2", "attr1"]
            return data

        result = video_annotation.get_data(post_processing=mock_post_processing)
        frames_out = result["frames"]

        assert "attributes" in frames_out[1]
        assert frames_out[1]["attributes"] == ["attr1", "attr2"]

        assert (
            "attributes" not in frames_out[2]
        ), "Different order lists should be considered the same set of attributes"

    def test_all_subannotation_present_if_any_are_changed_none_present_otherwise(self):
        """A frame keeps all of its sub-annotations if any changed, and none otherwise."""
        annotation_class = AnnotationClass("test", "polygon")
        first = Annotation(annotation_class, {"data": "frame_1"})
        second = Annotation(annotation_class, {"data": "frame_2"})
        third = Annotation(annotation_class, {"data": "frame_3"})
        fourth = Annotation(annotation_class, {"data": "frame_4"})

        video_annotation = self._video_annotation(
            annotation_class, {1: first, 2: second, 3: third, 4: fourth}
        )

        def mock_post_processing(
            annotation: Any, data: Dict[str, Any]
        ) -> Dict[str, Any]:
            # Frames 1 and 2 share one text; frames 3 and 4 share an updated one.
            if annotation == first or annotation == second:
                data["text"] = "Initial text"
            elif annotation == third or annotation == fourth:
                data["text"] = "Updated text"
            else:
                return data

            # The remaining sub-annotations never change between frames.
            data["attributes"] = ["attr1", "attr2"]
            data["instance_id"] = 123
            return data

        result = video_annotation.get_data(post_processing=mock_post_processing)
        frames_out = result["frames"]

        # ``None`` marks a frame that repeats its predecessor and must therefore
        # carry no sub-annotations at all.
        expected_text_by_frame = {
            1: "Initial text",
            2: None,
            3: "Updated text",
            4: None,
        }
        for index, expected_text in expected_text_by_frame.items():
            frame = frames_out[index]
            if expected_text is None:
                assert "text" not in frame
                assert "attributes" not in frame
                assert "instance_id" not in frame
            else:
                assert frame["text"] == expected_text
                assert frame["attributes"] == ["attr1", "attr2"]
                assert frame["instance_id"] == 123
5 changes: 3 additions & 2 deletions tests/darwin/importer/importer_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -739,7 +739,7 @@ def test__get_annotation_data_video_annotation_with_attributes_that_become_empty
assert result["frames"][4]["attributes"] == {"attributes": []}


def test__get_annotation_data_video_annotation_does_not_wipe_sub_annotations_when_keyframe_is_true() -> (
def test__get_annotation_data_video_annotation_only_stores_updates_to_sub_annotations_when_keyframe_is_true() -> (
None
):
from darwin.importer.importer import _get_annotation_data
Expand Down Expand Up @@ -785,7 +785,8 @@ def test__get_annotation_data_video_annotation_does_not_wipe_sub_annotations_whe
attributes = {"video_class_id": {"attribute_1": "id_1", "attribute_2": "id_2"}}
result = _get_annotation_data(video_annotation, "video_class_id", attributes)
assert result["frames"][1]["attributes"] == {"attributes": ["id_1", "id_2"]}
assert result["frames"][3]["attributes"] == {"attributes": ["id_1", "id_2"]}
assert 2 not in result["frames"]
assert result["frames"][3].get("attributes") is None


def __expectation_factory(i: int, slot_names: List[str]) -> dt.Annotation:
Expand Down