diff --git a/darwin/datatypes.py b/darwin/datatypes.py index 8d7736281..56ccf2a73 100644 --- a/darwin/datatypes.py +++ b/darwin/datatypes.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from dataclasses import dataclass, field from enum import Enum, auto from pathlib import Path @@ -20,7 +22,7 @@ except ImportError: NDArray = Any # type:ignore -from darwin.path_utils import construct_full_path +from darwin.path_utils import construct_full_path, is_properties_enabled, parse_metadata # Utility types @@ -386,6 +388,96 @@ def __str__(self) -> str: return f"{self.major}.{self.minor}{self.suffix}" +@dataclass +class Property: + """ + Represents a property of an annotation file. + """ + + # Name of the property + name: str + + # Type of the property + type: str + + # Whether the property is required or not + required: bool + + # Property options + options: list[dict[str, str]] + + +@dataclass +class PropertyClass: + name: str + type: str + description: Optional[str] + color: Optional[str] = None + sub_types: Optional[list[str]] = None + properties: Optional[list[Property]] = None + + +def parse_property_classes(metadata: dict[str, Any]) -> list[PropertyClass]: + """ + Parses the metadata file and returns a list of PropertyClass objects. + + Parameters + ---------- + metadata : dict[str, Any] + The metadata file. + + Returns + ------- + list[PropertyClass] + A list of PropertyClass objects. + """ + assert "classes" in metadata, "Metadata does not contain classes" + + classes = [] + for metadata_cls in metadata["classes"]: + assert ( + "properties" in metadata_cls + ), "Metadata class does not contain properties" + classes.append( + PropertyClass( + name=metadata_cls["name"], + type=metadata_cls["type"], + description=metadata_cls.get("description"), + color=metadata_cls.get("color"), + sub_types=metadata_cls.get("sub_types"), + properties=[Property(**p) for p in metadata_cls["properties"]], + ) + ) + + return classes + + +def split_paths_by_metadata( + path, dir: str = ".v7", filename: str = "metadata.json" +) -> tuple[Path, Optional[list[PropertyClass]]]: + """ + Splits the given path into two: the path to the metadata file and the path to the properties + + Parameters + ---------- + path : Path + The path to the export directory. + + Returns + ------- + tuple[Path, Optional[list[PropertyClass]]] + A tuple containing the path to the metadata file and the list of property classes. + """ + if not is_properties_enabled(path, dir, filename): + return path, None + + metadata_path = path / dir / filename + metadata = parse_metadata(metadata_path) + property_classes = parse_property_classes(metadata) + + return metadata_path, property_classes + + @dataclass class AnnotationFile: """ diff --git a/darwin/path_utils.py b/darwin/path_utils.py index c1a4149a3..6d4197d3b 100644 --- a/darwin/path_utils.py +++ b/darwin/path_utils.py @@ -1,4 +1,7 @@ -from pathlib import PurePosixPath +from __future__ import annotations + +import json +from pathlib import Path, PurePosixPath from typing import Optional, Tuple @@ -41,3 +44,56 @@ def deconstruct_full_path(filename: str) -> Tuple[str, str]: """ posix_path = PurePosixPath("/") / filename return str(posix_path.parent), posix_path.name + + +def parse_metadata(path: Path) -> dict: + """ + Returns the parsed metadata file. + + Parameters + ---------- + path : Path + The path to the metadata file. + + Returns + ------- + dict + The parsed metadata file. + """ + with open(path) as f: + metadata = json.load(f) + + return metadata + + +def is_properties_enabled( + export_dir_path: Path, + dir: str = ".v7", + filename: str = "metadata.json", + annotations_dir: str = "annotations", +) -> bool: + """ + Returns whether the given export directory has properties enabled. + + Parameters + ---------- + export_dir_path : Path + The path to the export directory. + + Returns + ------- + bool + Whether the given export directory has properties enabled. + """ + path = export_dir_path / dir + if not path.exists(): + annotations_path = export_dir_path / annotations_dir + for annotation_path in annotations_path.rglob("*"): + with open(annotation_path) as f: + if '"properties"' in f.read(): + return True + return False + + metadata_path = path / filename + metadata_classes = parse_metadata(metadata_path).get("classes", []) + return any(_cls.get("properties") for _cls in metadata_classes) diff --git a/tests/darwin/data/annotation_with_properties.json b/tests/darwin/data/annotation_with_properties.json new file mode 100644 index 000000000..ea2b9399b --- /dev/null +++ b/tests/darwin/data/annotation_with_properties.json @@ -0,0 +1,328 @@ +{ + "version": "2.0", + "schema_ref": "https://darwin-public.s3.eu-west-1.amazonaws.com/darwin_json/2.0/schema.json", + "item": { + "name": "221b-1.jpeg", + "path": "/", + "source_info": { + "item_id": "018c3a41-9aca-0fea-e62d-9a64ebd1a147", + "team": { + "name": "Rafal's Team", + "slug": "rafals-team" + }, + "dataset": { + "name": "saurabh-test", + "slug": "saurabh-test", + "dataset_management_url": "https://staging.v7labs.com/datasets/60844/dataset-management" + }, + "workview_url": "https://staging.v7labs.com/workview?dataset=60844&item=018c3a41-9aca-0fea-e62d-9a64ebd1a147" + }, + "slots": [ + { + "type": "image", + "slot_name": "0", + "width": 192, + "height": 263, + "thumbnail_url": "https://staging.v7labs.com/api/v2/teams/rafals-team/files/6c88bb6a-b108-47c7-a9bf-754e9bd32330/thumbnail", + "source_files": [ + { + "file_name": "221b-1.jpeg", + "url": "https://staging.v7labs.com/api/v2/teams/rafals-team/uploads/403ef007-817f-4df5-8566-e81b7adb06a2" + } + ] + } + ] + }, + "annotations": [ + { + "bounding_box": { + "h": 12.0, + "w": 3.0, + "x": 101.0, + "y": 89.0 + }, + "id": "3893e60c-436b-47a8-9544-ff60d018a694", + "name": "1", + "polygon": { + "paths": [ + [ + { + "x": 102.0, + "y": 89.0 + }, + { + "x": 101.0, + "y": 90.0 + }, + { + "x": 102.0, + "y": 91.0 + }, + { + "x": 102.0, + "y": 101.0 + }, + { + "x": 104.0, + "y": 101.0 + }, + { + "x": 103.0, + "y": 100.0 + }, + { + "x": 103.0, + "y": 89.0 + } + ] + ] + }, + "properties": [ + { + "frame_index": 0, + "name": "1.2", + "type": "string", + "value": "1.2.2" + }, + { + "frame_index": 0, + "name": "1.1", + "type": "string", + "value": "1.1.2" + }, + { + "frame_index": 0, + "name": "1.3", + "type": "string", + "value": "1.3.1" + }, + { + "frame_index": 0, + "name": "1.1", + "type": "string", + "value": "1.1.1" + }, + { + "frame_index": 0, + "name": "1.2", + "type": "string", + "value": "1.2.1" + } + ], + "slot_names": [ + "0" + ], + "text": { + "text": "hello" + } + }, + { + "bounding_box": { + "h": 11.0, + "w": 7.0, + "x": 91.0, + "y": 89.0 + }, + "id": "2c6097ae-510e-4ed9-af1f-2bf0fc71021b", + "name": "2", + "polygon": { + "paths": [ + [ + { + "x": 92.0, + "y": 89.0 + }, + { + "x": 92.0, + "y": 91.0 + }, + { + "x": 93.0, + "y": 91.0 + }, + { + "x": 94.0, + "y": 90.0 + }, + { + "x": 95.0, + "y": 90.0 + }, + { + "x": 97.0, + "y": 92.0 + }, + { + "x": 97.0, + "y": 94.0 + }, + { + "x": 95.0, + "y": 96.0 + }, + { + "x": 94.0, + "y": 96.0 + }, + { + "x": 94.0, + "y": 97.0 + }, + { + "x": 91.0, + "y": 100.0 + }, + { + "x": 93.0, + "y": 100.0 + }, + { + "x": 93.0, + "y": 99.0 + }, + { + "x": 94.0, + "y": 98.0 + }, + { + "x": 96.0, + "y": 100.0 + }, + { + "x": 97.0, + "y": 99.0 + }, + { + "x": 98.0, + "y": 100.0 + }, + { + "x": 97.0, + "y": 99.0 + }, + { + "x": 97.0, + "y": 98.0 + }, + { + "x": 96.0, + "y": 97.0 + }, + { + "x": 98.0, + "y": 95.0 + }, + { + "x": 98.0, + "y": 90.0 + }, + { + "x": 97.0, + "y": 89.0 + } + ] + ] + }, + "properties": [], + "slot_names": [ + "0" + ] + }, + { + "bounding_box": { + "h": 12.0, + "w": 8.0, + "x": 80.0, + "y": 88.0 + }, + "id": "3358878c-c473-4d97-868f-ac7afa91de81", + "name": "2", + "polygon": { + "paths": [ + [ + { + "x": 80.0, + "y": 88.0 + }, + { + "x": 80.0, + "y": 90.0 + }, + { + "x": 81.0, + "y": 89.0 + }, + { + "x": 82.0, + "y": 89.0 + }, + { + "x": 83.0, + "y": 88.0 + }, + { + "x": 84.0, + "y": 88.0 + }, + { + "x": 87.0, + "y": 91.0 + }, + { + "x": 87.0, + "y": 92.0 + }, + { + "x": 80.0, + "y": 99.0 + }, + { + "x": 80.0, + "y": 100.0 + }, + { + "x": 88.0, + "y": 100.0 + }, + { + "x": 88.0, + "y": 98.0 + }, + { + "x": 87.0, + "y": 99.0 + }, + { + "x": 85.0, + "y": 99.0 + }, + { + "x": 84.0, + "y": 98.0 + }, + { + "x": 84.0, + "y": 97.0 + }, + { + "x": 88.0, + "y": 93.0 + }, + { + "x": 88.0, + "y": 90.0 + }, + { + "x": 86.0, + "y": 88.0 + } + ] + ] + }, + "properties": [], + "slot_names": [ + "0" + ] + } + ] +} \ No newline at end of file diff --git a/tests/darwin/data/annotation_without_properties.json b/tests/darwin/data/annotation_without_properties.json new file mode 100644 index 000000000..491353d8d --- /dev/null +++ b/tests/darwin/data/annotation_without_properties.json @@ -0,0 +1,294 @@ +{ + "version": "2.0", + "schema_ref": "https://darwin-public.s3.eu-west-1.amazonaws.com/darwin_json/2.0/schema.json", + "item": { + "name": "221b-1.jpeg", + "path": "/", + "source_info": { + "item_id": "018c3a41-9aca-0fea-e62d-9a64ebd1a147", + "team": { + "name": "Rafal's Team", + "slug": "rafals-team" + }, + "dataset": { + "name": "saurabh-test", + "slug": "saurabh-test", + "dataset_management_url": "https://staging.v7labs.com/datasets/60844/dataset-management" + }, + "workview_url": "https://staging.v7labs.com/workview?dataset=60844&item=018c3a41-9aca-0fea-e62d-9a64ebd1a147" + }, + "slots": [ + { + "type": "image", + "slot_name": "0", + "width": 192, + "height": 263, + "thumbnail_url": "https://staging.v7labs.com/api/v2/teams/rafals-team/files/6c88bb6a-b108-47c7-a9bf-754e9bd32330/thumbnail", + "source_files": [ + { + "file_name": "221b-1.jpeg", + "url": "https://staging.v7labs.com/api/v2/teams/rafals-team/uploads/403ef007-817f-4df5-8566-e81b7adb06a2" + } + ] + } + ] + }, + "annotations": [ + { + "bounding_box": { + "h": 12.0, + "w": 3.0, + "x": 101.0, + "y": 89.0 + }, + "id": "3893e60c-436b-47a8-9544-ff60d018a694", + "name": "1", + "polygon": { + "paths": [ + [ + { + "x": 102.0, + "y": 89.0 + }, + { + "x": 101.0, + "y": 90.0 + }, + { + "x": 102.0, + "y": 91.0 + }, + { + "x": 102.0, + "y": 101.0 + }, + { + "x": 104.0, + "y": 101.0 + }, + { + "x": 103.0, + "y": 100.0 + }, + { + "x": 103.0, + "y": 89.0 + } + ] + ] + }, + "slot_names": [ + "0" + ], + "text": { + "text": "hello" + } + }, + { + "bounding_box": { + "h": 11.0, + "w": 7.0, + "x": 91.0, + "y": 89.0 + }, + "id": "2c6097ae-510e-4ed9-af1f-2bf0fc71021b", + "name": "2", + "polygon": { + "paths": [ + [ + { + "x": 92.0, + "y": 89.0 + }, + { + "x": 92.0, + "y": 91.0 + }, + { + "x": 93.0, + "y": 91.0 + }, + { + "x": 94.0, + "y": 90.0 + }, + { + "x": 95.0, + "y": 90.0 + }, + { + "x": 97.0, + "y": 92.0 + }, + { + "x": 97.0, + "y": 94.0 + }, + { + "x": 95.0, + "y": 96.0 + }, + { + "x": 94.0, + "y": 96.0 + }, + { + "x": 94.0, + "y": 97.0 + }, + { + "x": 91.0, + "y": 100.0 + }, + { + "x": 93.0, + "y": 100.0 + }, + { + "x": 93.0, + "y": 99.0 + }, + { + "x": 94.0, + "y": 98.0 + }, + { + "x": 96.0, + "y": 100.0 + }, + { + "x": 97.0, + "y": 99.0 + }, + { + "x": 98.0, + "y": 100.0 + }, + { + "x": 97.0, + "y": 99.0 + }, + { + "x": 97.0, + "y": 98.0 + }, + { + "x": 96.0, + "y": 97.0 + }, + { + "x": 98.0, + "y": 95.0 + }, + { + "x": 98.0, + "y": 90.0 + }, + { + "x": 97.0, + "y": 89.0 + } + ] + ] + }, + "slot_names": [ + "0" + ] + }, + { + "bounding_box": { + "h": 12.0, + "w": 8.0, + "x": 80.0, + "y": 88.0 + }, + "id": "3358878c-c473-4d97-868f-ac7afa91de81", + "name": "2", + "polygon": { + "paths": [ + [ + { + "x": 80.0, + "y": 88.0 + }, + { + "x": 80.0, + "y": 90.0 + }, + { + "x": 81.0, + "y": 89.0 + }, + { + "x": 82.0, + "y": 89.0 + }, + { + "x": 83.0, + "y": 88.0 + }, + { + "x": 84.0, + "y": 88.0 + }, + { + "x": 87.0, + "y": 91.0 + }, + { + "x": 87.0, + "y": 92.0 + }, + { + "x": 80.0, + "y": 99.0 + }, + { + "x": 80.0, + "y": 100.0 + }, + { + "x": 88.0, + "y": 100.0 + }, + { + "x": 88.0, + "y": 98.0 + }, + { + "x": 87.0, + "y": 99.0 + }, + { + "x": 85.0, + "y": 99.0 + }, + { + "x": 84.0, + "y": 98.0 + }, + { + "x": 84.0, + "y": 97.0 + }, + { + "x": 88.0, + "y": 93.0 + }, + { + "x": 88.0, + "y": 90.0 + }, + { + "x": 86.0, + "y": 88.0 + } + ] + ] + }, + "slot_names": [ + "0" + ] + } + ] +} \ No newline at end of file diff --git a/tests/darwin/data/metadata.json b/tests/darwin/data/metadata.json new file mode 100644 index 000000000..861ed2f55 --- /dev/null +++ b/tests/darwin/data/metadata.json @@ -0,0 +1,52 @@ +{ + "classes": [ + { + "name": "Bottle", + "description": "Some additional text", + "type": "polygon", + "sub_types": ["attributes", "instance_id"], + "color": "rgba(255,0,85,1.0)", + "properties": [ + { + "name": "Colors", + "type": "multi-select", + "options": [ + { + "value": "red", + "color": "rgba(255, 0, 0, 0)", + "type": "string" + }, + { + "value": "green", + "color": "rgba(0, 255, 0, 0)", + "type": "string" + }, + { + "value": "blue", + "color": "rgba(0, 0, 255, 0)", + "type": "string" + } + ], + "required": true + }, + { + "name": "Shape (expanded format)", + "type": "single-select", + "options": [ + { + "value": "Star", + "color": "rgba(0, 0, 0, 0)", + "type": "string" + }, + { + "value": "Circle", + "color": "rgba(150, 150, 150, 0)", + "type": "string" + } + ], + "required": false + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/darwin/data/metadata_empty_properties.json b/tests/darwin/data/metadata_empty_properties.json new file mode 100644 index 000000000..ea386bca8 --- /dev/null +++ b/tests/darwin/data/metadata_empty_properties.json @@ -0,0 +1,37 @@ +{ + "classes": [ + { + "name": "Bottle", + "description": "Some additional text", + "type": "polygon", + "sub_types": [ + "attributes", + "instance_id" + ], + "color": "rgba(255,0,85,1.0)", + "properties": [] + }, + { + "name": "Bottle1", + "description": "Some additional text", + "type": "polygon", + "sub_types": [ + "attributes", + "instance_id" + ], + "color": "rgba(255,0,85,1.0)", + "properties": [] + }, + { + "name": "Bottle3", + "description": "Some additional text", + "type": "polygon", + "sub_types": [ + "attributes", + "instance_id" + ], + "color": "rgba(255,0,85,1.0)", + "properties": [] + } + ] +} \ No newline at end of file diff --git a/tests/darwin/data/metadata_nested_properties.json b/tests/darwin/data/metadata_nested_properties.json new file mode 100644 index 000000000..ba7d430b0 --- /dev/null +++ b/tests/darwin/data/metadata_nested_properties.json @@ -0,0 +1,60 @@ +{ + "classes": [ + { + "name": "Bottle", + "description": "Some additional text", + "type": "polygon", + "sub_types": [ + "attributes", + "instance_id" + ], + "color": "rgba(255,0,85,1.0)", + "properties": [] + }, + { + "name": "Bottle1", + "description": "Some additional text", + "type": "polygon", + "sub_types": [ + "attributes", + "instance_id" + ], + "color": "rgba(255,0,85,1.0)", + "properties": [] + }, + { + "name": "Bottle3", + "description": "Some additional text", + "type": "polygon", + "sub_types": [ + "attributes", + "instance_id" + ], + "color": "rgba(255,0,85,1.0)", + "properties": [ + { + "name": "Colors", + "type": "multi-select", + "options": [ + { + "value": "red", + "color": "rgba(255, 0, 0, 0)", + "type": "string" + }, + { + "value": "green", + "color": "rgba(0, 255, 0, 0)", + "type": "string" + }, + { + "value": "blue", + "color": "rgba(0, 0, 255, 0)", + "type": "string" + } + ], + "required": true + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/darwin/datatypes_test.py b/tests/darwin/datatypes_test.py index 1311d6867..784423800 100644 --- a/tests/darwin/datatypes_test.py +++ b/tests/darwin/datatypes_test.py @@ -1,6 +1,18 @@ +import json +import shutil +import tempfile +from pathlib import Path from typing import Dict, List -from darwin.datatypes import Point, make_complex_polygon, make_polygon +import pytest + +from darwin.datatypes import ( + Point, + make_complex_polygon, + make_polygon, + parse_property_classes, + split_paths_by_metadata, +) class TestMakePolygon: @@ -65,3 +77,54 @@ def assert_annotation_class(annotation, name, type, internal_type=None) -> None: assert annotation.annotation_class.name == name assert annotation.annotation_class.annotation_type == type assert annotation.annotation_class.annotation_internal_type == internal_type + + +@pytest.mark.parametrize( + ("filename", "property_class_n", "properties_n"), + ( + ("metadata.json", 1, [2]), + ("metadata_nested_properties.json", 3, [0, 0, 1]), + ("metadata_empty_properties.json", 3, [0, 0, 0]), + ), +) +def test_parse_properties(filename, property_class_n, properties_n): + manifest_path = Path(__file__).parent / f"data/{filename}" + + with open(manifest_path) as f: + manifest = json.load(f) + + property_classes = parse_property_classes(manifest) + assert len(property_classes) == property_class_n + assert [ + len(property_class.properties or []) for property_class in property_classes + ] == properties_n + + +@pytest.mark.parametrize( + ("filename", "property_class_n", "properties_n", "is_properties_enabled"), + ( + ("metadata.json", 1, [2], True), + ("metadata_nested_properties.json", 3, [0, 0, 1], True), + ("metadata_empty_properties.json", 0, [], False), + ), +) +def test_split_paths_by_manifest( + filename, property_class_n, properties_n, is_properties_enabled +): + manifest_path = Path(__file__).parent / f"data/{filename}" + + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = Path(tmpdir) + tmpdir_v7 = tmpdir / ".v7" + tmpdir_v7.mkdir(exist_ok=True) + shutil.copy(manifest_path, tmpdir_v7) + + _path, property_classes = split_paths_by_metadata(tmpdir, filename=filename) + + is_path_file = _path.is_file() + assert is_path_file == is_properties_enabled + assert len(property_classes or []) == property_class_n + assert [ + len(property_class.properties or []) + for property_class in property_classes or [] + ] == properties_n diff --git a/tests/darwin/path_utils_test.py b/tests/darwin/path_utils_test.py index 04b6b002a..838a106ab 100644 --- a/tests/darwin/path_utils_test.py +++ b/tests/darwin/path_utils_test.py @@ -1,6 +1,15 @@ -from pathlib import PurePosixPath +import shutil +import tempfile +from pathlib import Path, PurePosixPath -from darwin.path_utils import construct_full_path, deconstruct_full_path +import pytest + +from darwin.path_utils import ( + construct_full_path, + deconstruct_full_path, + is_properties_enabled, + parse_metadata, +) def test_path_construction(): @@ -34,3 +43,50 @@ def test_path_deconstruction(): assert ("/a/b", "test.png") == deconstruct_full_path("/a/b/test.png") assert ("/", "test.png") == deconstruct_full_path("test.png") assert ("/", "test.png") == deconstruct_full_path("/test.png") + + +def test_parse_metadata(): + metadata_path = Path(__file__).parent / "data/metadata.json" + metadata = parse_metadata(metadata_path) + + # check that the metadata is parsed correctly + assert len(metadata["classes"]) == 1 + assert len(metadata["classes"][0]["properties"]) == 2 + + +@pytest.mark.parametrize( + ("filename", "expected_bool"), + ( + ("annotation_with_properties.json", True), + ("annotation_without_properties.json", False), + ), +) +def test_is_properties_enabled(filename, expected_bool): + annotation_path = Path(__file__).parent / f"data/{filename}" + + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = Path(tmpdir) + tmpdir_annotations = tmpdir / "annotations" + tmpdir_annotations.mkdir(exist_ok=True) + shutil.copy(annotation_path, tmpdir_annotations) + + assert is_properties_enabled(tmpdir) == expected_bool + + +@pytest.mark.parametrize( + ("filename", "expected_bool"), + ( + ("metadata.json", True), + ("metadata_nested_properties.json", True), + ("metadata_empty_properties.json", False), + ), +) +def test_is_properties_enabled_v7(filename, expected_bool): + metadata_path = Path(__file__).parent / f"data/{filename}" + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = Path(tmpdir) + tmpdir_v7 = tmpdir / ".v7" + tmpdir_v7.mkdir(exist_ok=True) + shutil.copy(metadata_path, tmpdir_v7) + + assert is_properties_enabled(tmpdir, filename=filename) == expected_bool