From 337532a778f6c08fe3566b2cbb0c22a8bc06ec56 Mon Sep 17 00:00:00 2001 From: Saurabh Chopra Date: Tue, 5 Dec 2023 17:00:27 +0000 Subject: [PATCH 1/6] add manifest.json parsing --- darwin/datatypes.py | 27 ++++++++++++++++ darwin/path_utils.py | 73 ++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 98 insertions(+), 2 deletions(-) diff --git a/darwin/datatypes.py b/darwin/datatypes.py index 8d7736281..8361700d9 100644 --- a/darwin/datatypes.py +++ b/darwin/datatypes.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from dataclasses import dataclass, field from enum import Enum, auto from pathlib import Path @@ -213,6 +215,8 @@ class Annotation: # The darwin ID of this annotation. id: Optional[str] = None + properties: Optional[List[Property]] = None + def get_sub(self, annotation_type: str) -> Optional[SubAnnotation]: """ Returns the first SubAnnotation that matches the given type. @@ -267,6 +271,8 @@ class VideoAnnotation: # The darwin ID of this annotation. id: Optional[str] = None + properties: Optional[List[Property]] = None + def get_data( self, only_keyframes: bool = True, @@ -386,6 +392,25 @@ def __str__(self) -> str: return f"{self.major}.{self.minor}{self.suffix}" +@dataclass +class Property: + """ + Represents a property of an annotation file. + """ + + # Name of the property + name: str + + # Type of the property + type: str + + # Whether the property is required or not + required: bool + + # Property options + options: list[dict[str, str]] + + @dataclass class AnnotationFile: """ @@ -455,6 +480,8 @@ class AnnotationFile: # The Frame Count if this is a video annotation frame_count: Optional[int] = None + properties: Optional[List[Property]] = None + @property def full_path(self) -> str: """ diff --git a/darwin/path_utils.py b/darwin/path_utils.py index c1a4149a3..14d33ac55 100644 --- a/darwin/path_utils.py +++ b/darwin/path_utils.py @@ -1,5 +1,11 @@ -from pathlib import PurePosixPath -from typing import Optional, Tuple +from __future__ import annotations + +import json +from pathlib import Path, PurePosixPath +from typing import TYPE_CHECKING, Optional, Tuple + +if TYPE_CHECKING: + from darwin.datatypes import Property def construct_full_path(remote_path: Optional[str], filename: str) -> str: @@ -41,3 +47,66 @@ def deconstruct_full_path(filename: str) -> Tuple[str, str]: """ posix_path = PurePosixPath("/") / filename return str(posix_path.parent), posix_path.name + + +def parse_manifest(path: Path) -> list[Property]: + """ + Parses the given manifest and returns a list of all the properties in it. + + Parameters + ---------- + path : str + The path to the manifest. + + Returns + ------- + list[Property] + A list of all the properties in the given manifest. + """ + with open(path) as f: + manifest = json.load(f) + + properties = [] + for m_cls in manifest.get("classes"): + for property in m_cls["properties"]: + properties.append(Property(**property)) + + return properties + + +def is_properties_enabled(path: Path) -> bool: + """ + Returns whether the properties feature is enabled in the given manifest. + + Parameters + ---------- + manifest : list[dict] + The manifest to check. + + Returns + ------- + bool + Whether the properties feature is enabled in the given manifest. + """ + return bool(parse_manifest(path)) + + +def split_paths_by_manifest(path: Path) -> tuple[Path, list[Property] | Path]: + """ + Returns a tuple with the given path and the properties of the manifest of the given path. + + Parameters + ---------- + path : Path + The path to split. + + Returns + ------- + tuple[Path, Optional[list[Property]]] + A tuple with the given path and the properties of the manifest of the given path. + """ + properties = parse_manifest(path) + if not properties: + return path, path + + return path, properties From 87f772febb47b20950948e99be2c2e8024aa071a Mon Sep 17 00:00:00 2001 From: Saurabh Chopra Date: Wed, 6 Dec 2023 11:40:56 +0000 Subject: [PATCH 2/6] add tests --- darwin/datatypes.py | 53 +++++++++++++++- darwin/path_utils.py | 56 ++++++----------- tests/darwin/data/manifest.json | 52 ++++++++++++++++ .../data/manifest_empty_properties.json | 37 ++++++++++++ .../data/manifest_nested_properties.json | 60 +++++++++++++++++++ tests/darwin/datatypes_test.py | 57 +++++++++++++++++- tests/darwin/path_utils_test.py | 13 +++- 7 files changed, 286 insertions(+), 42 deletions(-) create mode 100644 tests/darwin/data/manifest.json create mode 100644 tests/darwin/data/manifest_empty_properties.json create mode 100644 tests/darwin/data/manifest_nested_properties.json diff --git a/darwin/datatypes.py b/darwin/datatypes.py index 8361700d9..427b65d3a 100644 --- a/darwin/datatypes.py +++ b/darwin/datatypes.py @@ -22,7 +22,7 @@ except ImportError: NDArray = Any # type:ignore -from darwin.path_utils import construct_full_path +from darwin.path_utils import construct_full_path, is_properties_enabled, parse_manifest # Utility types @@ -411,6 +411,57 @@ class Property: options: list[dict[str, str]] +def parse_properties(manifest: dict[str, Any]) -> list[Property]: + """ + Parses the given manifest and returns a list of all the properties in it. + + Parameters + ---------- + properties : list[dict] + The properties to parse. + + Returns + ------- + list[Property] + A list of all the properties in the given manifest. + """ + assert "classes" in manifest, "Manifest does not contain classes" + + properties = [] + for m_cls in manifest["classes"]: + for property in m_cls["properties"]: + properties.append(Property(**property)) + + return properties + + +def split_paths_by_manifest( + path, dir: str = ".v7", filename: str = "manifest.json" +) -> tuple[Path, Optional[list[Property]]]: + """ + Returns the path to the manifest and the properties of the given path. + + Parameters + ---------- + path : Path + The path to the manifest file, if exists. otherwise the path to the export dir itself. + + Returns + ------- + tuple[Path, Optional[list[Property]]] + A tuple where the first element is the path to the manifest and the second is the list of + properties of the given path. + """ + if not is_properties_enabled(path, dir, filename): + return path, None + + manifest_path = path / dir / filename + manifest = parse_manifest(manifest_path) + properties = parse_properties(manifest) + + return manifest_path, properties + + @dataclass class AnnotationFile: """ diff --git a/darwin/path_utils.py b/darwin/path_utils.py index 14d33ac55..0c3a7a262 100644 --- a/darwin/path_utils.py +++ b/darwin/path_utils.py @@ -2,10 +2,7 @@ import json from pathlib import Path, PurePosixPath -from typing import TYPE_CHECKING, Optional, Tuple - -if TYPE_CHECKING: - from darwin.datatypes import Property +from typing import Optional, Tuple def construct_full_path(remote_path: Optional[str], filename: str) -> str: @@ -49,7 +46,7 @@ def deconstruct_full_path(filename: str) -> Tuple[str, str]: return str(posix_path.parent), posix_path.name -def parse_manifest(path: Path) -> list[Property]: +def parse_manifest(path: Path) -> dict: """ Parses the given manifest and returns a list of all the properties in it. @@ -66,47 +63,30 @@ def parse_manifest(path: Path) -> list[Property]: with open(path) as f: manifest = json.load(f) - properties = [] - for m_cls in manifest.get("classes"): - for property in m_cls["properties"]: - properties.append(Property(**property)) - - return properties + return manifest -def is_properties_enabled(path: Path) -> bool: +def is_properties_enabled( + export_dir_path: Path, dir: str = ".v7", filename: str = "manifest.json" +) -> bool: """ - Returns whether the properties feature is enabled in the given manifest. + Returns whether the given export directory has properties enabled. Parameters ---------- - manifest : list[dict] - The manifest to check. + export_dir_path : Path + The path to the export directory. Returns ------- bool - Whether the properties feature is enabled in the given manifest. + Whether the given export directory has properties enabled. """ - return bool(parse_manifest(path)) - - -def split_paths_by_manifest(path: Path) -> tuple[Path, list[Property] | Path]: - """ - Returns a tuple with the given path and the properties of the manifest of the given path. - - Parameters - ---------- - path : Path - The path to split. - - Returns - ------- - tuple[Path, Optional[list[Property]]] - A tuple with the given path and the properties of the manifest of the given path. - """ - properties = parse_manifest(path) - if not properties: - return path, path - - return path, properties + path = export_dir_path / dir + if not path.exists(): + breakpoint() + return False + + manifest_path = path / filename + manifest_classes = parse_manifest(manifest_path).get("classes", []) + return any(_cls.get("properties") for _cls in manifest_classes) diff --git a/tests/darwin/data/manifest.json b/tests/darwin/data/manifest.json new file mode 100644 index 000000000..861ed2f55 --- /dev/null +++ b/tests/darwin/data/manifest.json @@ -0,0 +1,52 @@ +{ + "classes": [ + { + "name": "Bottle", + "description": "Some additional text", + "type": "polygon", + "sub_types": ["attributes", "instance_id"], + "color": "rgba(255,0,85,1.0)", + "properties": [ + { + "name": "Colors", + "type": "multi-select", + "options": [ + { + "value": "red", + "color": "rgba(255, 0, 0, 0)", + "type": "string" + }, + { + "value": "green", + "color": "rgba(0, 255, 0, 0)", + "type": "string" + }, + { + "value": "blue", + "color": "rgba(0, 0, 255, 0)", + "type": "string" + } + ], + "required": true + }, + { + "name": "Shape (expanded format)", + "type": "single-select", + "options": [ + { + "value": "Star", + "color": "rgba(0, 0, 0, 0)", + "type": "string" + }, + { + "value": "Circle", + "color": "rgba(150, 150, 150, 0)", + "type": "string" + } + ], + "required": false + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/darwin/data/manifest_empty_properties.json b/tests/darwin/data/manifest_empty_properties.json new file mode 100644 index 000000000..ea386bca8 --- /dev/null +++ b/tests/darwin/data/manifest_empty_properties.json @@ -0,0 +1,37 @@ +{ + "classes": [ + { + "name": "Bottle", + "description": "Some additional text", + "type": "polygon", + "sub_types": [ + "attributes", + "instance_id" + ], + "color": "rgba(255,0,85,1.0)", + "properties": [] + }, + { + "name": "Bottle1", + "description": "Some additional text", + "type": "polygon", + "sub_types": [ + "attributes", + "instance_id" + ], + "color": "rgba(255,0,85,1.0)", + "properties": [] + }, + { + "name": "Bottle3", + "description": "Some additional text", + "type": "polygon", + "sub_types": [ + "attributes", + "instance_id" + ], + "color": "rgba(255,0,85,1.0)", + "properties": [] + } + ] +} \ No newline at end of file diff --git a/tests/darwin/data/manifest_nested_properties.json b/tests/darwin/data/manifest_nested_properties.json new file mode 100644 index 000000000..ba7d430b0 --- /dev/null +++ b/tests/darwin/data/manifest_nested_properties.json @@ -0,0 +1,60 @@ +{ + "classes": [ + { + "name": "Bottle", + "description": "Some additional text", + "type": "polygon", + "sub_types": [ + "attributes", + "instance_id" + ], + "color": "rgba(255,0,85,1.0)", + "properties": [] + }, + { + "name": "Bottle1", + "description": "Some additional text", + "type": "polygon", + "sub_types": [ + "attributes", + "instance_id" + ], + "color": "rgba(255,0,85,1.0)", + "properties": [] + }, + { + "name": "Bottle3", + "description": "Some additional text", + "type": "polygon", + "sub_types": [ + "attributes", + "instance_id" + ], + "color": "rgba(255,0,85,1.0)", + "properties": [ + { + "name": "Colors", + "type": "multi-select", + "options": [ + { + "value": "red", + "color": "rgba(255, 0, 0, 0)", + "type": "string" + }, + { + "value": "green", + "color": "rgba(0, 255, 0, 0)", + "type": "string" + }, + { + "value": "blue", + "color": "rgba(0, 0, 255, 0)", + "type": "string" + } + ], + "required": true + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/darwin/datatypes_test.py b/tests/darwin/datatypes_test.py index 1311d6867..33ba1b927 100644 --- a/tests/darwin/datatypes_test.py +++ b/tests/darwin/datatypes_test.py @@ -1,6 +1,18 @@ +import json +import shutil +import tempfile +from pathlib import Path from typing import Dict, List -from darwin.datatypes import Point, make_complex_polygon, make_polygon +import pytest + +from darwin.datatypes import ( + Point, + make_complex_polygon, + make_polygon, + parse_properties, + split_paths_by_manifest, +) class TestMakePolygon: @@ -65,3 +77,46 @@ def assert_annotation_class(annotation, name, type, internal_type=None) -> None: assert annotation.annotation_class.name == name assert annotation.annotation_class.annotation_type == type assert annotation.annotation_class.annotation_internal_type == internal_type + + +@pytest.mark.parametrize( + ("filename", "properties_n"), + ( + ("manifest.json", 2), + ("manifest_nested_properties.json", 1), + ("manifest_empty_properties.json", 0), + ), +) +def test_parse_properties(filename, properties_n): + manifest_path = Path(__file__).parent / f"data/{filename}" + + with open(manifest_path) as f: + manifest = json.load(f) + + properties = parse_properties(manifest) + assert len(properties) == properties_n + + +@pytest.mark.parametrize( + ("filename", "properties_n", "is_properties_enabled"), + ( + ("manifest.json", 2, True), + ("manifest_nested_properties.json", 1, True), + ("manifest_empty_properties.json", 0, False), + ), +) +def test_split_paths_by_manifest(filename, properties_n, is_properties_enabled): + manifest_path = Path(__file__).parent / f"data/{filename}" + + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir_v7 = Path(tmpdir) / ".v7" + tmpdir_v7.mkdir(exist_ok=True) + shutil.copy(manifest_path, tmpdir_v7) + + tmpdir = Path(tmpdir) + _path, properties = split_paths_by_manifest(tmpdir, filename=filename) + + is_path_file = _path.is_file() + assert is_path_file == is_properties_enabled + assert bool(properties) == is_properties_enabled + assert len(properties or []) == properties_n diff --git a/tests/darwin/path_utils_test.py b/tests/darwin/path_utils_test.py index 04b6b002a..f419f0c9d 100644 --- a/tests/darwin/path_utils_test.py +++ b/tests/darwin/path_utils_test.py @@ -1,6 +1,6 @@ -from pathlib import PurePosixPath +from pathlib import Path, PurePosixPath -from darwin.path_utils import construct_full_path, deconstruct_full_path +from darwin.path_utils import construct_full_path, deconstruct_full_path, parse_manifest def test_path_construction(): @@ -34,3 +34,12 @@ def test_path_deconstruction(): assert ("/a/b", "test.png") == deconstruct_full_path("/a/b/test.png") assert ("/", "test.png") == deconstruct_full_path("test.png") assert ("/", "test.png") == deconstruct_full_path("/test.png") + + +def test_parse_manifest(): + manifest_path = Path(__file__).parent / "data/manifest.json" + manifest = parse_manifest(manifest_path) + + # check that the manifest is parsed correctly + assert len(manifest["classes"]) == 1 + assert len(manifest["classes"][0]["properties"]) == 2 From f3e3a1d10c75d16e360b1373917fe92bed3f8b2d Mon Sep 17 00:00:00 2001 From: Saurabh Chopra Date: Wed, 6 Dec 2023 11:54:13 +0000 Subject: [PATCH 3/6] rm properties field in classes --- darwin/datatypes.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/darwin/datatypes.py b/darwin/datatypes.py index 427b65d3a..168d8bc83 100644 --- a/darwin/datatypes.py +++ b/darwin/datatypes.py @@ -215,8 +215,6 @@ class Annotation: # The darwin ID of this annotation. id: Optional[str] = None - properties: Optional[List[Property]] = None - def get_sub(self, annotation_type: str) -> Optional[SubAnnotation]: """ Returns the first SubAnnotation that matches the given type. @@ -271,8 +269,6 @@ class VideoAnnotation: # The darwin ID of this annotation. id: Optional[str] = None - properties: Optional[List[Property]] = None - def get_data( self, only_keyframes: bool = True, @@ -531,8 +527,6 @@ class AnnotationFile: # The Frame Count if this is a video annotation frame_count: Optional[int] = None - properties: Optional[List[Property]] = None - @property def full_path(self) -> str: """ From ac775d396023566a954e0e0c5893d3591c5a8613 Mon Sep 17 00:00:00 2001 From: Saurabh Chopra Date: Wed, 6 Dec 2023 14:24:46 +0000 Subject: [PATCH 4/6] remove breakpoint --- darwin/path_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/darwin/path_utils.py b/darwin/path_utils.py index 0c3a7a262..88f21ac9d 100644 --- a/darwin/path_utils.py +++ b/darwin/path_utils.py @@ -84,7 +84,6 @@ def is_properties_enabled( """ path = export_dir_path / dir if not path.exists(): - breakpoint() return False manifest_path = path / filename From f475a1d1bf5d91d05dbb2a691e1c98e8293e26a1 Mon Sep 17 00:00:00 2001 From: Saurabh Chopra Date: Wed, 6 Dec 2023 15:30:23 +0000 Subject: [PATCH 5/6] update is_properties_enabled - check annotations_paths dir, add tests --- darwin/path_utils.py | 10 +- .../data/annotation_with_properties.json | 328 ++++++++++++++++++ .../data/annotation_without_properties.json | 294 ++++++++++++++++ tests/darwin/datatypes_test.py | 4 +- tests/darwin/path_utils_test.py | 49 ++- 5 files changed, 681 insertions(+), 4 deletions(-) create mode 100644 tests/darwin/data/annotation_with_properties.json create mode 100644 tests/darwin/data/annotation_without_properties.json diff --git a/darwin/path_utils.py b/darwin/path_utils.py index 88f21ac9d..c58dae5f2 100644 --- a/darwin/path_utils.py +++ b/darwin/path_utils.py @@ -67,7 +67,10 @@ def parse_manifest(path: Path) -> dict: def is_properties_enabled( - export_dir_path: Path, dir: str = ".v7", filename: str = "manifest.json" + export_dir_path: Path, + dir: str = ".v7", + filename: str = "manifest.json", + annotations_dir: str = "annotations", ) -> bool: """ Returns whether the given export directory has properties enabled. @@ -84,6 +87,11 @@ def is_properties_enabled( """ path = export_dir_path / dir if not path.exists(): + annotations_path = export_dir_path / annotations_dir + for annotation_path in annotations_path.rglob("*"): + with open(annotation_path) as f: + if '"properties"' in f.read(): + return True return False manifest_path = path / filename diff --git a/tests/darwin/data/annotation_with_properties.json b/tests/darwin/data/annotation_with_properties.json new file mode 100644 index 000000000..ea2b9399b --- /dev/null +++ b/tests/darwin/data/annotation_with_properties.json @@ -0,0 +1,328 @@ +{ + "version": "2.0", + "schema_ref": "https://darwin-public.s3.eu-west-1.amazonaws.com/darwin_json/2.0/schema.json", + "item": { + "name": "221b-1.jpeg", + "path": "/", + "source_info": { + "item_id": "018c3a41-9aca-0fea-e62d-9a64ebd1a147", + "team": { + "name": "Rafal's Team", + "slug": "rafals-team" + }, + "dataset": { + "name": "saurabh-test", + "slug": "saurabh-test", + "dataset_management_url": "https://staging.v7labs.com/datasets/60844/dataset-management" + }, + "workview_url": "https://staging.v7labs.com/workview?dataset=60844&item=018c3a41-9aca-0fea-e62d-9a64ebd1a147" + }, + "slots": [ + { + "type": "image", + "slot_name": "0", + "width": 192, + "height": 263, + "thumbnail_url": "https://staging.v7labs.com/api/v2/teams/rafals-team/files/6c88bb6a-b108-47c7-a9bf-754e9bd32330/thumbnail", + "source_files": [ + { + "file_name": "221b-1.jpeg", + "url": "https://staging.v7labs.com/api/v2/teams/rafals-team/uploads/403ef007-817f-4df5-8566-e81b7adb06a2" + } + ] + } + ] + }, + "annotations": [ + { + "bounding_box": { + "h": 12.0, + "w": 3.0, + "x": 101.0, + "y": 89.0 + }, + "id": "3893e60c-436b-47a8-9544-ff60d018a694", + "name": "1", + "polygon": { + "paths": [ + [ + { + "x": 102.0, + "y": 89.0 + }, + { + "x": 101.0, + "y": 90.0 + }, + { + "x": 102.0, + "y": 91.0 + }, + { + "x": 102.0, + "y": 101.0 + }, + { + "x": 104.0, + "y": 101.0 + }, + { + "x": 103.0, + "y": 100.0 + }, + { + "x": 103.0, + "y": 89.0 + } + ] + ] + }, + "properties": [ + { + "frame_index": 0, + "name": "1.2", + "type": "string", + "value": "1.2.2" + }, + { + "frame_index": 0, + "name": "1.1", + "type": "string", + "value": "1.1.2" + }, + { + "frame_index": 0, + "name": "1.3", + "type": "string", + "value": "1.3.1" + }, + { + "frame_index": 0, + "name": "1.1", + "type": "string", + "value": "1.1.1" + }, + { + "frame_index": 0, + "name": "1.2", + "type": "string", + "value": "1.2.1" + } + ], + "slot_names": [ + "0" + ], + "text": { + "text": "hello" + } + }, + { + "bounding_box": { + "h": 11.0, + "w": 7.0, + "x": 91.0, + "y": 89.0 + }, + "id": "2c6097ae-510e-4ed9-af1f-2bf0fc71021b", + "name": "2", + "polygon": { + "paths": [ + [ + { + "x": 92.0, + "y": 89.0 + }, + { + "x": 92.0, + "y": 91.0 + }, + { + "x": 93.0, + "y": 91.0 + }, + { + "x": 94.0, + "y": 90.0 + }, + { + "x": 95.0, + "y": 90.0 + }, + { + "x": 97.0, + "y": 92.0 + }, + { + "x": 97.0, + "y": 94.0 + }, + { + "x": 95.0, + "y": 96.0 + }, + { + "x": 94.0, + "y": 96.0 + }, + { + "x": 94.0, + "y": 97.0 + }, + { + "x": 91.0, + "y": 100.0 + }, + { + "x": 93.0, + "y": 100.0 + }, + { + "x": 93.0, + "y": 99.0 + }, + { + "x": 94.0, + "y": 98.0 + }, + { + "x": 96.0, + "y": 100.0 + }, + { + "x": 97.0, + "y": 99.0 + }, + { + "x": 98.0, + "y": 100.0 + }, + { + "x": 97.0, + "y": 99.0 + }, + { + "x": 97.0, + "y": 98.0 + }, + { + "x": 96.0, + "y": 97.0 + }, + { + "x": 98.0, + "y": 95.0 + }, + { + "x": 98.0, + "y": 90.0 + }, + { + "x": 97.0, + "y": 89.0 + } + ] + ] + }, + "properties": [], + "slot_names": [ + "0" + ] + }, + { + "bounding_box": { + "h": 12.0, + "w": 8.0, + "x": 80.0, + "y": 88.0 + }, + "id": "3358878c-c473-4d97-868f-ac7afa91de81", + "name": "2", + "polygon": { + "paths": [ + [ + { + "x": 80.0, + "y": 88.0 + }, + { + "x": 80.0, + "y": 90.0 + }, + { + "x": 81.0, + "y": 89.0 + }, + { + "x": 82.0, + "y": 89.0 + }, + { + "x": 83.0, + "y": 88.0 + }, + { + "x": 84.0, + "y": 88.0 + }, + { + "x": 87.0, + "y": 91.0 + }, + { + "x": 87.0, + "y": 92.0 + }, + { + "x": 80.0, + "y": 99.0 + }, + { + "x": 80.0, + "y": 100.0 + }, + { + "x": 88.0, + "y": 100.0 + }, + { + "x": 88.0, + "y": 98.0 + }, + { + "x": 87.0, + "y": 99.0 + }, + { + "x": 85.0, + "y": 99.0 + }, + { + "x": 84.0, + "y": 98.0 + }, + { + "x": 84.0, + "y": 97.0 + }, + { + "x": 88.0, + "y": 93.0 + }, + { + "x": 88.0, + "y": 90.0 + }, + { + "x": 86.0, + "y": 88.0 + } + ] + ] + }, + "properties": [], + "slot_names": [ + "0" + ] + } + ] +} \ No newline at end of file diff --git a/tests/darwin/data/annotation_without_properties.json b/tests/darwin/data/annotation_without_properties.json new file mode 100644 index 000000000..491353d8d --- /dev/null +++ b/tests/darwin/data/annotation_without_properties.json @@ -0,0 +1,294 @@ +{ + "version": "2.0", + "schema_ref": "https://darwin-public.s3.eu-west-1.amazonaws.com/darwin_json/2.0/schema.json", + "item": { + "name": "221b-1.jpeg", + "path": "/", + "source_info": { + "item_id": "018c3a41-9aca-0fea-e62d-9a64ebd1a147", + "team": { + "name": "Rafal's Team", + "slug": "rafals-team" + }, + "dataset": { + "name": "saurabh-test", + "slug": "saurabh-test", + "dataset_management_url": "https://staging.v7labs.com/datasets/60844/dataset-management" + }, + "workview_url": "https://staging.v7labs.com/workview?dataset=60844&item=018c3a41-9aca-0fea-e62d-9a64ebd1a147" + }, + "slots": [ + { + "type": "image", + "slot_name": "0", + "width": 192, + "height": 263, + "thumbnail_url": "https://staging.v7labs.com/api/v2/teams/rafals-team/files/6c88bb6a-b108-47c7-a9bf-754e9bd32330/thumbnail", + "source_files": [ + { + "file_name": "221b-1.jpeg", + "url": "https://staging.v7labs.com/api/v2/teams/rafals-team/uploads/403ef007-817f-4df5-8566-e81b7adb06a2" + } + ] + } + ] + }, + "annotations": [ + { + "bounding_box": { + "h": 12.0, + "w": 3.0, + "x": 101.0, + "y": 89.0 + }, + "id": "3893e60c-436b-47a8-9544-ff60d018a694", + "name": "1", + "polygon": { + "paths": [ + [ + { + "x": 102.0, + "y": 89.0 + }, + { + "x": 101.0, + "y": 90.0 + }, + { + "x": 102.0, + "y": 91.0 + }, + { + "x": 102.0, + "y": 101.0 + }, + { + "x": 104.0, + "y": 101.0 + }, + { + "x": 103.0, + "y": 100.0 + }, + { + "x": 103.0, + "y": 89.0 + } + ] + ] + }, + "slot_names": [ + "0" + ], + "text": { + "text": "hello" + } + }, + { + "bounding_box": { + "h": 11.0, + "w": 7.0, + "x": 91.0, + "y": 89.0 + }, + "id": "2c6097ae-510e-4ed9-af1f-2bf0fc71021b", + "name": "2", + "polygon": { + "paths": [ + [ + { + "x": 92.0, + "y": 89.0 + }, + { + "x": 92.0, + "y": 91.0 + }, + { + "x": 93.0, + "y": 91.0 + }, + { + "x": 94.0, + "y": 90.0 + }, + { + "x": 95.0, + "y": 90.0 + }, + { + "x": 97.0, + "y": 92.0 + }, + { + "x": 97.0, + "y": 94.0 + }, + { + "x": 95.0, + "y": 96.0 + }, + { + "x": 94.0, + "y": 96.0 + }, + { + "x": 94.0, + "y": 97.0 + }, + { + "x": 91.0, + "y": 100.0 + }, + { + "x": 93.0, + "y": 100.0 + }, + { + "x": 93.0, + "y": 99.0 + }, + { + "x": 94.0, + "y": 98.0 + }, + { + "x": 96.0, + "y": 100.0 + }, + { + "x": 97.0, + "y": 99.0 + }, + { + "x": 98.0, + "y": 100.0 + }, + { + "x": 97.0, + "y": 99.0 + }, + { + "x": 97.0, + "y": 98.0 + }, + { + "x": 96.0, + "y": 97.0 + }, + { + "x": 98.0, + "y": 95.0 + }, + { + "x": 98.0, + "y": 90.0 + }, + { + "x": 97.0, + "y": 89.0 + } + ] + ] + }, + "slot_names": [ + "0" + ] + }, + { + "bounding_box": { + "h": 12.0, + "w": 8.0, + "x": 80.0, + "y": 88.0 + }, + "id": "3358878c-c473-4d97-868f-ac7afa91de81", + "name": "2", + "polygon": { + "paths": [ + [ + { + "x": 80.0, + "y": 88.0 + }, + { + "x": 80.0, + "y": 90.0 + }, + { + "x": 81.0, + "y": 89.0 + }, + { + "x": 82.0, + "y": 89.0 + }, + { + "x": 83.0, + "y": 88.0 + }, + { + "x": 84.0, + "y": 88.0 + }, + { + "x": 87.0, + "y": 91.0 + }, + { + "x": 87.0, + "y": 92.0 + }, + { + "x": 80.0, + "y": 99.0 + }, + { + "x": 80.0, + "y": 100.0 + }, + { + "x": 88.0, + "y": 100.0 + }, + { + "x": 88.0, + "y": 98.0 + }, + { + "x": 87.0, + "y": 99.0 + }, + { + "x": 85.0, + "y": 99.0 + }, + { + "x": 84.0, + "y": 98.0 + }, + { + "x": 84.0, + "y": 97.0 + }, + { + "x": 88.0, + "y": 93.0 + }, + { + "x": 88.0, + "y": 90.0 + }, + { + "x": 86.0, + "y": 88.0 + } + ] + ] + }, + "slot_names": [ + "0" + ] + } + ] +} \ No newline at end of file diff --git a/tests/darwin/datatypes_test.py b/tests/darwin/datatypes_test.py index 33ba1b927..50c081256 100644 --- a/tests/darwin/datatypes_test.py +++ b/tests/darwin/datatypes_test.py @@ -109,11 +109,11 @@ def test_split_paths_by_manifest(filename, properties_n, is_properties_enabled): manifest_path = Path(__file__).parent / f"data/{filename}" with tempfile.TemporaryDirectory() as tmpdir: - tmpdir_v7 = Path(tmpdir) / ".v7" + tmpdir = Path(tmpdir) + tmpdir_v7 = tmpdir / ".v7" tmpdir_v7.mkdir(exist_ok=True) shutil.copy(manifest_path, tmpdir_v7) - tmpdir = Path(tmpdir) _path, properties = split_paths_by_manifest(tmpdir, filename=filename) is_path_file = _path.is_file() diff --git a/tests/darwin/path_utils_test.py b/tests/darwin/path_utils_test.py index f419f0c9d..314d683cf 100644 --- a/tests/darwin/path_utils_test.py +++ b/tests/darwin/path_utils_test.py @@ -1,6 +1,15 @@ +import shutil +import tempfile from pathlib import Path, PurePosixPath -from darwin.path_utils import construct_full_path, deconstruct_full_path, parse_manifest +import pytest + +from darwin.path_utils import ( + construct_full_path, + deconstruct_full_path, + is_properties_enabled, + parse_manifest, +) def test_path_construction(): @@ -43,3 +52,41 @@ def test_parse_manifest(): # check that the manifest is parsed correctly assert len(manifest["classes"]) == 1 assert len(manifest["classes"][0]["properties"]) == 2 + + +@pytest.mark.parametrize( + ("filename", "expected_bool"), + ( + ("annotation_with_properties.json", True), + ("annotation_without_properties.json", False), + ), +) +def test_is_properties_enabled(filename, expected_bool): + annotation_path = Path(__file__).parent / f"data/{filename}" + + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = Path(tmpdir) + tmpdir_annotations = tmpdir / "annotations" + tmpdir_annotations.mkdir(exist_ok=True) + shutil.copy(annotation_path, tmpdir_annotations) + + assert is_properties_enabled(tmpdir) == expected_bool + + +@pytest.mark.parametrize( + ("filename", "expected_bool"), + ( + ("manifest.json", True), + ("manifest_nested_properties.json", True), + ("manifest_empty_properties.json", False), + ), +) +def test_is_properties_enabled_v7(filename, expected_bool): + manifest_path = Path(__file__).parent / f"data/{filename}" + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = Path(tmpdir) + tmpdir_v7 = tmpdir / ".v7" + tmpdir_v7.mkdir(exist_ok=True) + shutil.copy(manifest_path, tmpdir_v7) + + assert is_properties_enabled(tmpdir, filename=filename) == expected_bool From af38cee694f317022946492aca77cdd35d2cefb8 Mon Sep 17 00:00:00 2001 From: Saurabh Chopra Date: Wed, 6 Dec 2023 16:28:14 +0000 Subject: [PATCH 6/6] update manifest -> metadata, add PropertyClass and updated tests accordingly --- darwin/datatypes.py | 70 ++++++++++++------- darwin/path_utils.py | 24 +++---- .../data/{manifest.json => metadata.json} | 0 ...es.json => metadata_empty_properties.json} | 0 ...s.json => metadata_nested_properties.json} | 0 tests/darwin/datatypes_test.py | 42 ++++++----- tests/darwin/path_utils_test.py | 24 +++---- 7 files changed, 94 insertions(+), 66 deletions(-) rename tests/darwin/data/{manifest.json => metadata.json} (100%) rename tests/darwin/data/{manifest_empty_properties.json => metadata_empty_properties.json} (100%) rename tests/darwin/data/{manifest_nested_properties.json => metadata_nested_properties.json} (100%) diff --git a/darwin/datatypes.py b/darwin/datatypes.py index 168d8bc83..56ccf2a73 100644 --- a/darwin/datatypes.py +++ b/darwin/datatypes.py @@ -22,7 +22,7 @@ except ImportError: NDArray = Any # type:ignore -from darwin.path_utils import construct_full_path, is_properties_enabled, parse_manifest +from darwin.path_utils import construct_full_path, is_properties_enabled, parse_metadata # Utility types @@ -407,55 +407,75 @@ class Property: options: list[dict[str, str]] -def parse_properties(manifest: dict[str, Any]) -> list[Property]: +@dataclass +class PropertyClass: + name: str + type: str + description: Optional[str] + color: Optional[str] = None + sub_types: Optional[list[str]] = None + properties: Optional[list[Property]] = None + + +def parse_property_classes(metadata: dict[str, Any]) -> list[PropertyClass]: """ - Parses the given manifest and returns a list of all the properties in it. + Parses the metadata file and returns a list of PropertyClass objects. Parameters ---------- - properties : list[dict] - The properties to parse. + metadata : dict[str, Any] + The metadata file. Returns ------- - list[Property] - A list of all the properties in the given manifest. + list[PropertyClass] + A list of PropertyClass objects. """ - assert "classes" in manifest, "Manifest does not contain classes" + assert "classes" in metadata, "Metadata does not contain classes" - properties = [] - for m_cls in manifest["classes"]: - for property in m_cls["properties"]: - properties.append(Property(**property)) + classes = [] + for metadata_cls in metadata["classes"]: + assert ( + "properties" in metadata_cls + ), "Metadata class does not contain properties" + classes.append( + PropertyClass( + name=metadata_cls["name"], + type=metadata_cls["type"], + description=metadata_cls.get("description"), + color=metadata_cls.get("color"), + sub_types=metadata_cls.get("sub_types"), + properties=[Property(**p) for p in metadata_cls["properties"]], + ) + ) - return properties + return classes -def split_paths_by_manifest( - path, dir: str = ".v7", filename: str = "manifest.json" -) -> tuple[Path, Optional[list[Property]]]: +def split_paths_by_metadata( + path, dir: str = ".v7", filename: str = "metadata.json" +) -> tuple[Path, Optional[list[PropertyClass]]]: """ - Returns the path to the manifest and the properties of the given path. + Splits the given path into two: the path to the metadata file and the path to the properties Parameters ---------- path : Path - The path to the manifest file, if exists. otherwise the path to the export dir itself. + The path to the export directory. Returns ------- - tuple[Path, Optional[list[Property]]] - A tuple where the first element is the path to the manifest and the second is the list of - properties of the given path. + tuple[Path, Optional[list[PropertyClass]]] + A tuple containing the path to the metadata file and the list of property classes. """ if not is_properties_enabled(path, dir, filename): return path, None - manifest_path = path / dir / filename - manifest = parse_manifest(manifest_path) - properties = parse_properties(manifest) + metadata_path = path / dir / filename + metadata = parse_metadata(metadata_path) + property_classes = parse_property_classes(metadata) - return manifest_path, properties + return metadata_path, property_classes @dataclass diff --git a/darwin/path_utils.py b/darwin/path_utils.py index c58dae5f2..6d4197d3b 100644 --- a/darwin/path_utils.py +++ b/darwin/path_utils.py @@ -46,30 +46,30 @@ def deconstruct_full_path(filename: str) -> Tuple[str, str]: return str(posix_path.parent), posix_path.name -def parse_manifest(path: Path) -> dict: +def parse_metadata(path: Path) -> dict: """ - Parses the given manifest and returns a list of all the properties in it. + Returns the parsed metadata file. Parameters ---------- - path : str - The path to the manifest. + path : Path + The path to the metadata file. Returns ------- - list[Property] - A list of all the properties in the given manifest. + dict + The parsed metadata file. """ with open(path) as f: - manifest = json.load(f) + metadata = json.load(f) - return manifest + return metadata def is_properties_enabled( export_dir_path: Path, dir: str = ".v7", - filename: str = "manifest.json", + filename: str = "metadata.json", annotations_dir: str = "annotations", ) -> bool: """ @@ -94,6 +94,6 @@ def is_properties_enabled( return True return False - manifest_path = path / filename - manifest_classes = parse_manifest(manifest_path).get("classes", []) - return any(_cls.get("properties") for _cls in manifest_classes) + metadata_path = path / filename + metadata_classes = parse_metadata(metadata_path).get("classes", []) + return any(_cls.get("properties") for _cls in metadata_classes) diff --git a/tests/darwin/data/manifest.json b/tests/darwin/data/metadata.json similarity index 100% rename from tests/darwin/data/manifest.json rename to tests/darwin/data/metadata.json diff --git a/tests/darwin/data/manifest_empty_properties.json b/tests/darwin/data/metadata_empty_properties.json similarity index 100% rename from tests/darwin/data/manifest_empty_properties.json rename to tests/darwin/data/metadata_empty_properties.json diff --git a/tests/darwin/data/manifest_nested_properties.json b/tests/darwin/data/metadata_nested_properties.json similarity index 100% rename from tests/darwin/data/manifest_nested_properties.json rename to tests/darwin/data/metadata_nested_properties.json diff --git a/tests/darwin/datatypes_test.py b/tests/darwin/datatypes_test.py index 50c081256..784423800 100644 --- a/tests/darwin/datatypes_test.py +++ b/tests/darwin/datatypes_test.py @@ -10,8 +10,8 @@ Point, make_complex_polygon, make_polygon, - parse_properties, - split_paths_by_manifest, + parse_property_classes, + split_paths_by_metadata, ) @@ -80,32 +80,37 @@ def assert_annotation_class(annotation, name, type, internal_type=None) -> None: @pytest.mark.parametrize( - ("filename", "properties_n"), + ("filename", "property_class_n", "properties_n"), ( - ("manifest.json", 2), - ("manifest_nested_properties.json", 1), - ("manifest_empty_properties.json", 0), + ("metadata.json", 1, [2]), + ("metadata_nested_properties.json", 3, [0, 0, 1]), + ("metadata_empty_properties.json", 3, [0, 0, 0]), ), ) -def test_parse_properties(filename, properties_n): +def test_parse_properties(filename, property_class_n, properties_n): manifest_path = Path(__file__).parent / f"data/{filename}" with open(manifest_path) as f: manifest = json.load(f) - properties = parse_properties(manifest) - assert len(properties) == properties_n + property_classes = parse_property_classes(manifest) + assert len(property_classes) == property_class_n + assert [ + len(property_class.properties or []) for property_class in property_classes + ] == properties_n @pytest.mark.parametrize( - ("filename", "properties_n", "is_properties_enabled"), + ("filename", "property_class_n", "properties_n", "is_properties_enabled"), ( - ("manifest.json", 2, True), - ("manifest_nested_properties.json", 1, True), - ("manifest_empty_properties.json", 0, False), + ("metadata.json", 1, [2], True), + ("metadata_nested_properties.json", 3, [0, 0, 1], True), + ("metadata_empty_properties.json", 0, [], False), ), ) -def test_split_paths_by_manifest(filename, properties_n, is_properties_enabled): +def test_split_paths_by_manifest( + filename, property_class_n, properties_n, is_properties_enabled +): manifest_path = Path(__file__).parent / f"data/{filename}" with tempfile.TemporaryDirectory() as tmpdir: @@ -114,9 +119,12 @@ def test_split_paths_by_manifest(filename, properties_n, is_properties_enabled): tmpdir_v7.mkdir(exist_ok=True) shutil.copy(manifest_path, tmpdir_v7) - _path, properties = split_paths_by_manifest(tmpdir, filename=filename) + _path, property_classes = split_paths_by_metadata(tmpdir, filename=filename) is_path_file = _path.is_file() assert is_path_file == is_properties_enabled - assert bool(properties) == is_properties_enabled - assert len(properties or []) == properties_n + assert len(property_classes or []) == property_class_n + assert [ + len(property_class.properties or []) + for property_class in property_classes or [] + ] == properties_n diff --git a/tests/darwin/path_utils_test.py b/tests/darwin/path_utils_test.py index 314d683cf..838a106ab 100644 --- a/tests/darwin/path_utils_test.py +++ b/tests/darwin/path_utils_test.py @@ -8,7 +8,7 @@ construct_full_path, deconstruct_full_path, is_properties_enabled, - parse_manifest, + parse_metadata, ) @@ -45,13 +45,13 @@ def test_path_deconstruction(): assert ("/", "test.png") == deconstruct_full_path("/test.png") -def test_parse_manifest(): - manifest_path = Path(__file__).parent / "data/manifest.json" - manifest = parse_manifest(manifest_path) +def test_parse_metadata(): + metadata_path = Path(__file__).parent / "data/metadata.json" + metadata = parse_metadata(metadata_path) - # check that the manifest is parsed correctly - assert len(manifest["classes"]) == 1 - assert len(manifest["classes"][0]["properties"]) == 2 + # check that the metadata is parsed correctly + assert len(metadata["classes"]) == 1 + assert len(metadata["classes"][0]["properties"]) == 2 @pytest.mark.parametrize( @@ -76,17 +76,17 @@ def test_is_properties_enabled(filename, expected_bool): @pytest.mark.parametrize( ("filename", "expected_bool"), ( - ("manifest.json", True), - ("manifest_nested_properties.json", True), - ("manifest_empty_properties.json", False), + ("metadata.json", True), + ("metadata_nested_properties.json", True), + ("metadata_empty_properties.json", False), ), ) def test_is_properties_enabled_v7(filename, expected_bool): - manifest_path = Path(__file__).parent / f"data/{filename}" + metadata_path = Path(__file__).parent / f"data/{filename}" with tempfile.TemporaryDirectory() as tmpdir: tmpdir = Path(tmpdir) tmpdir_v7 = tmpdir / ".v7" tmpdir_v7.mkdir(exist_ok=True) - shutil.copy(manifest_path, tmpdir_v7) + shutil.copy(metadata_path, tmpdir_v7) assert is_properties_enabled(tmpdir, filename=filename) == expected_bool