class UnsupportedImportAnnotationType(Exception):
    """
    Used when one tries to parse an annotation with an unsupported type.
    """

    def __init__(self, import_type: str, annotation_type: str):
        """
        Parameters
        ----------
        import_type: str
            The type of import, e.g. "dataloop".
        annotation_type: str
            The unsupported annotation type.
        """
        super().__init__(
            f"Unsupported annotation type {annotation_type} for {import_type} import"
        )
        # Kept as attributes so callers can inspect what was rejected without
        # having to parse the message.
        self.import_type = import_type
        self.annotation_type = annotation_type


class DataloopComplexPolygonsNotYetSupported(Exception):
    """
    Used when one tries to parse an annotation with a complex polygon.
    """

    def __init__(self):
        """
        Build the exception with its fixed message; it takes no arguments.
        """
        super().__init__("Complex polygons not yet supported for dataloop import")
_remove_leading_slash(data["filename"]), + annotation_classes, + annotations, + remote_path="/", ) @@ -42,8 +54,8 @@ def _remove_leading_slash(filename: str) -> str: def _parse_annotation(annotation: Dict[str, Any]) -> Optional[dt.Annotation]: annotation_type = annotation["type"] annotation_label = annotation["label"] - if annotation_type not in ["box", "class"]: - raise ValueError(f"Unknown supported annotation type: {annotation_type}") + if annotation_type not in ["box", "class", "segment"]: + raise UnsupportedImportAnnotationType("dataloop", annotation_type) if len(annotation["metadata"]["system"].get("snapshots_", [])) > 1: raise ValueError("multiple snapshots per annotations are not supported") @@ -58,4 +70,12 @@ def _parse_annotation(annotation: Dict[str, Any]) -> Optional[dt.Annotation]: x2, y2 = coords[1]["x"], coords[1]["y"] return dt.make_bounding_box(annotation_label, x1, y1, x2 - x1, y2 - y1) + if annotation_type == "segment": + coords = annotation["coordinates"] + if len(coords) != 1: + raise DataloopComplexPolygonsNotYetSupported() + + points: List[dt.Point] = [{"x": c["x"], "y": c["y"]} for c in coords[0]] + return dt.make_polygon(annotation_label, point_path=points) + return None diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 000000000..1fd489337 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,2 @@ +[flake8] +max-line-length = 160 diff --git a/setup.py b/setup.py index d7601ca7f..d61f1d3c2 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ with open(Path(__file__).parent / "darwin" / "version" / "__init__.py", "r") as f: content = f.read() # from https://www.py4u.net/discuss/139845 - version = re.search(r'__version__\s*=\s*[\'"]([^\'"]*)[\'"]', content).group(1) + version = re.search(r'__version__\s*=\s*[\'"]([^\'"]*)[\'"]', content).group(1) # type: ignore with open("README.md", "rb") as f: long_description = f.read().decode("utf-8") @@ -37,6 +37,7 @@ ], extras_require={ "test": ["responses", "pytest", "pytest-describe", "scikit-learn"], 
+ "dev": ["black", "flake8", "isort", "mypy", "responses", "pytest", "pytest-describe", "scikit-learn"], "ml": ["scikit-learn", "torch", "torchvision"], "medical": ["nibabel", "connected-components-3d"], }, diff --git a/tests/darwin/data/dataloop.example.json b/tests/darwin/data/dataloop.example.json new file mode 100644 index 000000000..64ff82378 --- /dev/null +++ b/tests/darwin/data/dataloop.example.json @@ -0,0 +1,363 @@ +{ + "annotations": [ + { + "id": "1", + "datasetId": "61d5e83f5575066ffd58fcda", + "type": "box", + "label": "box_class", + "attributes": [], + "coordinates": [ + { + "x": 288.81, + "y": 845.49 + }, + { + "x": 2221.32, + "y": 3528.24 + } + ], + "metadata": { + "system": { + "status": null, + "startTime": 0, + "endTime": 1, + "frame": 0, + "endFrame": 1, + "snapshots_": [], + "parentId": null, + "clientId": "ab0eaf4b-4dc5-471c-921f-f8928d54d1a1", + "automated": false, + "objectId": "2", + "isOpen": false, + "isOnlyLocal": false, + "attributes": { + "1": false + }, + "clientParentId": null, + "system": false, + "description": null, + "itemLinks": [], + "openAnnotationVersion": "1.29.1-rc.8", + "recipeId": "61d5e83f71a8721c42b8f02e", + "taskId": "61d5eb2f3e1b9e55742d498c", + "assignmentId": "61d5eb2f3e1b9eb7c22d498d" + }, + "user": {} + }, + "creator": "vakasix267@zoeyy.com", + "createdAt": "2022-01-05T19:26:23.252Z", + "updatedBy": "vakasix267@zoeyy.com", + "updatedAt": "2022-01-05T19:26:23.252Z", + "itemId": "61d5eb9d14b5b8ea83e18037", + "url": "https://rc-gate.dataloop.ai/api/v1/annotations/61d5f0dfebda8c3885b7eae5", + "item": "https://rc-gate.dataloop.ai/api/v1/items/61d5eb9d14b5b8ea83e18037", + "dataset": "https://rc-gate.dataloop.ai/api/v1/datasets/61d5e83f5575066ffd58fcda", + "hash": "e8726832771219919976179815544bba59bf15d5", + "source": "ui" + }, + { + "id": "2", + "datasetId": "61d5e83f5575066ffd58fcda", + "type": "class", + "label": "class_class", + "attributes": [], + "metadata": { + "system": { + "status": null, + "startTime": 0, + 
"endTime": 1, + "frame": 0, + "endFrame": 1, + "snapshots_": [], + "parentId": null, + "clientId": "ab0eaf4b-4dc5-471c-921f-f8928d54d1a1", + "automated": false, + "objectId": "2", + "isOpen": false, + "isOnlyLocal": false, + "attributes": { + "1": false + }, + "clientParentId": null, + "system": false, + "description": null, + "itemLinks": [], + "openAnnotationVersion": "1.29.1-rc.8", + "recipeId": "61d5e83f71a8721c42b8f02e", + "taskId": "61d5eb2f3e1b9e55742d498c", + "assignmentId": "61d5eb2f3e1b9eb7c22d498d" + }, + "user": {} + }, + "creator": "vakasix267@zoeyy.com", + "createdAt": "2022-01-05T19:26:23.252Z", + "updatedBy": "vakasix267@zoeyy.com", + "updatedAt": "2022-01-05T19:26:23.252Z", + "itemId": "61d5eb9d14b5b8ea83e18037", + "url": "https://rc-gate.dataloop.ai/api/v1/annotations/61d5f0dfebda8c3885b7eae5", + "item": "https://rc-gate.dataloop.ai/api/v1/items/61d5eb9d14b5b8ea83e18037", + "dataset": "https://rc-gate.dataloop.ai/api/v1/datasets/61d5e83f5575066ffd58fcda", + "hash": "e8726832771219919976179815544bba59bf15d5", + "source": "ui" + }, + { + "id": "3", + "datasetId": "61d5e83f5575066ffd58fcda", + "type": "segment", + "label": "segment_class", + "attributes": [], + "coordinates": [ + [ + { + "x": 856.7307692307692, + "y": 1077.8846153846152 + }, + { + "x": 575, + "y": 657.6923076923076 + }, + { + "x": 989.4230769230768, + "y": 409.6153846153846 + }, + { + "x": 974.0384615384614, + "y": 640.3846153846154 + }, + { + "x": 1033.653846153846, + "y": 915.3846153846154 + }, + { + "x": 1106.730769230769, + "y": 1053.8461538461538 + }, + { + "x": 1204.8076923076922, + "y": 1079.8076923076922 + } + ] + ], + "metadata": { + "system": { + "status": null, + "startTime": 0, + "endTime": 1, + "frame": 0, + "endFrame": 1, + "snapshots_": [], + "parentId": null, + "clientId": "ab0eaf4b-4dc5-471c-921f-f8928d54d1a1", + "automated": false, + "objectId": "2", + "isOpen": false, + "isOnlyLocal": false, + "attributes": { + "1": false + }, + "clientParentId": null, + "system": 
false, + "description": null, + "itemLinks": [], + "openAnnotationVersion": "1.29.1-rc.8", + "recipeId": "61d5e83f71a8721c42b8f02e", + "taskId": "61d5eb2f3e1b9e55742d498c", + "assignmentId": "61d5eb2f3e1b9eb7c22d498d" + }, + "user": {} + }, + "creator": "vakasix267@zoeyy.com", + "createdAt": "2022-01-05T19:26:23.252Z", + "updatedBy": "vakasix267@zoeyy.com", + "updatedAt": "2022-01-05T19:26:23.252Z", + "itemId": "61d5eb9d14b5b8ea83e18037", + "url": "https://rc-gate.dataloop.ai/api/v1/annotations/61d5f0dfebda8c3885b7eae5", + "item": "https://rc-gate.dataloop.ai/api/v1/items/61d5eb9d14b5b8ea83e18037", + "dataset": "https://rc-gate.dataloop.ai/api/v1/datasets/61d5e83f5575066ffd58fcda", + "hash": "e8726832771219919976179815544bba59bf15d5", + "source": "ui" + }, + { + "id": "3", + "datasetId": "61d5e83f5575066ffd58fcda", + "type": "UNSUPPORTED_TYPE", + "label": "segment_class", + "attributes": [], + "coordinates": [ + { + "x": 856.7307692307692, + "y": 1077.8846153846152 + }, + { + "x": 575, + "y": 657.6923076923076 + }, + { + "x": 989.4230769230768, + "y": 409.6153846153846 + }, + { + "x": 974.0384615384614, + "y": 640.3846153846154 + }, + { + "x": 1033.653846153846, + "y": 915.3846153846154 + }, + { + "x": 1106.730769230769, + "y": 1053.8461538461538 + }, + { + "x": 1204.8076923076922, + "y": 1079.8076923076922 + } + ], + "metadata": { + "system": { + "status": null, + "startTime": 0, + "endTime": 1, + "frame": 0, + "endFrame": 1, + "snapshots_": [], + "parentId": null, + "clientId": "ab0eaf4b-4dc5-471c-921f-f8928d54d1a1", + "automated": false, + "objectId": "2", + "isOpen": false, + "isOnlyLocal": false, + "attributes": { + "1": false + }, + "clientParentId": null, + "system": false, + "description": null, + "itemLinks": [], + "openAnnotationVersion": "1.29.1-rc.8", + "recipeId": "61d5e83f71a8721c42b8f02e", + "taskId": "61d5eb2f3e1b9e55742d498c", + "assignmentId": "61d5eb2f3e1b9eb7c22d498d" + }, + "user": {} + }, + "creator": "vakasix267@zoeyy.com", + "createdAt": 
"2022-01-05T19:26:23.252Z", + "updatedBy": "vakasix267@zoeyy.com", + "updatedAt": "2022-01-05T19:26:23.252Z", + "itemId": "61d5eb9d14b5b8ea83e18037", + "url": "https://rc-gate.dataloop.ai/api/v1/annotations/61d5f0dfebda8c3885b7eae5", + "item": "https://rc-gate.dataloop.ai/api/v1/items/61d5eb9d14b5b8ea83e18037", + "dataset": "https://rc-gate.dataloop.ai/api/v1/datasets/61d5e83f5575066ffd58fcda", + "hash": "e8726832771219919976179815544bba59bf15d5", + "source": "ui" + }, + { + "id": "5", + "datasetId": "61d5e83f5575066ffd58fcda", + "type": "segment", + "label": "segment_class", + "attributes": [], + "coordinates": [ + [ + { + "x": 856.7307692307692, + "y": 1077.8846153846152 + }, + { + "x": 575, + "y": 657.6923076923076 + }, + { + "x": 989.4230769230768, + "y": 409.6153846153846 + }, + { + "x": 974.0384615384614, + "y": 640.3846153846154 + }, + { + "x": 1033.653846153846, + "y": 915.3846153846154 + }, + { + "x": 1106.730769230769, + "y": 1053.8461538461538 + }, + { + "x": 1204.8076923076922, + "y": 1079.8076923076922 + } + ], + [ + { + "x": 856.7307692307692, + "y": 1077.8846153846152 + }, + { + "x": 575, + "y": 657.6923076923076 + }, + { + "x": 989.4230769230768, + "y": 409.6153846153846 + }, + { + "x": 974.0384615384614, + "y": 640.3846153846154 + }, + { + "x": 1033.653846153846, + "y": 915.3846153846154 + }, + { + "x": 1106.730769230769, + "y": 1053.8461538461538 + }, + { + "x": 1204.8076923076922, + "y": 1079.8076923076922 + } + ] + ], + "metadata": { + "system": { + "status": null, + "startTime": 0, + "endTime": 1, + "frame": 0, + "endFrame": 1, + "snapshots_": [], + "parentId": null, + "clientId": "ab0eaf4b-4dc5-471c-921f-f8928d54d1a1", + "automated": false, + "objectId": "2", + "isOpen": false, + "isOnlyLocal": false, + "attributes": { + "1": false + }, + "clientParentId": null, + "system": false, + "description": null, + "itemLinks": [], + "openAnnotationVersion": "1.29.1-rc.8", + "recipeId": "61d5e83f71a8721c42b8f02e", + "taskId": "61d5eb2f3e1b9e55742d498c", 
class DataLoopTestCase(TestCase):
    """
    Shared base class for the dataloop importer tests.

    Loads the raw text of the example dataloop export before every test and
    provides a helper for comparing floating point numbers.
    """

    def setUp(self) -> None:
        """
        Read ``tests/darwin/data/dataloop.example.json`` (resolved relative to
        this file) into ``self.DATALOOP_MOCK_DATA`` as a string.
        """
        fixture_path = realpath(join(dirname(__file__), "..", "..", "data", "dataloop.example.json"))
        # A context manager guarantees the handle is closed even if read() raises.
        with open(fixture_path, encoding="utf-8") as fixture_file:
            self.DATALOOP_MOCK_DATA = fixture_file.read()

    def assertApproximatelyEqualNumber(self, a: Union[int, float], b: Union[int, float], places: int = 8) -> None:
        """
        Fail the test unless ``a`` and ``b`` are equal within a relative
        tolerance of ``10 ** -places``.

        Parameters
        ----------
        a: Union[int, float]
            The first number.
        b: Union[int, float]
            The second number.
        places: int
            Exponent of the relative tolerance, defaults to 8.

        Raises
        ------
        AssertionError
            If the numbers differ by more than the tolerance.
        """
        # The result of isclose must actually be asserted; calling it alone
        # never fails a test.
        self.assertTrue(
            math_isclose(a, b, rel_tol=10**-places),
            f"{a} and {b} differ by more than a relative tolerance of 1e-{places}",
        )

    # Minimal parsed payload with three annotations; used as the return value
    # of a mocked json.load when exercising parse_path.
    DARWIN_PARSED_DATA = {
        "filename": "test.jpg",
        "annotations": [
            {"class": "class_1"},
            {"class": "class_2"},
            {"class": "class_3"},
        ],
    }
class TestParsePath(DataLoopTestCase):
    """Tests for ``parse_path`` with file access and parsing helpers mocked out."""

    def tearDown(self):
        # Safety net: stop any patch that was started and not stopped by a test.
        patch.stopall()

    @patch(
        "darwin.importer.formats.dataloop._remove_leading_slash",
    )
    def test_returns_none_if_file_extension_is_not_json(self, mock_remove_leading_slash):
        self.assertIsNone(parse_path(Path("foo.bar")))

    @patch(
        "darwin.importer.formats.dataloop._remove_leading_slash",
    )
    @patch("darwin.importer.formats.dataloop.json.load")
    @patch("darwin.importer.formats.dataloop.Path.open")
    @patch("darwin.importer.formats.dataloop._parse_annotation")
    def test_opens_annotations_file_and_parses(
        self,
        _parse_annotation_mock: MagicMock,
        path_open_mock: MagicMock,
        json_load_mock: MagicMock,
        mock_remove_leading_slash: MagicMock,
    ):
        # Decorators are applied bottom-up, so the mocks arrive in reverse
        # order of the @patch lines above.
        json_load_mock.return_value = self.DARWIN_PARSED_DATA
        test_path = "foo.json"

        parse_path(Path(test_path))

        # One call per entry in DARWIN_PARSED_DATA["annotations"].
        self.assertEqual(_parse_annotation_mock.call_count, 3)
        path_open_mock.assert_called_once()
        json_load_mock.assert_called_once()
        mock_remove_leading_slash.assert_called_once()


class TestRemoveLeadingSlash(DataLoopTestCase):
    """Tests for ``_remove_leading_slash``."""

    def tearDown(self) -> None:
        patch.stopall()

    def test_removes_slash_if_present(self):
        self.assertEqual(_remove_leading_slash("/foo"), "foo")

    def test_does_not_remove_slash_if_not_present(self):
        self.assertEqual(_remove_leading_slash("foo"), "foo")


class TestParseAnnotation(DataLoopTestCase):
    """
    Tests for ``_parse_annotation`` driven by the example dataloop export.

    Fixture layout (index into ``annotations``): 0 box, 1 class, 2 segment,
    3 unsupported type, 4 complex polygon (two paths).
    """

    def setUp(self):
        super().setUp()
        self.parsed_json = json_loads(self.DATALOOP_MOCK_DATA)

    def tearDown(self) -> None:
        patch.stopall()

    def test_handles_box_type(self):
        with patch("darwin.importer.formats.dataloop.dt.make_bounding_box") as make_bounding_box_mock:
            make_bounding_box_mock.return_value = Annotation("class_1", 0, 0, 0, 0)
            _parse_annotation(self.parsed_json["annotations"][0])  # 0 is a box type

            # Width/height are x2 - x1 and y2 - y1 of the fixture's two corners.
            make_bounding_box_mock.assert_called_once_with("box_class", 288.81, 845.49, 1932.5100000000002, 2682.75)

    def test_handles_class_type(self):
        annotation = _parse_annotation(self.parsed_json["annotations"][1])  # 1 is a class type
        self.assertIsNone(annotation)

    def test_handles_segment_type(self):
        expected_points = [
            (856.73076923, 1077.88461538),
            (575, 657.69230769),
            (989.42307692, 409.61538462),
            (974.03846154, 640.38461538),
            (1033.65384615, 915.38461538),
            (1106.73076923, 1053.84615385),
            (1204.80769231, 1079.80769231),
        ]

        with patch("darwin.importer.formats.dataloop.dt.make_polygon") as make_polygon_mock:
            _parse_annotation(self.parsed_json["annotations"][2])  # 2 is a segment type

        make_polygon_mock.assert_called_once()
        # call_args unpacks to (positional args, keyword args); unpacking also
        # works on Python versions that lack the .args/.kwargs attributes.
        args, kwargs = make_polygon_mock.call_args
        self.assertEqual(args[0], "segment_class")

        point_path = kwargs["point_path"]
        self.assertEqual(len(point_path), len(expected_points))
        for actual, (expected_x, expected_y) in zip(point_path, expected_points):
            # The expected values are rounded to 8 decimals, so compare with a tolerance.
            self.assertAlmostEqual(actual["x"], expected_x, places=6)
            self.assertAlmostEqual(actual["y"], expected_y, places=6)

    def test_throws_on_unknown_type(self):
        # assertRaises fails the test when nothing is raised, unlike a bare try/except.
        with self.assertRaises(UnsupportedImportAnnotationType) as raised:
            _parse_annotation(self.parsed_json["annotations"][3])  # 3 is an unsupported type

        self.assertEqual(raised.exception.import_type, "dataloop")
        self.assertEqual(raised.exception.annotation_type, "UNSUPPORTED_TYPE")

    def test_rejects_complex_polygons(self):
        with self.assertRaises(DataloopComplexPolygonsNotYetSupported):
            _parse_annotation(self.parsed_json["annotations"][4])  # 4 is a complex polygon