diff --git a/darwin/dataset/local_dataset.py b/darwin/dataset/local_dataset.py index d79ecae76..4e88cf0cc 100644 --- a/darwin/dataset/local_dataset.py +++ b/darwin/dataset/local_dataset.py @@ -9,6 +9,7 @@ from darwin.utils import ( SUPPORTED_IMAGE_EXTENSIONS, get_image_path_from_stream, + is_stream_list_empty, parse_darwin_json, stream_darwin_json, ) @@ -67,6 +68,7 @@ def __init__( split: str = "default", split_type: str = "random", release_name: Optional[str] = None, + keep_empty_annotations: bool = False, ): self.dataset_path = dataset_path self.annotation_type = annotation_type @@ -75,10 +77,9 @@ def __init__( self.original_classes = None self.original_images_path: Optional[List[Path]] = None self.original_annotations_path: Optional[List[Path]] = None + self.keep_empty_annotations = keep_empty_annotations - release_path, annotations_dir, images_dir = self._initial_setup( - dataset_path, release_name - ) + release_path, annotations_dir, images_dir = self._initial_setup(dataset_path, release_name) self._validate_inputs(partition, split_type, annotation_type) # Get the list of classes @@ -101,6 +102,7 @@ def __init__( split, partition, split_type, + keep_empty_annotations, ) if len(self.images_path) == 0: @@ -117,9 +119,7 @@ def _validate_inputs(self, partition, split_type, annotation_type): if split_type not in ["random", "stratified"]: raise ValueError("split_type should be either 'random', 'stratified'") if annotation_type not in ["tag", "polygon", "bounding_box"]: - raise ValueError( - "annotation_type should be either 'tag', 'bounding_box', or 'polygon'" - ) + raise ValueError("annotation_type should be either 'tag', 'bounding_box', or 'polygon'") def _setup_annotations_and_images( self, @@ -130,19 +130,21 @@ def _setup_annotations_and_images( split, partition, split_type, + keep_empty_annotations: bool = False, ): # Find all the annotations and their corresponding images for annotation_path in sorted(annotations_dir.glob("**/*.json")): darwin_json = stream_darwin_json(annotation_path) + image_path = get_image_path_from_stream(darwin_json, images_dir) if image_path.exists(): + if not keep_empty_annotations and is_stream_list_empty(darwin_json["annotations"]): + continue self.images_path.append(image_path) self.annotations_path.append(annotation_path) continue else: - raise ValueError( - f"Annotation ({annotation_path}) does not have a corresponding image" - ) + raise ValueError(f"Annotation ({annotation_path}) does not have a corresponding image {image_path}") def _initial_setup(self, dataset_path, release_name): assert dataset_path is not None @@ -201,9 +203,7 @@ def get_height_and_width(self, index: int) -> Tuple[float, float]: parsed = parse_darwin_json(self.annotations_path[index], index) return parsed.image_height, parsed.image_width - def extend( - self, dataset: "LocalDataset", extend_classes: bool = False - ) -> "LocalDataset": + def extend(self, dataset: "LocalDataset", extend_classes: bool = False) -> "LocalDataset": """ Extends the current dataset with another one. @@ -298,10 +298,7 @@ def parse_json(self, index: int) -> Dict[str, Any]: # Filter out unused classes and annotations of a different type if self.classes is not None: annotations = [ - a - for a in annotations - if a.annotation_class.name in self.classes - and self.annotation_type_supported(a) + a for a in annotations if a.annotation_class.name in self.classes and self.annotation_type_supported(a) ] return { "image_id": index, @@ -318,20 +315,15 @@ def annotation_type_supported(self, annotation) -> bool: elif self.annotation_type == "bounding_box": is_bounding_box = annotation_type == "bounding_box" is_supported_polygon = ( - annotation_type in ["polygon", "complex_polygon"] - and "bounding_box" in annotation.data + annotation_type in ["polygon", "complex_polygon"] and "bounding_box" in annotation.data ) return is_bounding_box or is_supported_polygon elif self.annotation_type == "polygon": return annotation_type in ["polygon", "complex_polygon"] else: - raise ValueError( - "annotation_type should be either 'tag', 'bounding_box', or 'polygon'" - ) + raise ValueError("annotation_type should be either 'tag', 'bounding_box', or 'polygon'") - def measure_mean_std( - self, multi_threaded: bool = True - ) -> Tuple[np.ndarray, np.ndarray]: + def measure_mean_std(self, multi_threaded: bool = True) -> Tuple[np.ndarray, np.ndarray]: """ Computes mean and std of trained images, given the train loader. @@ -354,9 +346,7 @@ def measure_mean_std( results = pool.map(self._return_mean, self.images_path) mean = np.sum(np.array(results), axis=0) / len(self.images_path) # Online image_classification deviation - results = pool.starmap( - self._return_std, [[item, mean] for item in self.images_path] - ) + results = pool.starmap(self._return_std, [[item, mean] for item in self.images_path]) std_sum = np.sum(np.array([item[0] for item in results]), axis=0) total_pixel_count = np.sum(np.array([item[1] for item in results])) std = np.sqrt(std_sum / total_pixel_count) @@ -402,20 +392,14 @@ def _compute_weights(labels: List[int]) -> np.ndarray: @staticmethod def _return_mean(image_path: Path) -> np.ndarray: img = np.array(load_pil_image(image_path)) - mean = np.array( - [np.mean(img[:, :, 0]), np.mean(img[:, :, 1]), np.mean(img[:, :, 2])] - ) + mean = np.array([np.mean(img[:, :, 0]), np.mean(img[:, :, 1]), np.mean(img[:, :, 2])]) return mean / 255.0 # Loads an image with OpenCV and returns the channel wise std of the image. @staticmethod def _return_std(image_path: Path, mean: np.ndarray) -> Tuple[np.ndarray, float]: img = np.array(load_pil_image(image_path)) / 255.0 - m2 = np.square( - np.array( - [img[:, :, 0] - mean[0], img[:, :, 1] - mean[1], img[:, :, 2] - mean[2]] - ) - ) + m2 = np.square(np.array([img[:, :, 0] - mean[0], img[:, :, 1] - mean[1], img[:, :, 2] - mean[2]])) return np.sum(np.sum(m2, axis=1), 1), m2.size / 3.0 def __getitem__(self, index: int): @@ -485,10 +469,7 @@ def build_stems( """ if partition is None: - return ( - str(e.relative_to(annotations_dir).parent / e.stem) - for e in sorted(annotations_dir.glob("**/*.json")) - ) + return (str(e.relative_to(annotations_dir).parent / e.stem) for e in sorted(annotations_dir.glob("**/*.json"))) if split_type == "random": split_filename = f"{split_type}_{partition}.txt" diff --git a/darwin/dataset/remote_dataset.py b/darwin/dataset/remote_dataset.py index c70dab7a1..7f9cc0467 100644 --- a/darwin/dataset/remote_dataset.py +++ b/darwin/dataset/remote_dataset.py @@ -161,7 +161,7 @@ def split_video_annotations(self, release_name: str = "latest") -> None: frame_annotations = split_video_annotation(darwin_annotation) for frame_annotation in frame_annotations: - annotation = build_image_annotation(frame_annotation) + annotation = self._build_image_annotation(frame_annotation) video_frame_annotations_path = annotations_path / annotation_file.stem video_frame_annotations_path.mkdir(exist_ok=True, parents=True) @@ -947,3 +947,8 @@ def local_images_path(self) -> Path: def identifier(self) -> DatasetIdentifier: """The ``DatasetIdentifier`` of this ``RemoteDataset``.""" return DatasetIdentifier(team_slug=self.team, dataset_slug=self.slug) + + def _build_image_annotation( + self, annotation_file: AnnotationFile + ) -> Dict[str, Any]: + return build_image_annotation(annotation_file) diff --git a/darwin/dataset/remote_dataset_v1.py b/darwin/dataset/remote_dataset_v1.py index f5a2a36e7..57fe9d744 100644 --- a/darwin/dataset/remote_dataset_v1.py +++ b/darwin/dataset/remote_dataset_v1.py @@ -12,8 +12,9 @@ UploadHandlerV1, ) from darwin.dataset.utils import is_relative_to -from darwin.datatypes import ItemId, PathLike +from darwin.datatypes import AnnotationFile, ItemId, PathLike from darwin.exceptions import NotFound, ValidationError +from darwin.exporter.formats.darwin_1_0 import build_image_annotation from darwin.item import DatasetItem from darwin.item_sorter import ItemSorter from darwin.utils import find_files, urljoin @@ -512,3 +513,8 @@ def import_annotation(self, item_id: ItemId, payload: Dict[str, Any]) -> None: """ self.client.import_annotation(item_id, payload=payload) + + def _build_image_annotation( + self, annotation_file: AnnotationFile + ) -> Dict[str, Any]: + return build_image_annotation(annotation_file) diff --git a/darwin/dataset/remote_dataset_v2.py b/darwin/dataset/remote_dataset_v2.py index 70d335d3c..215ff05b8 100644 --- a/darwin/dataset/remote_dataset_v2.py +++ b/darwin/dataset/remote_dataset_v2.py @@ -22,8 +22,9 @@ UploadHandlerV2, ) from darwin.dataset.utils import is_relative_to -from darwin.datatypes import ItemId, PathLike +from darwin.datatypes import AnnotationFile, ItemId, PathLike from darwin.exceptions import NotFound, UnknownExportVersion +from darwin.exporter.formats.darwin import build_image_annotation from darwin.item import DatasetItem from darwin.item_sorter import ItemSorter from darwin.utils import find_files, urljoin @@ -543,3 +544,8 @@ def _fetch_stages(self, stage_type): workflow_id, [stage for stage in workflow["stages"] if stage["type"] == stage_type], ) + + def _build_image_annotation( + self, annotation_file: AnnotationFile + ) -> Dict[str, Any]: + return build_image_annotation(annotation_file) diff --git a/darwin/exporter/formats/darwin.py b/darwin/exporter/formats/darwin.py index eadab47ad..1c7f3ef88 100644 --- a/darwin/exporter/formats/darwin.py +++ b/darwin/exporter/formats/darwin.py @@ -3,6 +3,8 @@ import deprecation import darwin.datatypes as dt + +# from darwin.datatypes import PolygonPath, PolygonPaths from darwin.version import __version__ DEPRECATION_MESSAGE = """ @@ -16,65 +18,151 @@ def build_image_annotation(annotation_file: dt.AnnotationFile) -> Dict[str, Any]: """ - Builds and returns a dictionary with the annotations present in the given file. + Builds and returns a dictionary with the annotations present in the given file in Darwin v2 format. Parameters ---------- - annotation_file: dt.AnnotationFile + annotation_file: AnnotationFile File with the image annotations to extract. + For schema, see: https://darwin-public.s3.eu-west-1.amazonaws.com/darwin_json/2.0/schema.json Returns ------- Dict[str, Any] - A dictionary with the annotation from the given file. Has the following structure: - - .. code-block:: python - - { - "annotations": [ - { - "annotation_type": { ... }, # annotation_data - "name": "annotation class name", - "bounding_box": { ... } # Optional parameter, only present if the file has a bounding box as well - } - ], - "image": { - "filename": "a_file_name.json", - "height": 1000, - "width": 2000, - "url": "https://www.darwin.v7labs.com/..." - } - } + A dictionary with the annotations in Darwin v2 format. """ - annotations: List[Dict[str, Any]] = [] - print(annotations) + annotations_list: List[Dict[str, Any]] = [] + for annotation in annotation_file.annotations: - payload = { - annotation.annotation_class.annotation_type: _build_annotation_data( - annotation - ), - "name": annotation.annotation_class.name, - } + annotation_data = _build_v2_annotation_data(annotation) + annotations_list.append(annotation_data) + + slots_data = _build_slots_data(annotation_file.slots) + item = _build_item_data(annotation_file) + item["slots"] = slots_data + + return { + "version": "2.0", + "schema_ref": "https://darwin-public.s3.eu-west-1.amazonaws.com/darwin_json/2.0/schema.json", + "item": item, + "annotations": annotations_list, + } + + +def _build_v2_annotation_data(annotation: dt.Annotation) -> Dict[str, Any]: + annotation_data = {"id": annotation.id, "name": annotation.annotation_class.name} + + if annotation.annotation_class.annotation_type == "bounding_box": + annotation_data["bounding_box"] = _build_bounding_box_data(annotation.data) + elif annotation.annotation_class.annotation_type == "tag": + annotation_data["tag"] = {} + elif ( + annotation.annotation_class.annotation_type == "polygon" + or annotation.annotation_class.annotation_type == "complex_polygon" + ): + polygon_data = _build_polygon_data(annotation.data) + annotation_data["polygon"] = polygon_data + annotation_data["bounding_box"] = _build_bounding_box_data(annotation.data) + + return annotation_data + + +def _build_bounding_box_data(data: Dict[str, Any]) -> Dict[str, Any]: + if "bounding_box" in data: + data = data["bounding_box"] + return { + "h": data.get("h"), + "w": data.get("w"), + "x": data.get("x"), + "y": data.get("y"), + } + + +def _build_polygon_data(data: Dict[str, Any]) -> Dict[str, Any]: + """ + Builds the polygon data for Darwin V2 format from Darwin internal format (looks like V1). + + Parameters + ---------- + data : Dict[str, Any] + The original data for the polygon annotation. + + Returns + ------- + Dict[str, List[List[Dict[str, float]]]] + The polygon data in the format required for Darwin v2 annotations. + """ + + # Complex polygon + if "paths" in data: + return {"paths": data["paths"]} + else: + return {"paths": [data["path"]]} - if ( - annotation.annotation_class.annotation_type == "complex_polygon" - or annotation.annotation_class.annotation_type == "polygon" - ) and "bounding_box" in annotation.data: - payload["bounding_box"] = annotation.data["bounding_box"] - annotations.append(payload) +def _build_item_data(annotation_file: dt.AnnotationFile) -> Dict[str, Any]: + """ + Constructs the 'item' section of the Darwin v2 format annotation. + + Parameters + ---------- + annotation_file: dt.AnnotationFile + The AnnotationFile object containing annotation data. + Returns + ------- + Dict[str, Any] + The 'item' section of the Darwin v2 format annotation. + """ return { - "annotations": annotations, - "image": { - "filename": annotation_file.filename, - "height": annotation_file.image_height, - "width": annotation_file.image_width, - "url": annotation_file.image_url, + "name": annotation_file.filename, + "path": annotation_file.remote_path or "/", + "source_info": { + "dataset": { + "name": annotation_file.dataset_name, + "slug": annotation_file.dataset_name.lower().replace(" ", "-") + if annotation_file.dataset_name + else None, + }, + "item_id": annotation_file.item_id, + "team": { + "name": None, # TODO Replace with actual team name + "slug": None, # TODO Replace with actual team slug + }, + "workview_url": annotation_file.workview_url, }, } +def _build_slots_data(slots: List[dt.Slot]) -> List[Dict[str, Any]]: + """ + Constructs the 'slots' data for the Darwin v2 format annotation. + + Parameters + ---------- + slots: List[Slot] + A list of Slot objects from the AnnotationFile. + + Returns + ------- + List[Dict[str, Any]] + The 'slots' data for the Darwin v2 format annotation. + """ + slots_data = [] + for slot in slots: + slot_data = { + "type": slot.type, + "slot_name": slot.name, + "width": slot.width, + "height": slot.height, + "thumbnail_url": slot.thumbnail_url, + "source_files": slot.source_files, + } + slots_data.append(slot_data) + + return slots_data + + @deprecation.deprecated( deprecated_in="0.7.8", removed_in="0.8.0", diff --git a/darwin/exporter/formats/darwin_1_0.py b/darwin/exporter/formats/darwin_1_0.py index 5654e5f5b..f78af61e8 100644 --- a/darwin/exporter/formats/darwin_1_0.py +++ b/darwin/exporter/formats/darwin_1_0.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Iterable, List, Union +from typing import Any, Dict, Iterable, List, Union import orjson as json @@ -45,23 +45,17 @@ def _export_file(annotation_file: AnnotationFile, _: int, output_dir: Path) -> N try: output: DictFreeForm = _build_json(annotation_file) except Exception as e: - raise ExportException_CouldNotBuildOutput( - f"Could not build output for {annotation_file.path}" - ) from e + raise ExportException_CouldNotBuildOutput(f"Could not build output for {annotation_file.path}") from e try: with open(output_file_path, "w") as f: op = json.dumps( output, - option=json.OPT_INDENT_2 - | json.OPT_SERIALIZE_NUMPY - | json.OPT_NON_STR_KEYS, + option=json.OPT_INDENT_2 | json.OPT_SERIALIZE_NUMPY | json.OPT_NON_STR_KEYS, ).decode("utf-8") f.write(op) except Exception as e: - raise ExportException_CouldNotWriteFile( - f"Could not write output for {annotation_file.path}" - ) from e + raise ExportException_CouldNotWriteFile(f"Could not write output for {annotation_file.path}") from e def _build_json(annotation_file: AnnotationFile) -> DictFreeForm: @@ -136,17 +130,11 @@ def _build_sub_annotation(sub: SubAnnotation) -> DictFreeForm: def _build_authorship(annotation: Union[VideoAnnotation, Annotation]) -> DictFreeForm: annotators = {} if annotation.annotators: - annotators = { - "annotators": [ - _build_author(annotator) for annotator in annotation.annotators - ] - } + annotators = {"annotators": [_build_author(annotator) for annotator in annotation.annotators]} reviewers = {} if annotation.reviewers: - reviewers = { - "annotators": [_build_author(reviewer) for reviewer in annotation.reviewers] - } + reviewers = {"annotators": [_build_author(reviewer) for reviewer in annotation.reviewers]} return {**annotators, **reviewers} @@ -155,9 +143,7 @@ def _build_video_annotation(annotation: VideoAnnotation) -> DictFreeForm: return { **annotation.get_data( only_keyframes=False, - post_processing=lambda annotation, _: _build_image_annotation( - annotation, skip_slots=True - ), + post_processing=lambda annotation, _: _build_image_annotation(annotation, skip_slots=True), ), "name": annotation.annotation_class.name, "slot_names": annotation.slot_names, @@ -165,9 +151,7 @@ def _build_video_annotation(annotation: VideoAnnotation) -> DictFreeForm: } -def _build_image_annotation( - annotation: Annotation, skip_slots: bool = False -) -> DictFreeForm: +def _build_image_annotation(annotation: Annotation, skip_slots: bool = False) -> DictFreeForm: json_subs = {} for sub in annotation.subs: json_subs.update(_build_sub_annotation(sub)) @@ -185,9 +169,7 @@ def _build_image_annotation( return {**base_json, "slot_names": annotation.slot_names} -def _build_legacy_annotation_data( - annotation_class: AnnotationClass, data: DictFreeForm -) -> DictFreeForm: +def _build_legacy_annotation_data(annotation_class: AnnotationClass, data: DictFreeForm) -> DictFreeForm: v1_data = {} polygon_annotation_mappings = {"complex_polygon": "paths", "polygon": "path"} @@ -213,3 +195,71 @@ def _build_metadata(annotation_file: AnnotationFile) -> DictFreeForm: return {"metadata": annotation_file.slots[0].metadata} else: return {} + + +def build_image_annotation(annotation_file: AnnotationFile) -> Dict[str, Any]: + """ + Builds and returns a dictionary with the annotations present in the given file. + + Parameters + ---------- + annotation_file: dt.AnnotationFile + File with the image annotations to extract. + + Returns + ------- + Dict[str, Any] + A dictionary with the annotation from the given file. Has the following structure: + + .. code-block:: python + + { + "annotations": [ + { + "annotation_type": { ... }, # annotation_data + "name": "annotation class name", + "bounding_box": { ... } # Optional parameter, only present if the file has a bounding box as well + } + ], + "image": { + "filename": "a_file_name.json", + "height": 1000, + "width": 2000, + "url": "https://www.darwin.v7labs.com/..." + } + } + """ + annotations: List[Dict[str, Any]] = [] + for annotation in annotation_file.annotations: + payload = { + annotation.annotation_class.annotation_type: _build_annotation_data(annotation), + "name": annotation.annotation_class.name, + } + + if ( + annotation.annotation_class.annotation_type == "complex_polygon" + or annotation.annotation_class.annotation_type == "polygon" + ) and "bounding_box" in annotation.data: + payload["bounding_box"] = annotation.data["bounding_box"] + + annotations.append(payload) + + return { + "annotations": annotations, + "image": { + "filename": annotation_file.filename, + "height": annotation_file.image_height, + "width": annotation_file.image_width, + "url": annotation_file.image_url, + }, + } + + +def _build_annotation_data(annotation: Annotation) -> Dict[str, Any]: + if annotation.annotation_class.annotation_type == "complex_polygon": + return {"path": annotation.data["paths"]} + + if annotation.annotation_class.annotation_type == "polygon": + return dict(filter(lambda item: item[0] != "bounding_box", annotation.data.items())) + + return dict(annotation.data) diff --git a/darwin/future/meta/objects/base.py b/darwin/future/meta/objects/base.py index 77eafcd7e..359f4cad1 100644 --- a/darwin/future/meta/objects/base.py +++ b/darwin/future/meta/objects/base.py @@ -48,3 +48,6 @@ def __init__( def __repr__(self) -> str: return str(self) + + def __str__(self) -> str: + return f"{self.__class__.__name__}({self._element})" diff --git a/darwin/future/meta/objects/item.py b/darwin/future/meta/objects/item.py index f2b40e5f4..7ed564e3f 100644 --- a/darwin/future/meta/objects/item.py +++ b/darwin/future/meta/objects/item.py @@ -6,8 +6,8 @@ from darwin.future.core.items.archive_items import archive_list_of_items from darwin.future.core.items.delete_items import delete_list_of_items from darwin.future.core.items.move_items_to_folder import move_list_of_items_to_folder -from darwin.future.core.items.set_item_priority import set_item_priority from darwin.future.core.items.restore_items import restore_list_of_items +from darwin.future.core.items.set_item_priority import set_item_priority from darwin.future.data_objects.item import ItemCore, ItemLayout, ItemSlot from darwin.future.meta.objects.base import MetaBase @@ -144,3 +144,9 @@ def tags(self) -> Optional[Union[List[str], Dict[str, str]]]: @property def layout(self) -> Optional[ItemLayout]: return self._element.layout + + def __str__(self) -> str: + return f"Item\n\ +- Item Name: {self._element.name}\n\ +- Item Processing Status: {self._element.processing_status}\n\ +- Item ID: {self._element.id}" diff --git a/darwin/importer/importer.py b/darwin/importer/importer.py index 7bc12edc6..d2f02f807 100644 --- a/darwin/importer/importer.py +++ b/darwin/importer/importer.py @@ -603,9 +603,7 @@ def _warn_unsupported_annotations(parsed_files: List[AnnotationFile]) -> None: if annotation.annotation_class.annotation_type in UNSUPPORTED_CLASSES: skipped_annotations.append(annotation) if len(skipped_annotations) > 0: - types = { - c.annotation_class.annotation_type for c in skipped_annotations - } # noqa: C417 + types = {c.annotation_class.annotation_type for c in skipped_annotations} # noqa: C417 console.print( f"Import of annotation class types '{', '.join(types)}' is not yet supported. Skipping {len(skipped_annotations)} " + "annotations from '{parsed_file.full_path}'.\n", diff --git a/darwin/torch/dataset.py b/darwin/torch/dataset.py index c324e36f3..b17a21238 100644 --- a/darwin/torch/dataset.py +++ b/darwin/torch/dataset.py @@ -99,9 +99,7 @@ class ClassificationDataset(LocalDataset): be composed via torchvision. """ - def __init__( - self, transform: Optional[Union[Callable, List]] = None, **kwargs - ) -> None: + def __init__(self, transform: Optional[Union[Callable, List]] = None, **kwargs) -> None: super().__init__(annotation_type="tag", **kwargs) if transform is not None and isinstance(transform, list): @@ -154,11 +152,7 @@ def get_target(self, index: int) -> Tensor: data = self.parse_json(index) annotations = data.pop("annotations") - tags = [ - a.annotation_class.name - for a in annotations - if a.annotation_class.annotation_type == "tag" - ] + tags = [a.annotation_class.name for a in annotations if a.annotation_class.annotation_type == "tag"] if not self.is_multi_label: # Binary or multiclass must have a label per image @@ -182,11 +176,7 @@ def check_if_multi_label(self) -> None: for idx in range(len(self)): target = self.parse_json(idx) annotations = target.pop("annotations") - tags = [ - a.annotation_class.name - for a in annotations - if a.annotation_class.annotation_type == "tag" - ] + tags = [a.annotation_class.name for a in annotations if a.annotation_class.annotation_type == "tag"] if len(tags) > 1: self.is_multi_label = True @@ -328,10 +318,13 @@ def get_target(self, index: int) -> Dict[str, Any]: for annotation in target["annotations"]: annotation_type: str = annotation.annotation_class.annotation_type path_key = "paths" if annotation_type == "complex_polygon" else "path" + + # Darwin V2 only has paths (TODO it might be more robust fixes) + if "paths" in annotation.data: + path_key = "paths" + if path_key not in annotation.data: - print( - f"Warning: missing polygon in annotation {self.annotations_path[index]}" - ) + print(f"Warning: missing polygon in annotation {self.annotations_path[index]}") # Extract the sequences of coordinates from the polygon annotation sequences = convert_polygons_to_sequences( annotation.data[path_key], @@ -360,12 +353,7 @@ def get_target(self, index: int) -> Dict[str, Any]: # Compute the area of the polygon # TODO fix with addictive/subtractive paths in complex polygons - poly_area: float = np.sum( - [ - polygon_area(x_coord, y_coord) - for x_coord, y_coord in zip(x_coords, y_coords) - ] - ) + poly_area: float = np.sum([polygon_area(x_coord, y_coord) for x_coord, y_coord in zip(x_coords, y_coords)]) # Create and append the new entry for this annotation annotations.append( @@ -417,9 +405,7 @@ class SemanticSegmentationDataset(LocalDataset): Object used to convert polygons to semantic masks. """ - def __init__( - self, transform: Optional[Union[List[Callable], Callable]] = None, **kwargs - ): + def __init__(self, transform: Optional[Union[List[Callable], Callable]] = None, **kwargs): super().__init__(annotation_type="polygon", **kwargs) if "__background__" not in self.classes: self.classes.insert(0, "__background__") @@ -489,7 +475,6 @@ def get_target(self, index: int) -> Dict[str, Any]: paths = obj.data["paths"] else: paths = [obj.data["path"]] - for path in paths: sequences = convert_polygons_to_sequences( path, @@ -634,6 +619,7 @@ def get_target(self, index: int) -> Dict[str, Tensor]: targets.append(ann) # following https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html + stacked_targets = { "boxes": torch.stack([v["bbox"] for v in targets]), "area": torch.stack([v["area"] for v in targets]), diff --git a/darwin/utils/utils.py b/darwin/utils/utils.py index fe464aa62..590a4b69e 100644 --- a/darwin/utils/utils.py +++ b/darwin/utils/utils.py @@ -24,7 +24,7 @@ import numpy as np import orjson as json import requests -from json_stream.base import PersistentStreamingJSONObject +from json_stream.base import PersistentStreamingJSONList, PersistentStreamingJSONObject from jsonschema import validators from requests import Response from rich.progress import ProgressType, track @@ -214,9 +214,7 @@ def is_project_dir(project_path: Path) -> bool: return (project_path / "releases").exists() and (project_path / "images").exists() -def get_progress_bar( - array: List[dt.AnnotationFile], description: Optional[str] = None -) -> Iterable[ProgressType]: +def get_progress_bar(array: List[dt.AnnotationFile], description: Optional[str] = None) -> Iterable[ProgressType]: """ Get a rich a progress bar for the given list of annotation files. @@ -360,9 +358,7 @@ def persist_client_configuration( api_key=team_config.api_key, datasets_dir=team_config.datasets_dir, ) - config.set_global( - api_endpoint=client.url, base_url=client.base_url, default_team=default_team - ) + config.set_global(api_endpoint=client.url, base_url=client.base_url, default_team=default_team) return config @@ -419,9 +415,7 @@ def attempt_decode(path: Path) -> dict: return data except Exception: continue - raise UnrecognizableFileEncoding( - f"Unable to load file {path} with any encodings: {encodings}" - ) + raise UnrecognizableFileEncoding(f"Unable to load file {path} with any encodings: {encodings}") def load_data_from_file(path: Path) -> Tuple[dict, dt.AnnotationFileVersion]: @@ -430,9 +424,7 @@ def load_data_from_file(path: Path) -> Tuple[dict, dt.AnnotationFileVersion]: return data, version -def parse_darwin_json( - path: Path, count: Optional[int] = None -) -> Optional[dt.AnnotationFile]: +def parse_darwin_json(path: Path, count: Optional[int] = None) -> Optional[dt.AnnotationFile]: """ Parses the given JSON file in v7's darwin proprietary format. Works for images, split frame videos (treated as images) and playback videos. @@ -492,9 +484,16 @@ def stream_darwin_json(path: Path) -> PersistentStreamingJSONObject: return json_stream.load(infile, persistent=True) -def get_image_path_from_stream( - darwin_json: PersistentStreamingJSONObject, images_dir: Path -) -> Path: +def is_stream_list_empty(json_list: PersistentStreamingJSONList) -> bool: + try: + json_list[0] + except IndexError: + return True + + return False + + +def get_image_path_from_stream(darwin_json: PersistentStreamingJSONObject, images_dir: Path) -> Path: """ Returns the path to the image file associated with the given darwin json file (V1 or V2). @@ -511,31 +510,17 @@ def get_image_path_from_stream( Path to the image file. """ try: - return ( - images_dir - / (Path(darwin_json["item"]["path"].lstrip("/\\"))) - / Path(darwin_json["item"]["name"]) - ) + return images_dir / (Path(darwin_json["item"]["path"].lstrip("/\\"))) / Path(darwin_json["item"]["name"]) except KeyError: - return ( - images_dir - / (Path(darwin_json["image"]["path"].lstrip("/\\"))) - / Path(darwin_json["image"]["filename"]) - ) + return images_dir / (Path(darwin_json["image"]["path"].lstrip("/\\"))) / Path(darwin_json["image"]["filename"]) def _parse_darwin_v2(path: Path, data: Dict[str, Any]) -> dt.AnnotationFile: item = data["item"] item_source = item.get("source_info", {}) - slots: List[dt.Slot] = list( - filter(None, map(_parse_darwin_slot, item.get("slots", []))) - ) - annotations: List[Union[dt.Annotation, dt.VideoAnnotation]] = _data_to_annotations( - data - ) - annotation_classes: Set[dt.AnnotationClass] = { - annotation.annotation_class for annotation in annotations - } + slots: List[dt.Slot] = list(filter(None, map(_parse_darwin_slot, item.get("slots", [])))) + annotations: List[Union[dt.Annotation, dt.VideoAnnotation]] = _data_to_annotations(data) + annotation_classes: Set[dt.AnnotationClass] = {annotation.annotation_class for annotation in annotations} if len(slots) == 0: annotation_file = dt.AnnotationFile( @@ -543,9 +528,7 @@ def _parse_darwin_v2(path: Path, data: Dict[str, Any]) -> dt.AnnotationFile: path=path, filename=item["name"], item_id=item.get("source_info", {}).get("item_id", None), - dataset_name=item.get("source_info", {}) - .get("dataset", {}) - .get("name", None), + dataset_name=item.get("source_info", {}).get("dataset", {}).get("name", None), annotation_classes=annotation_classes, annotations=annotations, is_video=False, @@ -566,17 +549,13 @@ def _parse_darwin_v2(path: Path, data: Dict[str, Any]) -> dt.AnnotationFile: path=path, filename=item["name"], item_id=item.get("source_info", {}).get("item_id", None), - dataset_name=item.get("source_info", {}) - .get("dataset", {}) - .get("name", None), + dataset_name=item.get("source_info", {}).get("dataset", {}).get("name", None), annotation_classes=annotation_classes, annotations=annotations, is_video=slot.frame_urls is not None or slot.frame_manifest is not None, image_width=slot.width, image_height=slot.height, - image_url=None - if len(slot.source_files or []) == 0 - else slot.source_files[0]["url"], + image_url=None if len(slot.source_files or []) == 0 else slot.source_files[0]["url"], image_thumbnail_url=slot.thumbnail_url, workview_url=item_source.get("workview_url", None), seq=0, @@ -606,15 +585,9 @@ def _parse_darwin_slot(data: Dict[str, Any]) -> dt.Slot: ) -def _parse_darwin_image( - path: Path, data: Dict[str, Any], count: Optional[int] -) -> dt.AnnotationFile: - annotations: List[Union[dt.Annotation, dt.VideoAnnotation]] = _data_to_annotations( - data - ) - annotation_classes: Set[dt.AnnotationClass] = { - annotation.annotation_class for annotation in annotations - } +def _parse_darwin_image(path: Path, data: Dict[str, Any], count: Optional[int]) -> dt.AnnotationFile: + annotations: List[Union[dt.Annotation, dt.VideoAnnotation]] = _data_to_annotations(data) + annotation_classes: Set[dt.AnnotationClass] = {annotation.annotation_class for annotation in annotations} slot = dt.Slot( name=None, @@ -651,20 +624,12 @@ def _parse_darwin_image( return annotation_file -def _parse_darwin_video( - path: Path, data: Dict[str, Any], count: Optional[int] -) -> dt.AnnotationFile: - annotations: List[Union[dt.Annotation, dt.VideoAnnotation]] = _data_to_annotations( - data - ) - annotation_classes: Set[dt.AnnotationClass] = { - annotation.annotation_class for annotation in annotations - } +def _parse_darwin_video(path: Path, data: Dict[str, Any], count: Optional[int]) -> dt.AnnotationFile: + annotations: List[Union[dt.Annotation, dt.VideoAnnotation]] = _data_to_annotations(data) + annotation_classes: Set[dt.AnnotationClass] = {annotation.annotation_class for annotation in annotations} if "width" not in data["image"] or "height" not in data["image"]: - raise OutdatedDarwinJSONFormat( - "Missing width/height in video, please re-export" - ) + raise OutdatedDarwinJSONFormat("Missing width/height in video, please re-export") slot = dt.Slot( name=None, @@ -710,41 +675,23 @@ def _parse_darwin_annotation(annotation: Dict[str, Any]) -> Optional[dt.Annotati main_annotation: Optional[dt.Annotation] = None # Darwin JSON 2.0 representation of complex polygons - if ( - "polygon" in annotation - and "paths" in annotation["polygon"] - and len(annotation["polygon"]["paths"]) > 1 - ): + if "polygon" in annotation and "paths" in annotation["polygon"] and len(annotation["polygon"]["paths"]) > 1: bounding_box = annotation.get("bounding_box") paths = annotation["polygon"]["paths"] - main_annotation = dt.make_complex_polygon( - name, paths, bounding_box, slot_names=slot_names - ) + main_annotation = dt.make_complex_polygon(name, paths, bounding_box, slot_names=slot_names) # Darwin JSON 2.0 representation of simple polygons - elif ( - "polygon" in annotation - and "paths" in annotation["polygon"] - and len(annotation["polygon"]["paths"]) == 1 - ): + elif "polygon" in annotation and "paths" in annotation["polygon"] and len(annotation["polygon"]["paths"]) == 1: bounding_box = annotation.get("bounding_box") paths = annotation["polygon"]["paths"] - main_annotation = dt.make_polygon( - name, paths[0], bounding_box, slot_names=slot_names - ) + main_annotation = dt.make_polygon(name, paths[0], bounding_box, slot_names=slot_names) # Darwin JSON 1.0 representation of complex and simple polygons elif "polygon" in annotation: bounding_box = annotation.get("bounding_box") if "additional_paths" in annotation["polygon"]: - paths = [annotation["polygon"]["path"]] + annotation["polygon"][ - "additional_paths" - ] - main_annotation = dt.make_complex_polygon( - name, paths, bounding_box, slot_names=slot_names - ) + paths = [annotation["polygon"]["path"]] + annotation["polygon"]["additional_paths"] + main_annotation = dt.make_complex_polygon(name, paths, bounding_box, slot_names=slot_names) else: - main_annotation = dt.make_polygon( - name, annotation["polygon"]["path"], bounding_box, slot_names=slot_names - ) + main_annotation = dt.make_polygon(name, annotation["polygon"]["path"], bounding_box, slot_names=slot_names) # Darwin JSON 1.0 representation of complex polygons elif "complex_polygon" in annotation: bounding_box = annotation.get("bounding_box") @@ -756,9 +703,7 @@ def _parse_darwin_annotation(annotation: Dict[str, Any]) -> Optional[dt.Annotati if "additional_paths" in annotation["complex_polygon"]: paths.extend(annotation["complex_polygon"]["additional_paths"]) - main_annotation = dt.make_complex_polygon( - name, paths, bounding_box, slot_names=slot_names - ) + main_annotation = dt.make_complex_polygon(name, paths, bounding_box, slot_names=slot_names) elif "bounding_box" in annotation: bounding_box = annotation["bounding_box"] main_annotation = dt.make_bounding_box( @@ -772,9 +717,7 @@ def _parse_darwin_annotation(annotation: Dict[str, Any]) -> Optional[dt.Annotati elif "tag" in annotation: main_annotation = dt.make_tag(name, slot_names=slot_names) elif "line" in annotation: - main_annotation = dt.make_line( - name, annotation["line"]["path"], slot_names=slot_names - ) + main_annotation = dt.make_line(name, annotation["line"]["path"], slot_names=slot_names) elif "keypoint" in annotation: main_annotation = dt.make_keypoint( name, @@ -783,17 +726,11 @@ def _parse_darwin_annotation(annotation: Dict[str, Any]) -> Optional[dt.Annotati slot_names=slot_names, ) elif "ellipse" in annotation: - main_annotation = dt.make_ellipse( - name, annotation["ellipse"], slot_names=slot_names - ) + main_annotation = dt.make_ellipse(name, annotation["ellipse"], slot_names=slot_names) elif "cuboid" in annotation: - main_annotation = dt.make_cuboid( - name, annotation["cuboid"], slot_names=slot_names - ) + main_annotation = dt.make_cuboid(name, annotation["cuboid"], slot_names=slot_names) elif "skeleton" in annotation: - main_annotation = dt.make_skeleton( - name, annotation["skeleton"]["nodes"], slot_names=slot_names - ) + main_annotation = dt.make_skeleton(name, annotation["skeleton"]["nodes"], slot_names=slot_names) elif "table" in annotation: main_annotation = dt.make_table( name, @@ -802,9 +739,7 @@ def _parse_darwin_annotation(annotation: Dict[str, Any]) -> Optional[dt.Annotati slot_names=slot_names, ) elif "string" in annotation: - main_annotation = dt.make_string( - name, annotation["string"]["sources"], slot_names=slot_names - ) + main_annotation = dt.make_string(name, annotation["string"]["sources"], slot_names=slot_names) elif "graph" in annotation: main_annotation = dt.make_graph( name, @@ -831,29 +766,19 @@ def _parse_darwin_annotation(annotation: Dict[str, Any]) -> Optional[dt.Annotati if "id" in annotation: main_annotation.id = annotation["id"] if "instance_id" in annotation: - main_annotation.subs.append( - dt.make_instance_id(annotation["instance_id"]["value"]) - ) + main_annotation.subs.append(dt.make_instance_id(annotation["instance_id"]["value"])) if "attributes" in annotation: main_annotation.subs.append(dt.make_attributes(annotation["attributes"])) if "text" in annotation: main_annotation.subs.append(dt.make_text(annotation["text"]["text"])) if "inference" in annotation: - main_annotation.subs.append( - dt.make_opaque_sub("inference", annotation["inference"]) - ) + main_annotation.subs.append(dt.make_opaque_sub("inference", annotation["inference"])) if "directional_vector" in annotation: - main_annotation.subs.append( - dt.make_opaque_sub("directional_vector", annotation["directional_vector"]) - ) + main_annotation.subs.append(dt.make_opaque_sub("directional_vector", annotation["directional_vector"])) if "measures" in annotation: - main_annotation.subs.append( - dt.make_opaque_sub("measures", annotation["measures"]) - ) + main_annotation.subs.append(dt.make_opaque_sub("measures", annotation["measures"])) if "auto_annotate" in annotation: - main_annotation.subs.append( - dt.make_opaque_sub("auto_annotate", annotation["auto_annotate"]) - ) + main_annotation.subs.append(dt.make_opaque_sub("auto_annotate", annotation["auto_annotate"])) if annotation.get("annotators") is not None: main_annotation.annotators = _parse_annotators(annotation["annotators"]) @@ -907,9 +832,7 @@ def _parse_darwin_raster_annotation(annotation: dict) -> Optional[dt.Annotation] slot_names: Optional[List[str]] = parse_slot_names(annotation) if not id or not name or not raster_layer: - raise ValueError( - "Raster annotation must have an 'id', 'name' and 'raster_layer' field" - ) + raise ValueError("Raster annotation must have an 'id', 'name' and 'raster_layer' field") dense_rle, mask_annotation_ids_mapping, total_pixels = ( raster_layer.get("dense_rle", None), @@ -960,14 +883,9 @@ def _parse_darwin_mask_annotation(annotation: dict) -> Optional[dt.Annotation]: def _parse_annotators(annotators: List[Dict[str, Any]]) -> List[dt.AnnotationAuthor]: if not (hasattr(annotators, "full_name") or not hasattr(annotators, "email")): - raise AttributeError( - "JSON file must contain annotators with 'full_name' and 'email' fields" - ) + raise AttributeError("JSON file must contain annotators with 'full_name' and 'email' fields") - return [ - dt.AnnotationAuthor(annotator["full_name"], annotator["email"]) - for annotator in annotators - ] + return [dt.AnnotationAuthor(annotator["full_name"], annotator["email"]) for annotator in annotators] def split_video_annotation(annotation: dt.AnnotationFile) -> List[dt.AnnotationFile]: @@ -1003,13 +921,9 @@ def split_video_annotation(annotation: dt.AnnotationFile) -> List[dt.AnnotationF frame_annotations = [] for i, frame_url in enumerate(urls): annotations = [ - a.frames[i] - for a in annotation.annotations - if isinstance(a, dt.VideoAnnotation) and i in a.frames + a.frames[i] for a in annotation.annotations if isinstance(a, dt.VideoAnnotation) and i in a.frames ] - annotation_classes: Set[dt.AnnotationClass] = { - annotation.annotation_class for annotation in annotations - } + annotation_classes: Set[dt.AnnotationClass] = {annotation.annotation_class for annotation in annotations} filename: str = f"{Path(annotation.filename).stem}/{i:07d}.png" frame_annotations.append( dt.AnnotationFile( @@ -1025,8 +939,10 @@ def split_video_annotation(annotation: dt.AnnotationFile) -> List[dt.AnnotationF annotation.seq, item_id=annotation.item_id, slots=annotation.slots, + remote_path=annotation.remote_path, ) ) + return frame_annotations @@ -1093,9 +1009,7 @@ def convert_polygons_to_sequences( else: list_polygons = cast(List[dt.Polygon], [polygons]) - if not isinstance(list_polygons[0], list) or not isinstance( - list_polygons[0][0], dict - ): + if not isinstance(list_polygons[0], list) or not isinstance(list_polygons[0][0], dict): raise ValueError("Unknown input format") sequences: List[List[Union[int, float]]] = [] @@ -1236,9 +1150,7 @@ def convert_bounding_box_to_xyxy(box: dt.BoundingBox) -> List[float]: return [box["x"], box["y"], x2, y2] -def convert_polygons_to_mask( - polygons: List, height: int, width: int, value: Optional[int] = 1 -) -> np.ndarray: +def convert_polygons_to_mask(polygons: List, height: int, width: int, value: Optional[int] = 1) -> np.ndarray: """ Converts a list of polygons, encoded as a list of dictionaries into an ``nd.array`` mask. @@ -1332,38 +1244,24 @@ def _parse_version(data: dict) -> dt.AnnotationFileVersion: return dt.AnnotationFileVersion(int(major), int(minor), suffix) -def _data_to_annotations( - data: Dict[str, Any] -) -> List[Union[dt.Annotation, dt.VideoAnnotation]]: +def _data_to_annotations(data: Dict[str, Any]) -> List[Union[dt.Annotation, dt.VideoAnnotation]]: raw_image_annotations = filter( lambda annotation: ( - ("frames" not in annotation) - and ("raster_layer" not in annotation) - and ("mask" not in annotation) + ("frames" not in annotation) and ("raster_layer" not in annotation) and ("mask" not in annotation) ), data["annotations"], ) - raw_video_annotations = filter( - lambda annotation: "frames" in annotation, data["annotations"] - ) - raw_raster_annotations = filter( - lambda annotation: "raster_layer" in annotation, data["annotations"] - ) - raw_mask_annotations = filter( - lambda annotation: "mask" in annotation, data["annotations"] - ) - image_annotations: List[dt.Annotation] = list( - filter(None, map(_parse_darwin_annotation, raw_image_annotations)) - ) + raw_video_annotations = filter(lambda annotation: "frames" in annotation, data["annotations"]) + raw_raster_annotations = filter(lambda annotation: "raster_layer" in annotation, data["annotations"]) + raw_mask_annotations = filter(lambda annotation: "mask" in annotation, data["annotations"]) + image_annotations: List[dt.Annotation] = list(filter(None, map(_parse_darwin_annotation, raw_image_annotations))) video_annotations: List[dt.VideoAnnotation] = list( filter(None, map(_parse_darwin_video_annotation, raw_video_annotations)) ) raster_annotations: List[dt.Annotation] = list( filter(None, map(_parse_darwin_raster_annotation, raw_raster_annotations)) ) - mask_annotations: List[dt.Annotation] = list( - filter(None, map(_parse_darwin_mask_annotation, raw_mask_annotations)) - ) + mask_annotations: List[dt.Annotation] = list(filter(None, map(_parse_darwin_mask_annotation, raw_mask_annotations))) return [ *image_annotations, @@ -1384,6 +1282,4 @@ def _supported_schema_versions() -> Dict[Tuple[int, int, str], str]: def _default_schema(version: dt.AnnotationFileVersion) -> Optional[str]: - return _supported_schema_versions().get( - (version.major, version.minor, version.suffix) - ) + return _supported_schema_versions().get((version.major, version.minor, version.suffix)) diff --git a/deploy/revert_nightly_setup.py b/deploy/revert_nightly_setup.py index bd1e6f698..c5b4a161b 100644 --- a/deploy/revert_nightly_setup.py +++ b/deploy/revert_nightly_setup.py @@ -1,8 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from datetime import datetime -from os import path from pathlib import Path diff --git a/e2e_tests/conftest.py b/e2e_tests/conftest.py index b44f786da..d26b4fbac 100644 --- a/e2e_tests/conftest.py +++ b/e2e_tests/conftest.py @@ -9,7 +9,7 @@ from darwin.future.data_objects.typing import UnknownType from e2e_tests.exceptions import E2EEnvironmentVariableNotSet -from e2e_tests.objects import ConfigValues, E2EDataset +from e2e_tests.objects import ConfigValues from e2e_tests.setup_tests import setup_tests, teardown_tests diff --git a/e2e_tests/helpers.py b/e2e_tests/helpers.py index 8d3b3d393..0c14b37f3 100644 --- a/e2e_tests/helpers.py +++ b/e2e_tests/helpers.py @@ -1,18 +1,10 @@ -import re -import tempfile -import uuid -from pathlib import Path from subprocess import run from time import sleep -from typing import Generator, Optional, Tuple +from typing import Optional -import pytest from attr import dataclass -from cv2 import exp from darwin.exceptions import DarwinException -from e2e_tests.objects import E2EDataset -from e2e_tests.setup_tests import create_random_image @dataclass diff --git a/e2e_tests/test_darwin.py b/e2e_tests/test_darwin.py index dd03d8f3c..1f495817d 100644 --- a/e2e_tests/test_darwin.py +++ b/e2e_tests/test_darwin.py @@ -4,7 +4,6 @@ import tempfile import uuid from pathlib import Path -from time import sleep from typing import Generator import pytest diff --git a/tests/darwin/dataset/download_manager_test.py b/tests/darwin/dataset/download_manager_test.py index de9e1da4e..4f46c1ca1 100644 --- a/tests/darwin/dataset/download_manager_test.py +++ b/tests/darwin/dataset/download_manager_test.py @@ -1,16 +1,10 @@ from pathlib import Path from typing import List -from unittest.mock import MagicMock, patch import pytest import responses -from requests import get -from darwin.client import Client -from darwin.config import Config from darwin.dataset import download_manager as dm -from darwin.dataset.identifier import DatasetIdentifier -from darwin.dataset.remote_dataset_v1 import RemoteDatasetV1 from darwin.datatypes import Slot from tests.fixtures import * @@ -42,16 +36,16 @@ def test_parse_manifests(manifest_paths: List[Path]) -> None: assert len(segment_manifests[3].items) == 2 assert segment_manifests[0].items[0].absolute_frame == 0 assert segment_manifests[0].items[1].absolute_frame == 1 - assert segment_manifests[0].items[1].visibility == True + assert segment_manifests[0].items[1].visibility is True assert segment_manifests[1].items[0].absolute_frame == 2 assert segment_manifests[1].items[1].absolute_frame == 3 - assert segment_manifests[1].items[1].visibility == True + assert segment_manifests[1].items[1].visibility is True assert segment_manifests[2].items[0].absolute_frame == 4 assert segment_manifests[2].items[1].absolute_frame == 5 - assert segment_manifests[2].items[1].visibility == True + assert segment_manifests[2].items[1].visibility is True assert segment_manifests[3].items[0].absolute_frame == 6 assert segment_manifests[3].items[1].absolute_frame == 7 - assert segment_manifests[3].items[1].visibility == True + assert segment_manifests[3].items[1].visibility is True def test_get_segment_manifests( @@ -70,13 +64,13 @@ def test_get_segment_manifests( assert len(segment_manifests[3].items) == 2 assert segment_manifests[0].items[0].absolute_frame == 0 assert segment_manifests[0].items[1].absolute_frame == 1 - assert segment_manifests[0].items[1].visibility == True + assert segment_manifests[0].items[1].visibility is True assert segment_manifests[1].items[0].absolute_frame == 2 assert segment_manifests[1].items[1].absolute_frame == 3 - assert segment_manifests[1].items[1].visibility == True + assert segment_manifests[1].items[1].visibility is True assert segment_manifests[2].items[0].absolute_frame == 4 assert segment_manifests[2].items[1].absolute_frame == 5 - assert segment_manifests[2].items[1].visibility == True + assert segment_manifests[2].items[1].visibility is True assert segment_manifests[3].items[0].absolute_frame == 6 assert segment_manifests[3].items[1].absolute_frame == 7 - assert segment_manifests[3].items[1].visibility == True + assert segment_manifests[3].items[1].visibility is True diff --git a/tests/darwin/dataset/remote_dataset_test.py b/tests/darwin/dataset/remote_dataset_test.py index 1fced67f5..939f9cc36 100644 --- a/tests/darwin/dataset/remote_dataset_test.py +++ b/tests/darwin/dataset/remote_dataset_test.py @@ -390,7 +390,6 @@ def test_works_on_videos( / "test_video" ) assert video_path.exists() - assert (video_path / "0000000.json").exists() assert (video_path / "0000001.json").exists() assert (video_path / "0000002.json").exists() diff --git a/tests/darwin/exporter/formats/export_darwin_1_0_test.py b/tests/darwin/exporter/formats/export_darwin_1_0_test.py index 5df8cb300..dac3a2806 100644 --- a/tests/darwin/exporter/formats/export_darwin_1_0_test.py +++ b/tests/darwin/exporter/formats/export_darwin_1_0_test.py @@ -7,10 +7,7 @@ class TestBuildJson: def test_empty_annotation_file(self): annotation_file = dt.AnnotationFile( - path=Path("test.json"), - filename="test.json", - annotation_classes=[], - annotations=[], + path=Path("test.json"), filename="test.json", annotation_classes=[], annotations=[] ) assert _build_json(annotation_file) == { @@ -60,10 +57,13 @@ def test_complete_annotation_file(self): {"x": 531.6440000000002, "y": 428.4196}, {"x": 529.8140000000002, "y": 426.5896}, ] + bounding_box = {"x": 557.66, "y": 428.98, "w": 160.76, "h": 315.3} annotation_class = dt.AnnotationClass(name="test", annotation_type="polygon") annotation = dt.Annotation( - annotation_class=annotation_class, data={"path": polygon_path}, subs=[] + annotation_class=annotation_class, + data={"path": polygon_path, "bounding_box": bounding_box}, + subs=[], ) annotation_file = dt.AnnotationFile( @@ -89,7 +89,12 @@ def test_complete_annotation_file(self): "workview_url": None, }, "annotations": [ - {"polygon": {"path": polygon_path}, "name": "test", "slot_names": []} + { + "polygon": {"path": polygon_path}, + "name": "test", + "slot_names": [], + "bounding_box": bounding_box, + } ], "dataset": "None", } @@ -114,12 +119,8 @@ def test_complex_polygon(self): ], ] - annotation_class = dt.AnnotationClass( - name="test", annotation_type="complex_polygon" - ) - annotation = dt.Annotation( - annotation_class=annotation_class, data={"paths": polygon_path}, subs=[] - ) + annotation_class = dt.AnnotationClass(name="test", annotation_type="complex_polygon") + annotation = dt.Annotation(annotation_class=annotation_class, data={"paths": polygon_path}, subs=[]) annotation_file = dt.AnnotationFile( path=Path("test.json"), @@ -224,9 +225,7 @@ def test_complex_polygon_with_bbox(self): bounding_box = {"x": 557.66, "y": 428.98, "w": 160.76, "h": 315.3} - annotation_class = dt.AnnotationClass( - name="test", annotation_type="complex_polygon" - ) + annotation_class = dt.AnnotationClass(name="test", annotation_type="complex_polygon") annotation = dt.Annotation( annotation_class=annotation_class, data={"paths": polygon_path, "bounding_box": bounding_box}, @@ -268,11 +267,83 @@ def test_complex_polygon_with_bbox(self): def test_bounding_box(self): bounding_box_data = {"x": 100, "y": 150, "w": 50, "h": 30} - annotation_class = dt.AnnotationClass( - name="bbox_test", annotation_type="bounding_box" + annotation_class = dt.AnnotationClass(name="bbox_test", annotation_type="bounding_box") + annotation = dt.Annotation(annotation_class=annotation_class, data=bounding_box_data, subs=[]) + + annotation_file = dt.AnnotationFile( + path=Path("test.json"), + filename="test.json", + annotation_classes=[annotation_class], + annotations=[annotation], + image_height=1080, + image_width=1920, + image_url="https://darwin.v7labs.com/image.jpg", + ) + + assert _build_json(annotation_file) == { + "image": { + "seq": None, + "width": 1920, + "height": 1080, + "filename": "test.json", + "original_filename": "test.json", + "url": "https://darwin.v7labs.com/image.jpg", + "thumbnail_url": None, + "path": None, + "workview_url": None, + }, + "annotations": [ + { + "bounding_box": bounding_box_data, + "name": "bbox_test", + "slot_names": [], + } + ], + "dataset": "None", + } + + def test_tags(self): + tag_data = "sample_tag" + annotation_class = dt.AnnotationClass(name="tag_test", annotation_type="tag") + annotation = dt.Annotation(annotation_class=annotation_class, data=tag_data, subs=[]) + + annotation_file = dt.AnnotationFile( + path=Path("test.json"), + filename="test.json", + annotation_classes=[annotation_class], + annotations=[annotation], + image_height=1080, + image_width=1920, + image_url="https://darwin.v7labs.com/image.jpg", ) + assert _build_json(annotation_file) == { + "image": { + "seq": None, + "width": 1920, + "height": 1080, + "filename": "test.json", + "original_filename": "test.json", + "url": "https://darwin.v7labs.com/image.jpg", + "thumbnail_url": None, + "path": None, + "workview_url": None, + }, + "annotations": [{"tag": {}, "name": "tag_test", "slot_names": []}], + "dataset": "None", + } + + def test_polygon_annotation_file_with_bbox(self): + polygon_path = [ + {"x": 534.1440000000002, "y": 429.0896}, + {"x": 531.6440000000002, "y": 428.4196}, + {"x": 529.8140000000002, "y": 426.5896}, + ] + + bounding_box = {"x": 557.66, "y": 428.98, "w": 160.76, "h": 315.3} + + annotation_class = dt.AnnotationClass(name="test", annotation_type="polygon") annotation = dt.Annotation( - annotation_class=annotation_class, data=bounding_box_data, subs=[] + annotation_class=annotation_class, data={"path": polygon_path, "bounding_box": bounding_box}, subs=[] ) annotation_file = dt.AnnotationFile( @@ -285,6 +356,99 @@ def test_bounding_box(self): image_url="https://darwin.v7labs.com/image.jpg", ) + assert _build_json(annotation_file) == { + "image": { + "seq": None, + "width": 1920, + "height": 1080, + "filename": "test.json", + "original_filename": "test.json", + "url": "https://darwin.v7labs.com/image.jpg", + "thumbnail_url": None, + "path": None, + "workview_url": None, + }, + "annotations": [ + {"polygon": {"path": polygon_path}, "name": "test", "slot_names": [], "bounding_box": bounding_box} + ], + "dataset": "None", + } + + def test_complex_polygon_with_bbox(self): + polygon_path = [ + [ + {"x": 230.06, "y": 174.04}, + {"x": 226.39, "y": 170.36}, + {"x": 224.61, "y": 166.81}, + ], + [ + {"x": 238.98, "y": 171.69}, + {"x": 236.97, "y": 174.04}, + {"x": 238.67, "y": 174.04}, + ], + [ + {"x": 251.75, "y": 169.77}, + {"x": 251.75, "y": 154.34}, + {"x": 251.08, "y": 151.84}, + {"x": 249.25, "y": 150.01}, + ], + ] + + bounding_box = {"x": 557.66, "y": 428.98, "w": 160.76, "h": 315.3} + + annotation_class = dt.AnnotationClass(name="test", annotation_type="complex_polygon") + annotation = dt.Annotation( + annotation_class=annotation_class, data={"paths": polygon_path, "bounding_box": bounding_box}, subs=[] + ) + + annotation_file = dt.AnnotationFile( + path=Path("test.json"), + filename="test.json", + annotation_classes=[annotation_class], + annotations=[annotation], + image_height=1080, + image_width=1920, + image_url="https://darwin.v7labs.com/image.jpg", + ) + + assert _build_json(annotation_file) == { + "image": { + "seq": None, + "width": 1920, + "height": 1080, + "filename": "test.json", + "original_filename": "test.json", + "url": "https://darwin.v7labs.com/image.jpg", + "thumbnail_url": None, + "path": None, + "workview_url": None, + }, + "annotations": [ + { + "complex_polygon": {"path": polygon_path}, + "name": "test", + "slot_names": [], + "bounding_box": bounding_box, + } + ], + "dataset": "None", + } + + def test_bounding_box(self): + bounding_box_data = {"x": 100, "y": 150, "w": 50, "h": 30} + annotation_class = dt.AnnotationClass(name="bbox_test", annotation_type="bounding_box") + annotation = dt.Annotation(annotation_class=annotation_class, data=bounding_box_data, subs=[]) + + annotation_file = dt.AnnotationFile( + path=Path("test.json"), + filename="test.json", + annotation_classes=[annotation_class], + annotations=[annotation], + image_height=1080, + image_width=1920, + image_url="https://darwin.v7labs.com/image.jpg", + ) + assert _build_json(annotation_file) == { "image": { "seq": None, @@ -310,9 +474,7 @@ def test_bounding_box(self): def test_tags(self): tag_data = "sample_tag" annotation_class = dt.AnnotationClass(name="tag_test", annotation_type="tag") - annotation = dt.Annotation( - annotation_class=annotation_class, data=tag_data, subs=[] - ) + annotation = dt.Annotation(annotation_class=annotation_class, data=tag_data, subs=[]) annotation_file = dt.AnnotationFile( path=Path("test.json"), diff --git a/tests/darwin/exporter/formats/export_darwin_test.py b/tests/darwin/exporter/formats/export_darwin_test.py index 5274c9b45..aa11fe900 100644 --- a/tests/darwin/exporter/formats/export_darwin_test.py +++ b/tests/darwin/exporter/formats/export_darwin_test.py @@ -1,27 +1,45 @@ from pathlib import Path from darwin.datatypes import Annotation, AnnotationClass, AnnotationFile -from darwin.exporter.formats.darwin import build_image_annotation +from darwin.exporter.formats.darwin import ( + _build_v2_annotation_data, + build_image_annotation, +) -def test_empty_annotation_file(): +def test_empty_annotation_file_v2(): annotation_file = AnnotationFile( path=Path("test.json"), filename="test.json", annotation_classes=[], annotations=[], + dataset_name="Test Dataset", ) - assert build_image_annotation(annotation_file) == { + expected_output = { + "version": "2.0", + "schema_ref": "https://darwin-public.s3.eu-west-1.amazonaws.com/darwin_json/2.0/schema.json", + "item": { + "name": "test.json", + "path": "/", + "source_info": { + "dataset": {"name": "Test Dataset", "slug": "test-dataset"}, + "item_id": None, + "team": {"name": None, "slug": None}, + "workview_url": None, + }, + "slots": [], # Include an empty slots list as per Darwin v2 format + }, "annotations": [], - "image": {"filename": "test.json", "height": None, "url": None, "width": None}, } + assert build_image_annotation(annotation_file) == expected_output + -def test_complete_annotation_file(): +def test_complete_annotation_file_v2(): annotation_class = AnnotationClass(name="test", annotation_type="polygon") annotation = Annotation( - annotation_class=annotation_class, data={"path": []}, subs=[] + id="12345", annotation_class=annotation_class, data={"paths": [[]]}, subs=[] ) annotation_file = AnnotationFile( @@ -29,17 +47,84 @@ def test_complete_annotation_file(): filename="test.json", annotation_classes=[annotation_class], annotations=[annotation], - image_height=1080, - image_width=1920, - image_url="https://darwin.v7labs.com/image.jpg", + dataset_name="Test Dataset", ) - assert build_image_annotation(annotation_file) == { - "annotations": [{"name": "test", "polygon": {"path": []}}], - "image": { - "filename": "test.json", - "height": 1080, - "url": "https://darwin.v7labs.com/image.jpg", - "width": 1920, + expected_output = { + "version": "2.0", + "schema_ref": "https://darwin-public.s3.eu-west-1.amazonaws.com/darwin_json/2.0/schema.json", + "item": { + "name": "test.json", + "path": "/", + "source_info": { + "dataset": {"name": "Test Dataset", "slug": "test-dataset"}, + "item_id": None, + "team": {"name": None, "slug": None}, + "workview_url": None, + }, + "slots": [], # Include an empty slots list as per Darwin v2 format }, + "annotations": [_build_v2_annotation_data(annotation)], } + + assert build_image_annotation(annotation_file) == expected_output + + +def test_complete_annotation_file_with_bounding_box_and_tag_v2(): + # Annotation for a polygon + polygon_class = AnnotationClass(name="polygon_test", annotation_type="polygon") + polygon_annotation = Annotation( + id="polygon_id", + annotation_class=polygon_class, + data={"paths": [[{"x": 10, "y": 10}, {"x": 20, "y": 20}]]}, + subs=[], + ) + + # Annotation for a bounding box + bbox_class = AnnotationClass(name="bbox_test", annotation_type="bounding_box") + bbox_annotation = Annotation( + id="bbox_id", + annotation_class=bbox_class, + data={"h": 100, "w": 200, "x": 50, "y": 60}, + subs=[], + ) + + # Annotation for a tag + tag_class = AnnotationClass(name="tag_test", annotation_type="tag") + tag_annotation = Annotation( + id="tag_id", + annotation_class=tag_class, + data={}, # Assuming tag annotations have empty data + subs=[], + ) + + annotation_file = AnnotationFile( + path=Path("test.json"), + filename="test.json", + annotation_classes=[polygon_class, bbox_class, tag_class], + annotations=[polygon_annotation, bbox_annotation, tag_annotation], + dataset_name="Test Dataset", + ) + + expected_output = { + "version": "2.0", + "schema_ref": "https://darwin-public.s3.eu-west-1.amazonaws.com/darwin_json/2.0/schema.json", + "item": { + "name": "test.json", + "path": "/", + "source_info": { + "dataset": {"name": "Test Dataset", "slug": "test-dataset"}, + "item_id": None, + "team": {"name": None, "slug": None}, + "workview_url": None, + }, + "slots": [], # Include an empty slots list as per Darwin v2 format + }, + "annotations": [ + _build_v2_annotation_data(polygon_annotation), + _build_v2_annotation_data(bbox_annotation), + _build_v2_annotation_data(tag_annotation), + ], + } + + assert build_image_annotation(annotation_file) == expected_output diff --git a/tests/darwin/exporter/formats/export_mask_test.py b/tests/darwin/exporter/formats/export_mask_test.py index d8a47e502..4e932d2d7 100644 --- a/tests/darwin/exporter/formats/export_mask_test.py +++ b/tests/darwin/exporter/formats/export_mask_test.py @@ -1,9 +1,8 @@ import csv -import os import platform from pathlib import Path -from tempfile import NamedTemporaryFile, TemporaryDirectory -from typing import Callable, Dict, List, Optional +from tempfile import TemporaryDirectory +from typing import Dict, List, Optional from unittest.mock import patch import numpy as np @@ -16,7 +15,6 @@ except ImportError: NDArray = Any # type:ignore from PIL import Image -from upolygon import draw_polygon from darwin import datatypes as dt from darwin.exporter.formats.mask import ( @@ -293,7 +291,7 @@ def test_beyond_polygon_beyond_window() -> None: annotation_file = dt.AnnotationFile( Path("testfile"), "testfile", - set([a.annotation_class for a in annotations]), + {a.annotation_class for a in annotations}, annotations, ) height, width = 5, 5 @@ -346,7 +344,7 @@ def test_beyond_complex_polygon() -> None: annotation_file = dt.AnnotationFile( Path("testfile"), "testfile", - set([a.annotation_class for a in annotations]), + {a.annotation_class for a in annotations}, annotations, ) height, width = 5, 5 @@ -439,7 +437,7 @@ def test_render_polygons() -> None: annotation_file = dt.AnnotationFile( Path("testfile"), "testfile", - set([a.annotation_class for a in annotations]), + {a.annotation_class for a in annotations}, annotations, ) height = 100 @@ -477,7 +475,7 @@ def test_render_raster() -> None: for c in "1212213111132231132123132221223231113221112111233221121231311132313311221123131313131331113221311322333312233113311333132133212131122313313223111221323331221233312221221233133232232211321321311321133113123232322233222331223321121121113133313113232323122131123322122233311131213132123232322221113131331212212322133121231221213113231322121332222121232133222321311213312332321321212321222121113223321113311333313222232213123121221132332113321132133121221212131123113233313112322332112312113112321222331332121311132312221331312222211113232131112123331121311213113321121223323323232211323113333333321323332312332232332223332123213211332131112121131112233321131112121233131331133223211131333223123121322221332333311213331231122133311131211132231233111322123331223311231323121233233231222331331211322123213112211211231222323113331211113311331332221331131311112213322313322233213122133112313311322133223123221211113333222311222311133331312113322321312312122321133111133233313321221323231331223131321213332123331232123323313332232211312211133221113122322332131212112312121211113122221222131112333322323222312232311312321132212113311111131111113123133323333331212133312312122331212323223311121332232133212333212213132121321232211212233333313311332321231111333122133321211131312221113331112112121122212122322132213113123222231212331312233312113213233233312323211133132131133122311122321132233112313212312122332331312131213213223233222213112312111221131111232223123322133322111221323233333331313221222233322233221213131212322121112323312312321111333132323113331132312232231322232332223223211331322222231122211111311323221331111112123231131212131231112322322321333112331223111311311113123233223123311321322313231222311112113131133111233212121322212131221231222331233222212333312222223313232111111121113132221223332121222311121312322313221211131323111112233231131123111131122321312212112313221131221321221212331333232323132131131211223322221312331122123131332322322321212232232112321112313313322231122222331222323221113211121121322211223212133111332111112133213213323112112232223222333223312312123211122223333332321112322311132311113133233332132322332113121223323313232331211121333111123132321322331132131211331322222212113213321322111233311212131121322231132313221112122113213313312121331322131131112113311112232212222232112222213213111231231311111333233113122321113133323113231112121211113231232313233233333221333333311221131223111213122213112332311331211211113231212132322133211212121211312333331332322211213331311312223233212223312112121311323232122333221232213323122322122313332121212313221332233211222113222232223212233211313311132313212213312112111121332231231131232321313122332311312232321121233332131122131113212331223211322333232221321311133332231312122311321322222132232323123311133133122332313122231131111323133331233221121111111331122323111133331112323122123113213122332122222113113321132222312223131323123323222131323321231211312222213131333123132333133321323131231212311133222232133321333111212231331133131312333231333213321321212311123131232211123212123231122122321111132323321113131331233321323122232313311332111112321211232132112313132322111313121112231312333131212221322122123331322123212121333311111332312222132321333133323211113321113111333232333312231212123232322223122332233133222211112113121322113231212323322132331111133231131312223212123222121121323212123232221331113112321322212323323331231311321233331331331221322323231221313111132121331123221131211112211212323221322113323112333213323232333313321232123332231232223323331133222232122222112112123323212131133121331233311222121112231313111332322112122232133122111323123133123112233323121113133223132223333333332332211331321111212323212121113232313322123131321312132113321323233311123222121333232322321121332322133323123332322112111131233212111131122113332133222113221122222133112333123121322323331232113133322312222233113223312123112332211132322213313231313133111321113321131222122331311331312131322111323111113123322112122312223333113133112322231323123213231231312323311331112111122212312312131332333223221112222311232131333211232323233112221123111132232332111321313323231312212113232331212211232121213233221211231312132131312231222122131213233321211132312311321323211323223311323223311313313131311121312122322121211123113231123212133231322122321232221131212311323221323233332122133213111311122133323312122123112332332313132322313233312233322221111133212112231333222221233312311223211311331213121133231212233211132122331332223222322223122233232112211233123312222231131232232113113221212333133131311332313321321331122232221322123233323211232323212312211312132321321123123333131132332331133131122132332112333123323232211122232213333112232223312332112222223313132122212131233322131113132322312233113232311231323211332231233223312233221232323332311133322322112133122133211312233321123232212332132222213333233212213313133333223232333121322212321213321333212321213223323133113213131222232233212322331232331231223222331112111322312222113133112321231331213121211122332232322133321123133111312132133122132111322312232332213322233121121331312221121213231222131223231113311321331123333122111211332231312313321213221331223123323112112222232132132123212112221212122313311322122232223112331233111131221321121132333221323323123123332133311223123121231222322231121122211121111132121222322311323231212322211211133111221313122133332132323321211112121331113322232231323133323121221111111323233213232212312331133123323133132331112213122111313222312333332333111212123311323231132222332333323233132133213223131133332221223212112323121221212331131322223232123132323232131111312233221122112122213112232321312132112323221322111332232123132312231111232132221121221212222323311232223123111123233322211121111221223222223331213132123321212222212113212121132233312332132131311231311232233322323312221211211122121323131121323221313122232131121313312123321212311213131133223332131122213331311333221312323232223223331113331233312112112111111233321231133121122123132222312121211322333213233313222123123113332331131223231332232123312232132222233223312233322331231232112323321312132211133311321313132221312113212333322132313321132313111213122313132111321222333333322211322122312233111323123121333321222311332223311232212222132312231313131132223133113312312311322321311113131233333213321312223322213132213222113221221221213231312321313223323233122311323121212113311321221221313131113211222332213213133123311213323122223313321313132313322211123123221223312113311211112123223313321322323233212121213121113132323113233332233211132112121212221313332311332123211231211321331233131133311221213311121323311111313112213232312312212311112333113331333121123123313111323331121213323323223111221331211211131111331233233122223321112123321231212321232221122122333313223211222333113212111221121113221221111133323111323121211311132113121221233322312232221333333212111131233321122312213311233332321123131321113221131323223323312113133231311132221113112132132113123232132321112232213222221213122123133212321222131132131123133133122232323122233213123311131121213221311222311332211113312211221212131112113312132233231222121213132323121212232321112333221333311231311223322321111232232112323233233322213213123111111122313212232113331233111311311131122212311131121133122112222331212332321312133333131313313223231123232213322131211233212112332331123132232132132222211123122111213232332223212213223112111332221121113211111113111311133322213132223312232113321132221232221123131311231313113122111211122122322333231121113131323113232122113232111121213222311131231112122113333123321322223233323212322331233332112132333111211112212322313312132211231122222113322133323132311212332231211312333123121132122233212312123311111311331222131123323111122321112213212111322121131111123312332122211213133211312211132223212323133121212323113322131123212322233231323122113322213222332332133212313313312211213232131222311132321332232223212112222212113212211131223323332212322323222233332311322132113322231333231333121122312122313322113221212221231333133112133222112113111332113331122312123331332131113111213333122331111332122313123231331221223131131233132113122312212212321121222121123113333131123321232313113212313111322322133221333223221333213212333312233212231113331111133212312311111122232322233231332313223113331233112223123313123221113211213331331221121222323111213322231232133333233332132223133121323213122232312333323221322211121331122312123223122132122232233322322231112223333113213113112322213212132112122212121233121212123332321312322211222222321122231222312312231213213123132232333213113213323313311123133322312231231123232213133222221233212111111221313332113131333223223222132132333213221131132131323132233323221331132221111222211322321223213132221311323332132223223212323313221222232211311222321223321333331323221232133121321213121111113212112211331132122321333322232211321313113311221133312322212211111222133233322332123111113212112233133111331121322223223231212133223333332211231232331331212132133323222133133131322123323232221122123133331113222132133333211131133112211333323112121233311323112222331311212113111232113221213122333133213231333111213223222133113321112122322211131322212112211323333323332213331112132121132123231112223131222313331331313232232322213311113223331122232121311221121231131323321211133212332112121332223211321311312232111123322113121323333212222213111333311133322221311112333313222222231311331223113323212312211211323223113211223323113113131331213132313323231322313123111221221131123121221211112133112131332331211113313322322321322132111312331311131132313123312231111333133211122233212232311223131332213133223331232113122112122221231232221112332221223312223322332221223211222223332112311312313331122221211211132322231312331311222322132331233133113323133322331322221223331332211233222332113313233332123121112211121131131321222233223312233312122213133232123321232333232233213331123132313113221133322233213123113131212321213113322323133231321211323311123232312132311212322122233121" ] mask = np.zeros((100, 100), dtype=np.uint8) - colours: dt.MaskTypes.ColoursDict = dict() + colours: dt.MaskTypes.ColoursDict = {} categories: dt.MaskTypes.CategoryList = [] annotations: List[dt.AnnotationLike] = [ dt.Annotation( @@ -516,7 +514,7 @@ def test_render_raster() -> None: annotation_file = dt.AnnotationFile( Path("path"), annotations=annotations, - annotation_classes=set([c.annotation_class for c in annotations]), + annotation_classes={c.annotation_class for c in annotations}, filename="test.txt", ) @@ -556,17 +554,16 @@ def test_render_raster() -> None: GREEN = [0, 255, 0] BLUE = [0, 0, 255] BLACK = [0, 0, 0] -colours_for_test: Callable[[], dt.MaskTypes.RgbColors] = lambda: [ - *BLACK, - *RED, - *GREEN, - *BLUE, -] -colour_list_for_test: Callable[[], dt.MaskTypes.ColoursDict] = lambda: { - "mask1": 0, - "mask2": 1, - "mask3": 2, -} + + +def colours_for_test() -> dt.MaskTypes.RgbColors: + return [*BLACK, *RED, *GREEN, *BLUE] + + +def colour_list_for_test() -> dt.MaskTypes.ColoursDict: + return {"mask1": 0, "mask2": 1, "mask3": 2} + + data_path = (Path(__file__).parent / ".." / ".." / "data").resolve() @@ -857,7 +854,7 @@ def test_class_mappings_preserved_on_large_export(tmpdir) -> None: ] # Pixel sizes of polygons, for used in asserting the correct colour is mapped to the correct class sizes = {"cat1": 8, "cat2": 9, "cat3": 16, "cat4": 10} - sizes["__background__"] = height * width - sum([x for x in sizes.values()]) + sizes["__background__"] = height * width - sum(list(sizes.values())) annotation_files = [ dt.AnnotationFile( Path(f"test{x}"), diff --git a/tests/darwin/exporter/formats/export_nifti_test.py b/tests/darwin/exporter/formats/export_nifti_test.py index d987d7779..a27ded009 100644 --- a/tests/darwin/exporter/formats/export_nifti_test.py +++ b/tests/darwin/exporter/formats/export_nifti_test.py @@ -73,9 +73,9 @@ def test_video_annotation_nifti_export_mpr(team_slug: str): ) nifti.export(video_annotations, output_dir=tmpdir) export_im = nib.load( - annotations_dir / f"hippocampus_001_mpr_1_test_hippo.nii.gz" + annotations_dir / "hippocampus_001_mpr_1_test_hippo.nii.gz" ).get_fdata() expected_im = nib.load( - annotations_dir / f"hippocampus_001_mpr_1_test_hippo.nii.gz" + annotations_dir / "hippocampus_001_mpr_1_test_hippo.nii.gz" ).get_fdata() assert np.allclose(export_im, expected_im) diff --git a/tests/darwin/importer/formats/import_nifti_test.py b/tests/darwin/importer/formats/import_nifti_test.py index 8527d153d..666c46878 100644 --- a/tests/darwin/importer/formats/import_nifti_test.py +++ b/tests/darwin/importer/formats/import_nifti_test.py @@ -139,7 +139,7 @@ def test_image_annotation_nifti_import_incorrect_number_slot(team_slug: str): json.dumps(input_dict, indent=4, sort_keys=True, default=str) ) with pytest.raises(Exception): - annotation_files = parse_path(path=upload_json) + parse_path(path=upload_json) def serialise_annotation_file( diff --git a/tests/darwin/importer/importer_test.py b/tests/darwin/importer/importer_test.py index 240ffb28a..9659e995b 100644 --- a/tests/darwin/importer/importer_test.py +++ b/tests/darwin/importer/importer_test.py @@ -1,5 +1,5 @@ from typing import List, Tuple -from unittest.mock import MagicMock, Mock, _patch, patch +from unittest.mock import Mock, _patch, patch import pytest from rich.theme import Theme @@ -182,9 +182,7 @@ def test__get_annotation_data() -> None: video_annotation_class = dt.AnnotationClass("video_class", "video") annotation = dt.Annotation(annotation_class, {}, [], []) - video_annotation = dt.VideoAnnotation( - video_annotation_class, dict(), dict(), [], False - ) + video_annotation = dt.VideoAnnotation(video_annotation_class, {}, {}, [], False) annotation.data = "TEST DATA" diff --git a/tests/darwin/torch/utils_test.py b/tests/darwin/torch/utils_test.py index 28fc2c280..c3388d766 100644 --- a/tests/darwin/torch/utils_test.py +++ b/tests/darwin/torch/utils_test.py @@ -40,8 +40,8 @@ def test_should_raise_with_incorrect_shaped_inputs( ) -> None: masks, _ = basic_masks_with_cats cats = [0] - with pytest.raises(AssertionError) as error: - flattened = flatten_masks_by_category(masks, cats) + with pytest.raises(AssertionError): + flatten_masks_by_category(masks, cats) def test_should_correctly_set_overlap(self, basic_masks_with_cats: Tuple) -> None: masks, cats = basic_masks_with_cats diff --git a/tests/darwin/utils/find_files_test.py b/tests/darwin/utils/find_files_test.py index 184ae9925..64c4dadce 100644 --- a/tests/darwin/utils/find_files_test.py +++ b/tests/darwin/utils/find_files_test.py @@ -1,8 +1,8 @@ from dataclasses import dataclass -from pathlib import Path, PosixPath -from typing import Any, Callable, Dict, List, Optional -from unittest import TestCase, skip -from unittest.mock import MagicMock, patch +from pathlib import Path +from typing import Callable, Optional +from unittest import TestCase +from unittest.mock import patch from darwin.exceptions import UnsupportedFileType from darwin.utils import ( @@ -10,7 +10,6 @@ SUPPORTED_IMAGE_EXTENSIONS, SUPPORTED_VIDEO_EXTENSIONS, find_files, - is_extension_allowed, ) diff --git a/tests/darwin/utils_test.py b/tests/darwin/utils_test.py index aef7be2f3..b8cfaa3b9 100644 --- a/tests/darwin/utils_test.py +++ b/tests/darwin/utils_test.py @@ -1,7 +1,6 @@ from unittest.mock import MagicMock, patch import pytest -from jsonschema.exceptions import ValidationError from requests import Response import darwin.datatypes as dt @@ -148,7 +147,7 @@ def test_parses_darwin_images_correctly(self, tmp_path): assert annotation_file.path == import_file assert annotation_file.filename == "P49-RediPad-ProPlayLEFTY_442.jpg" - assert annotation_file.dataset_name == None + assert annotation_file.dataset_name is None assert annotation_file.version == dt.AnnotationFileVersion( major=1, minor=0, suffix="" ) @@ -237,7 +236,7 @@ def test_parses_darwin_videos_correctly(self, tmp_path): assert annotation_file.path == import_file assert annotation_file.filename == "above tractor.mp4" - assert annotation_file.dataset_name == None + assert annotation_file.dataset_name is None assert annotation_file.version == dt.AnnotationFileVersion( major=1, minor=0, suffix="" ) @@ -849,7 +848,7 @@ def test_parses_a_raster_annotation( assert annotation.annotation_class.name == "my_raster_annotation" assert annotation.annotation_class.annotation_type == "mask" - assert annotation.data["sparse_rle"] == None + assert annotation.data["sparse_rle"] is None # Sad paths @pytest.mark.parametrize("parameter_name", ["id", "name", "mask", "slot_names"]) diff --git a/tests/e2e_test_internals/test_run_cli_command.py b/tests/e2e_test_internals/test_run_cli_command.py index 4fae8aa54..9504fc0b4 100644 --- a/tests/e2e_test_internals/test_run_cli_command.py +++ b/tests/e2e_test_internals/test_run_cli_command.py @@ -1,5 +1,4 @@ from collections import namedtuple -from http import server from unittest import mock import pytest