darwin/dataset/local_dataset.py (11 additions & 35 deletions)
@@ -80,9 +80,7 @@ def __init__(
         self.original_annotations_path: Optional[List[Path]] = None
         self.keep_empty_annotations = keep_empty_annotations

-        release_path, annotations_dir, images_dir = self._initial_setup(
-            dataset_path, release_name
-        )
+        release_path, annotations_dir, images_dir = self._initial_setup(dataset_path, release_name)
         self._validate_inputs(partition, split_type, annotation_type)
         # Get the list of classes

Expand Down Expand Up @@ -122,9 +120,7 @@ def _validate_inputs(self, partition, split_type, annotation_type):
if split_type not in ["random", "stratified"]:
raise ValueError("split_type should be either 'random', 'stratified'")
if annotation_type not in ["tag", "polygon", "bounding_box"]:
raise ValueError(
"annotation_type should be either 'tag', 'bounding_box', or 'polygon'"
)
raise ValueError("annotation_type should be either 'tag', 'bounding_box', or 'polygon'")

def _setup_annotations_and_images(
self,
@@ -152,9 +148,7 @@ def _setup_annotations_and_images(
                 darwin_json, images_dir, with_folders, json_version, annotation_filepath
             )
             if image_path.exists():
-                if not keep_empty_annotations and is_stream_list_empty(
-                    darwin_json["annotations"]
-                ):
+                if not keep_empty_annotations and is_stream_list_empty(darwin_json["annotations"]):
                     continue
                 self.images_path.append(image_path)
                 self.annotations_path.append(annotation_filepath)
@@ -221,9 +215,7 @@ def get_height_and_width(self, index: int) -> Tuple[float, float]:
         parsed = parse_darwin_json(self.annotations_path[index], index)
         return parsed.image_height, parsed.image_width

-    def extend(
-        self, dataset: "LocalDataset", extend_classes: bool = False
-    ) -> "LocalDataset":
+    def extend(self, dataset: "LocalDataset", extend_classes: bool = False) -> "LocalDataset":
         """
         Extends the current dataset with another one.

@@ -318,10 +310,7 @@ def parse_json(self, index: int) -> Dict[str, Any]:
         # Filter out unused classes and annotations of a different type
         if self.classes is not None:
             annotations = [
-                a
-                for a in annotations
-                if a.annotation_class.name in self.classes
-                and self.annotation_type_supported(a)
+                a for a in annotations if a.annotation_class.name in self.classes and self.annotation_type_supported(a)
             ]
         return {
             "image_id": index,
@@ -338,20 +327,15 @@ def annotation_type_supported(self, annotation) -> bool:
         elif self.annotation_type == "bounding_box":
             is_bounding_box = annotation_type == "bounding_box"
             is_supported_polygon = (
-                annotation_type in ["polygon", "complex_polygon"]
-                and "bounding_box" in annotation.data
+                annotation_type in ["polygon", "complex_polygon"] and "bounding_box" in annotation.data
             )
             return is_bounding_box or is_supported_polygon
         elif self.annotation_type == "polygon":
             return annotation_type in ["polygon", "complex_polygon"]
         else:
-            raise ValueError(
-                "annotation_type should be either 'tag', 'bounding_box', or 'polygon'"
-            )
+            raise ValueError("annotation_type should be either 'tag', 'bounding_box', or 'polygon'")

-    def measure_mean_std(
-        self, multi_threaded: bool = True
-    ) -> Tuple[np.ndarray, np.ndarray]:
+    def measure_mean_std(self, multi_threaded: bool = True) -> Tuple[np.ndarray, np.ndarray]:
         """
         Computes mean and std of trained images, given the train loader.

@@ -374,9 +358,7 @@ def measure_mean_std(
             results = pool.map(self._return_mean, self.images_path)
             mean = np.sum(np.array(results), axis=0) / len(self.images_path)
             # Online image_classification deviation
-            results = pool.starmap(
-                self._return_std, [[item, mean] for item in self.images_path]
-            )
+            results = pool.starmap(self._return_std, [[item, mean] for item in self.images_path])
             std_sum = np.sum(np.array([item[0] for item in results]), axis=0)
             total_pixel_count = np.sum(np.array([item[1] for item in results]))
             std = np.sqrt(std_sum / total_pixel_count)
@@ -422,20 +404,14 @@ def _compute_weights(labels: List[int]) -> np.ndarray:
     @staticmethod
     def _return_mean(image_path: Path) -> np.ndarray:
         img = np.array(load_pil_image(image_path))
-        mean = np.array(
-            [np.mean(img[:, :, 0]), np.mean(img[:, :, 1]), np.mean(img[:, :, 2])]
-        )
+        mean = np.array([np.mean(img[:, :, 0]), np.mean(img[:, :, 1]), np.mean(img[:, :, 2])])
         return mean / 255.0

     # Loads an image with OpenCV and returns the channel wise std of the image.
     @staticmethod
     def _return_std(image_path: Path, mean: np.ndarray) -> Tuple[np.ndarray, float]:
         img = np.array(load_pil_image(image_path)) / 255.0
-        m2 = np.square(
-            np.array(
-                [img[:, :, 0] - mean[0], img[:, :, 1] - mean[1], img[:, :, 2] - mean[2]]
-            )
-        )
+        m2 = np.square(np.array([img[:, :, 0] - mean[0], img[:, :, 1] - mean[1], img[:, :, 2] - mean[2]]))
         return np.sum(np.sum(m2, axis=1), 1), m2.size / 3.0

     def __getitem__(self, index: int):
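The reformatted statistics helpers above implement a two-pass computation: pool.map collects each image's per-channel mean, those are averaged into mean, then pool.starmap collects per-image squared deviations and pixel counts so that std = sqrt(std_sum / total_pixel_count). A minimal single-threaded sketch of the same math, using small random arrays as hypothetical stand-ins for the loaded images:

import numpy as np

# Hypothetical stand-ins for the dataset's images (H x W x 3, values in [0, 1]).
images = [np.random.rand(4, 4, 3), np.random.rand(6, 6, 3)]

# Pass 1: average the per-image channel means (the role of _return_mean).
mean = np.mean([img.mean(axis=(0, 1)) for img in images], axis=0)

# Pass 2: accumulate squared deviations and pixel counts (the role of _return_std).
std_sum = np.sum([((img - mean) ** 2).sum(axis=(0, 1)) for img in images], axis=0)
total_pixel_count = sum(img.shape[0] * img.shape[1] for img in images)
std = np.sqrt(std_sum / total_pixel_count)  # per-channel standard deviation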
darwin/exporter/formats/darwin_1_0.py (10 additions & 32 deletions)
@@ -45,23 +45,17 @@ def _export_file(annotation_file: AnnotationFile, _: int, output_dir: Path) -> None:
     try:
         output: DictFreeForm = _build_json(annotation_file)
     except Exception as e:
-        raise ExportException_CouldNotBuildOutput(
-            f"Could not build output for {annotation_file.path}"
-        ) from e
+        raise ExportException_CouldNotBuildOutput(f"Could not build output for {annotation_file.path}") from e

     try:
         with open(output_file_path, "w") as f:
             op = json.dumps(
                 output,
-                option=json.OPT_INDENT_2
-                | json.OPT_SERIALIZE_NUMPY
-                | json.OPT_NON_STR_KEYS,
+                option=json.OPT_INDENT_2 | json.OPT_SERIALIZE_NUMPY | json.OPT_NON_STR_KEYS,
             ).decode("utf-8")
             f.write(op)
     except Exception as e:
-        raise ExportException_CouldNotWriteFile(
-            f"Could not write output for {annotation_file.path}"
-        ) from e
+        raise ExportException_CouldNotWriteFile(f"Could not write output for {annotation_file.path}") from e


 def _build_json(annotation_file: AnnotationFile) -> DictFreeForm:
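A note on the serialization call above: json.dumps is given an option bitmask and its result is decoded from bytes, which matches the orjson API rather than the standard library, so the module is presumably imported as `import orjson as json`. A minimal standalone sketch under that assumption:

import orjson

payload = {"image": {"filename": "example.png"}, "annotations": []}  # hypothetical output dict
op = orjson.dumps(
    payload,
    option=orjson.OPT_INDENT_2 | orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_NON_STR_KEYS,
).decode("utf-8")  # orjson.dumps returns bytes, hence the decode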
@@ -136,17 +130,11 @@ def _build_sub_annotation(sub: SubAnnotation) -> DictFreeForm:
 def _build_authorship(annotation: Union[VideoAnnotation, Annotation]) -> DictFreeForm:
     annotators = {}
     if annotation.annotators:
-        annotators = {
-            "annotators": [
-                _build_author(annotator) for annotator in annotation.annotators
-            ]
-        }
+        annotators = {"annotators": [_build_author(annotator) for annotator in annotation.annotators]}

     reviewers = {}
     if annotation.reviewers:
-        reviewers = {
-            "annotators": [_build_author(reviewer) for reviewer in annotation.reviewers]
-        }
+        reviewers = {"annotators": [_build_author(reviewer) for reviewer in annotation.reviewers]}

     return {**annotators, **reviewers}

@@ -155,19 +143,15 @@ def _build_video_annotation(annotation: VideoAnnotation) -> DictFreeForm:
     return {
         **annotation.get_data(
             only_keyframes=False,
-            post_processing=lambda annotation, _: _build_image_annotation(
-                annotation, skip_slots=True
-            ),
+            post_processing=lambda annotation, _: _build_image_annotation(annotation, skip_slots=True),
         ),
         "name": annotation.annotation_class.name,
         "slot_names": annotation.slot_names,
         **_build_authorship(annotation),
     }


-def _build_image_annotation(
-    annotation: Annotation, skip_slots: bool = False
-) -> DictFreeForm:
+def _build_image_annotation(annotation: Annotation, skip_slots: bool = False) -> DictFreeForm:
     json_subs = {}
     for sub in annotation.subs:
         json_subs.update(_build_sub_annotation(sub))
@@ -185,9 +169,7 @@ def _build_image_annotation(
     return {**base_json, "slot_names": annotation.slot_names}


-def _build_legacy_annotation_data(
-    annotation_class: AnnotationClass, data: DictFreeForm
-) -> DictFreeForm:
+def _build_legacy_annotation_data(annotation_class: AnnotationClass, data: DictFreeForm) -> DictFreeForm:
     v1_data = {}
     polygon_annotation_mappings = {"complex_polygon": "paths", "polygon": "path"}

@@ -250,9 +232,7 @@ def build_image_annotation(annotation_file: AnnotationFile) -> Dict[str, Any]:
     annotations: List[Dict[str, Any]] = []
     for annotation in annotation_file.annotations:
         payload = {
-            annotation.annotation_class.annotation_type: _build_annotation_data(
-                annotation
-            ),
+            annotation.annotation_class.annotation_type: _build_annotation_data(annotation),
             "name": annotation.annotation_class.name,
         }

@@ -280,8 +260,6 @@ def _build_annotation_data(annotation: Annotation) -> Dict[str, Any]:
         return {"path": annotation.data["paths"]}

     if annotation.annotation_class.annotation_type == "polygon":
-        return dict(
-            filter(lambda item: item[0] != "bounding_box", annotation.data.items())
-        )
+        return dict(filter(lambda item: item[0] != "bounding_box", annotation.data.items()))

     return dict(annotation.data)
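The polygon branch above strips the derived bounding_box key and exports the rest of the payload unchanged. A small illustration with hypothetical annotation data:

# Hypothetical polygon payload; only "bounding_box" is dropped by the filter.
data = {
    "path": [{"x": 0.0, "y": 0.0}, {"x": 1.0, "y": 0.0}, {"x": 1.0, "y": 1.0}],
    "bounding_box": {"x": 0.0, "y": 0.0, "w": 1.0, "h": 1.0},
}
filtered = dict(filter(lambda item: item[0] != "bounding_box", data.items()))
assert filtered == {"path": data["path"]}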
darwin/importer/formats/csv_tags_video.py (3 additions & 1 deletion)
@@ -51,7 +51,9 @@ def parse_path(path: Path) -> Optional[List[dt.AnnotationFile]]:
             file_annotation_map[filename].append(annotation)
     for filename in file_annotation_map:
         annotations = file_annotation_map[filename]
-        annotation_classes = {annotation.annotation_class for annotation in annotations}
+        annotation_classes = {
+            annotation.annotation_class for annotation in annotations
+        }
         filename_path = Path(filename)
         remote_path = str(filename_path.parent)
         if not remote_path.startswith("/"):
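For context, the lines following the change derive remote_path from the filename's parent directory; a bare filename stringifies to ".", not an absolute path, which is why the startswith("/") normalization that follows in the source exists. A quick illustration with hypothetical filenames:

from pathlib import Path

# A nested filename keeps its folder; a bare filename yields "." as its parent.
assert str(Path("videos/clip.mp4").parent) == "videos"
assert str(Path("clip.mp4").parent) == "."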
darwin/importer/importer.py (1 addition & 3 deletions)
@@ -603,9 +603,7 @@ def _warn_unsupported_annotations(parsed_files: List[AnnotationFile]) -> None:
             if annotation.annotation_class.annotation_type in UNSUPPORTED_CLASSES:
                 skipped_annotations.append(annotation)
         if len(skipped_annotations) > 0:
-            types = {
-                c.annotation_class.annotation_type for c in skipped_annotations
-            }  # noqa: C417
+            types = {c.annotation_class.annotation_type for c in skipped_annotations}  # noqa: C417
             console.print(
                 f"Import of annotation class types '{', '.join(types)}' is not yet supported. Skipping {len(skipped_annotations)} "
                 + "annotations from '{parsed_file.full_path}'.\n",
darwin/torch/dataset.py (6 additions & 26 deletions)
@@ -99,9 +99,7 @@ class ClassificationDataset(LocalDataset):
     be composed via torchvision.
     """

-    def __init__(
-        self, transform: Optional[Union[Callable, List]] = None, **kwargs
-    ) -> None:
+    def __init__(self, transform: Optional[Union[Callable, List]] = None, **kwargs) -> None:
         super().__init__(annotation_type="tag", **kwargs)

         if transform is not None and isinstance(transform, list):
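A hedged usage sketch for the class above. The keyword names (dataset_path, partition, split_type) come from the LocalDataset code earlier in this diff, but the values shown and the shape of the returned sample are assumptions, not confirmed by the changed lines:

from pathlib import Path

from darwin.torch.dataset import ClassificationDataset

dataset = ClassificationDataset(
    dataset_path=Path("datasets/my-team/my-dataset"),  # hypothetical local path
    partition="train",      # assumed partition name
    split_type="random",    # "random" or "stratified", per _validate_inputs
)
image, target = dataset[0]  # assumed (image, tag tensor) sample layout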
@@ -154,11 +152,7 @@ def get_target(self, index: int) -> Tensor:

         data = self.parse_json(index)
         annotations = data.pop("annotations")
-        tags = [
-            a.annotation_class.name
-            for a in annotations
-            if a.annotation_class.annotation_type == "tag"
-        ]
+        tags = [a.annotation_class.name for a in annotations if a.annotation_class.annotation_type == "tag"]

         if not self.is_multi_label:
             # Binary or multiclass must have a label per image
@@ -182,11 +176,7 @@ def check_if_multi_label(self) -> None:
         for idx in range(len(self)):
             target = self.parse_json(idx)
             annotations = target.pop("annotations")
-            tags = [
-                a.annotation_class.name
-                for a in annotations
-                if a.annotation_class.annotation_type == "tag"
-            ]
+            tags = [a.annotation_class.name for a in annotations if a.annotation_class.annotation_type == "tag"]

             if len(tags) > 1:
                 self.is_multi_label = True
@@ -334,15 +324,12 @@ def get_target(self, index: int) -> Dict[str, Any]:
                 path_key = "paths"

             if path_key not in annotation.data:
-                print(
-                    f"Warning: missing polygon in annotation {self.annotations_path[index]}"
-                )
+                print(f"Warning: missing polygon in annotation {self.annotations_path[index]}")
             # Extract the sequences of coordinates from the polygon annotation
             sequences = convert_polygons_to_sequences(
                 annotation.data[path_key],
                 height=target["height"],
                 width=target["width"],
-                rounding=False,
             )
             # Compute the bbox of the polygon
             x_coords = [s[0::2] for s in sequences]
@@ -366,12 +353,7 @@ def get_target(self, index: int) -> Dict[str, Any]:

         # Compute the area of the polygon
         # TODO fix with addictive/subtractive paths in complex polygons
-        poly_area: float = np.sum(
-            [
-                polygon_area(x_coord, y_coord)
-                for x_coord, y_coord in zip(x_coords, y_coords)
-            ]
-        )
+        poly_area: float = np.sum([polygon_area(x_coord, y_coord) for x_coord, y_coord in zip(x_coords, y_coords)])

         # Create and append the new entry for this annotation
         annotations.append(
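The reformatted poly_area line sums polygon_area over each path of the (possibly complex) polygon. As an assumption about what that helper computes, the shoelace formula is sketched below; the library's actual implementation may differ, and as the TODO notes, subtractive paths (holes) are not yet handled:

import numpy as np

def polygon_area_sketch(x: np.ndarray, y: np.ndarray) -> float:
    # Shoelace formula: 0.5 * |sum_i (x_i * y_{i-1} - y_i * x_{i-1})|
    return 0.5 * float(np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1))))

# Unit square has area 1.
assert polygon_area_sketch(np.array([0, 1, 1, 0]), np.array([0, 0, 1, 1])) == 1.0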
@@ -423,9 +405,7 @@ class SemanticSegmentationDataset(LocalDataset):
         Object used to convert polygons to semantic masks.
     """

-    def __init__(
-        self, transform: Optional[Union[List[Callable], Callable]] = None, **kwargs
-    ):
+    def __init__(self, transform: Optional[Union[List[Callable], Callable]] = None, **kwargs):
         super().__init__(annotation_type="polygon", **kwargs)
         if "__background__" not in self.classes:
             self.classes.insert(0, "__background__")