From 574ed68325900daf368e839073cadd970530247d Mon Sep 17 00:00:00 2001 From: Christoffer Date: Wed, 20 Sep 2023 14:41:23 +0200 Subject: [PATCH 01/11] added albumentation transform, clamp on bbox in obj det and changed instance seg to x,y,w,h format --- darwin/torch/dataset.py | 5 ++- darwin/torch/transforms.py | 68 ++++++++++++++++++++++++++++++++++++-- darwin/torch/utils.py | 9 +++++ 3 files changed, 79 insertions(+), 3 deletions(-) diff --git a/darwin/torch/dataset.py b/darwin/torch/dataset.py index 3d5b8883e..1b29d3bad 100644 --- a/darwin/torch/dataset.py +++ b/darwin/torch/dataset.py @@ -15,7 +15,7 @@ ConvertPolygonsToInstanceMasks, ConvertPolygonsToSemanticMask, ) -from darwin.torch.utils import polygon_area +from darwin.torch.utils import clamp_bbox_to_image_size, polygon_area from darwin.utils import convert_polygons_to_sequences @@ -546,6 +546,9 @@ def __getitem__(self, index: int): img: PILImage.Image = self.get_image(index) target: Dict[str, Any] = self.get_target(index) + width, height = img.size + target = clamp_bbox_to_image_size(target, width, height) + if self.transform is not None: img_tensor, target = self.transform(img, target) else: diff --git a/darwin/torch/transforms.py b/darwin/torch/transforms.py index 6e466a377..a91e379e6 100644 --- a/darwin/torch/transforms.py +++ b/darwin/torch/transforms.py @@ -8,7 +8,6 @@ from darwin.torch.utils import convert_segmentation_to_mask, flatten_masks_by_category - TargetKey = Union["boxes", "labels", "mask", "masks", "image_id", "area", "iscrowd"] TargetType = Dict[TargetKey, torch.Tensor] @@ -191,7 +190,6 @@ def __call__(self, image: PILImage.Image, target: TargetType) -> Tuple[PILImage. boxes = [obj["bbox"] for obj in annotations] # guard against no boxes via resizing boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4) - boxes[:, 2:] += boxes[:, :2] boxes[:, 0::2].clamp_(min=0, max=w) boxes[:, 1::2].clamp_(min=0, max=h) @@ -278,3 +276,69 @@ def __call__(self, image: PILImage.Image, annotation: Dict[str, Any]) -> Tuple[P target = torch.zeros((h, w), dtype=torch.uint8) target = PILImage.fromarray(target.numpy()) return image, target + + +class AlbumentationsTransform(object): + """ + Applies albumentation augmentations + """ + + def __init__(self, transform): + self.transform = transform + + @classmethod + def from_path(cls, config_path): + transform = A.load(config_path) + return cls(transform) + + @classmethod + def from_dict(cls, alb_dict): + transform = A.from_dict(alb_dict) + return cls(transform) + + def __call__(self, image, annotation): + + np_image = np.array(image) + albu_data = self.pre_process(np_image, annotation) + transformed_data = self.transform(**albu_data) + image, transformed_annotation = self.post_process(transformed_data, annotation) + + return TF.pil_to_tensor(image), transformed_annotation + + def pre_process(self, image, darwin_annotations): + + albumentation_dict = {"image": image} + width, height = image.shape[:2] + + if "boxes" in darwin_annotations: + boxes = darwin_annotations['boxes'].numpy() + # Clip the bounding box values to ensure they are within the image + boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, width) + boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, height) + albumentation_dict['bboxes'] = boxes.tolist() + + if "labels" in darwin_annotations: + albumentation_dict['labels'] = darwin_annotations['labels'].tolist() + + if "masks" in darwin_annotations: + albumentation_dict["mask"] = darwin_annotations['masks'].tolist() + + return albumentation_dict + + def post_process(self, 
albumentation_output, darwin_annotations): + + darwin_annotation = {'image_id': darwin_annotations['image_id']} + image = Image.fromarray(albumentation_output['image']) + + if "bboxes" in albumentation_output: + darwin_annotation['boxes'] = torch.tensor(albumentation_output['bboxes']) + + if "labels" in albumentation_output: + darwin_annotation['labels'] = torch.tensor(albumentation_output['labels']) + + if "boxes" in albumentation_output and "area" in darwin_annotations and not "masks" in darwin_annotations: + bboxes =transformed_annotation["boxes"] + transformed_annotation['area'] = bboxes[:,2] * bboxes[:,3] + + + return image, darwin_annotation \ No newline at end of file diff --git a/darwin/torch/utils.py b/darwin/torch/utils.py index d75bba866..51715b89c 100644 --- a/darwin/torch/utils.py +++ b/darwin/torch/utils.py @@ -192,3 +192,12 @@ def detectron2_register_dataset( if evaluator_type: MetadataCatalog.get(catalog_name).set(evaluator_type=evaluator_type) return catalog_name + + +def clamp_bbox_to_image_size(annotations, width, height): + boxes = annotations['boxes'] + boxes[:, 0::2].clamp_(min=0, max=width) + boxes[:, 1::2].clamp_(min=0, max=height) + annotations['boxes'] = boxes + + return annotations From 9c0256b8ffebc8035c845c2883b29ae8886a6cca Mon Sep 17 00:00:00 2001 From: Christoffer Date: Wed, 20 Sep 2023 15:02:39 +0200 Subject: [PATCH 02/11] removed xmin < xmax check --- darwin/torch/transforms.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/darwin/torch/transforms.py b/darwin/torch/transforms.py index a91e379e6..bfac8adf8 100644 --- a/darwin/torch/transforms.py +++ b/darwin/torch/transforms.py @@ -207,12 +207,12 @@ def __call__(self, image: PILImage.Image, target: TargetType) -> Tuple[PILImage. if num_keypoints: keypoints = keypoints.view(num_keypoints, -1, 3) - keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0]) - boxes = boxes[keep] - classes = classes[keep] - masks = masks[keep] - if keypoints is not None: - keypoints = keypoints[keep] + #keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0]) + #boxes = boxes[keep] + #classes = classes[keep] + #masks = masks[keep] + #if keypoints is not None: + # keypoints = keypoints[keep] target["boxes"] = boxes target["labels"] = classes From d1b636c735e7b3559e86ad805f7d75d3c15ccfa8 Mon Sep 17 00:00:00 2001 From: Christoffer Date: Thu, 21 Sep 2023 12:09:52 +0200 Subject: [PATCH 03/11] now albumentations supports instance segmentation and bbox --- darwin/torch/transforms.py | 121 +++++++++++++++++++++---------------- pyproject.toml | 1 + 2 files changed, 70 insertions(+), 52 deletions(-) diff --git a/darwin/torch/transforms.py b/darwin/torch/transforms.py index bfac8adf8..2b3045e9f 100644 --- a/darwin/torch/transforms.py +++ b/darwin/torch/transforms.py @@ -1,6 +1,8 @@ import random from typing import Any, Dict, Optional, Tuple, Union +import albumentations as A +import numpy as np import torch import torchvision.transforms as transforms import torchvision.transforms.functional as F @@ -278,67 +280,82 @@ def __call__(self, image: PILImage.Image, annotation: Dict[str, Any]) -> Tuple[P return image, target -class AlbumentationsTransform(object): +class AlbumentationsTransform: """ - Applies albumentation augmentations + Wrapper class for Albumentations augmentations. 
""" - - def __init__(self, transform): + + def __init__(self, transform: A.Compose): self.transform = transform - - @classmethod - def from_path(cls, config_path): - transform = A.load(config_path) - return cls(transform) - + @classmethod - def from_dict(cls, alb_dict): - transform = A.from_dict(alb_dict) - return cls(transform) + def from_path(cls, config_path: str) -> 'AlbumentationsTransform': + try: + transform = A.load(config_path) + return cls(transform) + except Exception as e: + raise ValueError(f"Invalid config path: {config_path}. Error: {e}") - def __call__(self, image, annotation): - + @classmethod + def from_dict(cls, alb_dict: dict) -> 'AlbumentationsTransform': + try: + transform = A.from_dict(alb_dict) + return cls(transform) + except Exception as e: + raise ValueError(f"Invalid albumentations dictionary. Error: {e}") + + def __call__(self, image, annotation: dict) -> tuple: np_image = np.array(image) - albu_data = self.pre_process(np_image, annotation) + albu_data = self._pre_process(np_image, annotation) transformed_data = self.transform(**albu_data) - image, transformed_annotation = self.post_process(transformed_data, annotation) - - return TF.pil_to_tensor(image), transformed_annotation + image, transformed_annotation = self._post_process(transformed_data, annotation) + return F.pil_to_tensor(image), transformed_annotation - def pre_process(self, image, darwin_annotations): - + def _pre_process(self, image: np.ndarray, annotation: dict) -> dict: + """ + Prepare image and annotation for albumentations transformation. + """ albumentation_dict = {"image": image} - width, height = image.shape[:2] - if "boxes" in darwin_annotations: - boxes = darwin_annotations['boxes'].numpy() - # Clip the bounding box values to ensure they are within the image - boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, width) - boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, height) - albumentation_dict['bboxes'] = boxes.tolist() - - if "labels" in darwin_annotations: - albumentation_dict['labels'] = darwin_annotations['labels'].tolist() - - if "masks" in darwin_annotations: - albumentation_dict["mask"] = darwin_annotations['masks'].tolist() - - return albumentation_dict + boxes = annotation.get('boxes') + if boxes is not None: + albumentation_dict['bboxes'] = boxes.numpy().tolist() - def post_process(self, albumentation_output, darwin_annotations): - - darwin_annotation = {'image_id': darwin_annotations['image_id']} - image = Image.fromarray(albumentation_output['image']) - - if "bboxes" in albumentation_output: - darwin_annotation['boxes'] = torch.tensor(albumentation_output['bboxes']) - - if "labels" in albumentation_output: - darwin_annotation['labels'] = torch.tensor(albumentation_output['labels']) - - if "boxes" in albumentation_output and "area" in darwin_annotations and not "masks" in darwin_annotations: - bboxes =transformed_annotation["boxes"] - transformed_annotation['area'] = bboxes[:,2] * bboxes[:,3] - + labels = annotation.get('labels') + if labels is not None: + albumentation_dict['labels'] = labels.tolist() - return image, darwin_annotation \ No newline at end of file + masks = annotation.get('masks') + if masks is not None: + albumentation_dict["masks"] = masks.numpy() + + return albumentation_dict + + def _post_process(self, albumentation_output: dict, annotation: dict) -> tuple: + """ + Process the output of albumentations transformation back to desired format. 
+ """ + output_annotation = {'image_id': annotation['image_id']} + image = PILImage.fromarray(albumentation_output['image']) + + bboxes = albumentation_output.get('bboxes') + if bboxes is not None: + output_annotation['boxes'] = torch.tensor(bboxes) + if 'area' in annotation and 'masks' not in albumentation_output: + output_annotation['area'] = output_annotation['boxes'][:, 2] * output_annotation['boxes'][:, 3] + + labels = albumentation_output.get('labels') + if labels is not None: + output_annotation['labels'] = torch.tensor(labels) + + masks = albumentation_output.get('masks') + if masks is not None: + output_annotation['masks'] = torch.tensor(masks) + if 'area' in annotation: + output_annotation['area'] = torch.sum(output_annotation['masks'], dim=[1, 2]) + + # Copy other metadata from original annotation + for key, value in annotation.items(): + output_annotation.setdefault(key, value) + + return image, output_annotation \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 240bd9406..aeda05d69 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -94,6 +94,7 @@ types-pyyaml = "^6.0.12.9" python-dotenv = { version = "^1.0.0", python = ">3.8" } opencv-python-headless = { version = "^4.8.0.76", optional = true } pyyaml = "^6.0.1" +albumentations = "^1.3.1" [tool.poetry.extras] dev = [ From 36b82a2fac956da31c4eb6be1645ec38a1117f2b Mon Sep 17 00:00:00 2001 From: Christoffer Date: Thu, 21 Sep 2023 18:03:19 +0200 Subject: [PATCH 04/11] updated tests --- darwin/torch/transforms.py | 82 ++++++++++++++---------------- tests/darwin/torch/dataset_test.py | 4 +- 2 files changed, 39 insertions(+), 47 deletions(-) diff --git a/darwin/torch/transforms.py b/darwin/torch/transforms.py index 2b3045e9f..7d7f95459 100644 --- a/darwin/torch/transforms.py +++ b/darwin/torch/transforms.py @@ -32,9 +32,7 @@ class RandomHorizontalFlip(transforms.RandomHorizontalFlip): Allows for horizontal flipping of an image, randomly. """ - def forward( - self, image: torch.Tensor, target: Optional[TargetType] = None - ) -> Union[torch.Tensor, Tuple[torch.Tensor, TargetType]]: + def forward(self, image: torch.Tensor, target: Optional[TargetType] = None) -> Union[torch.Tensor, Tuple[torch.Tensor, TargetType]]: """ May or may not horizontally flip an image depending on a random factor. @@ -77,9 +75,7 @@ class RandomVerticalFlip(transforms.RandomVerticalFlip): Allows for vertical flipping of an image, randomly. """ - def forward( - self, image: torch.Tensor, target: Optional[TargetType] = None - ) -> Union[torch.Tensor, Tuple[torch.Tensor, TargetType]]: + def forward(self, image: torch.Tensor, target: Optional[TargetType] = None) -> Union[torch.Tensor, Tuple[torch.Tensor, TargetType]]: """ May or may not vertically flip an image depending on a random factor. @@ -121,9 +117,7 @@ class ColorJitter(transforms.ColorJitter): Jitters the colors of the given transformation. """ - def __call__( - self, image: PILImage.Image, target: Optional[TargetType] = None - ) -> Union[PILImage.Image, Tuple[PILImage.Image, TargetType]]: + def __call__(self, image: PILImage.Image, target: Optional[TargetType] = None) -> Union[PILImage.Image, Tuple[PILImage.Image, TargetType]]: transform = self.get_params(self.brightness, self.contrast, self.saturation, self.hue) image = transform(image) if target is None: @@ -136,9 +130,7 @@ class ToTensor(transforms.ToTensor): Converts given ``PILImage`` to a ``Tensor``. 
""" - def __call__( - self, image: PILImage.Image, target: Optional[TargetType] = None - ) -> Union[torch.Tensor, Tuple[torch.Tensor, TargetType]]: + def __call__(self, image: PILImage.Image, target: Optional[TargetType] = None) -> Union[torch.Tensor, Tuple[torch.Tensor, TargetType]]: image_tensor: torch.Tensor = F.to_tensor(image) if target is None: return image_tensor @@ -150,9 +142,7 @@ class ToPILImage(transforms.ToPILImage): Converts given ``Tensor`` to a ``PILImage``. """ - def __call__( - self, image: torch.Tensor, target: Optional[TargetType] = None - ) -> Union[PILImage.Image, Tuple[PILImage.Image, TargetType]]: + def __call__(self, image: torch.Tensor, target: Optional[TargetType] = None) -> Union[PILImage.Image, Tuple[PILImage.Image, TargetType]]: pil_image: PILImage.Image = F.to_pil_image(image) if target is None: return pil_image @@ -164,9 +154,7 @@ class Normalize(transforms.Normalize): Normalizes the given ``Tensor``. """ - def __call__( - self, tensor: torch.Tensor, target: Optional[TargetType] = None - ) -> Union[torch.Tensor, Tuple[torch.Tensor, TargetType]]: + def __call__(self, tensor: torch.Tensor, target: Optional[TargetType] = None) -> Union[torch.Tensor, Tuple[torch.Tensor, TargetType]]: tensor = F.normalize(tensor, self.mean, self.std, self.inplace) if target is None: @@ -209,11 +197,11 @@ def __call__(self, image: PILImage.Image, target: TargetType) -> Tuple[PILImage. if num_keypoints: keypoints = keypoints.view(num_keypoints, -1, 3) - #keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0]) - #boxes = boxes[keep] - #classes = classes[keep] - #masks = masks[keep] - #if keypoints is not None: + # keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0]) + # boxes = boxes[keep] + # classes = classes[keep] + # masks = masks[keep] + # if keypoints is not None: # keypoints = keypoints[keep] target["boxes"] = boxes @@ -289,7 +277,7 @@ def __init__(self, transform: A.Compose): self.transform = transform @classmethod - def from_path(cls, config_path: str) -> 'AlbumentationsTransform': + def from_path(cls, config_path: str) -> "AlbumentationsTransform": try: transform = A.load(config_path) return cls(transform) @@ -297,7 +285,7 @@ def from_path(cls, config_path: str) -> 'AlbumentationsTransform': raise ValueError(f"Invalid config path: {config_path}. Error: {e}") @classmethod - def from_dict(cls, alb_dict: dict) -> 'AlbumentationsTransform': + def from_dict(cls, alb_dict: dict) -> "AlbumentationsTransform": try: transform = A.from_dict(alb_dict) return cls(transform) @@ -309,23 +297,24 @@ def __call__(self, image, annotation: dict) -> tuple: albu_data = self._pre_process(np_image, annotation) transformed_data = self.transform(**albu_data) image, transformed_annotation = self._post_process(transformed_data, annotation) - return F.pil_to_tensor(image), transformed_annotation + + return image, transformed_annotation def _pre_process(self, image: np.ndarray, annotation: dict) -> dict: """ Prepare image and annotation for albumentations transformation. 
""" albumentation_dict = {"image": image} - - boxes = annotation.get('boxes') + + boxes = annotation.get("boxes") if boxes is not None: - albumentation_dict['bboxes'] = boxes.numpy().tolist() + albumentation_dict["bboxes"] = boxes.numpy().tolist() - labels = annotation.get('labels') + labels = annotation.get("labels") if labels is not None: - albumentation_dict['labels'] = labels.tolist() + albumentation_dict["labels"] = labels.tolist() - masks = annotation.get('masks') + masks = annotation.get("masks") if masks is not None: albumentation_dict["masks"] = masks.numpy() @@ -335,27 +324,30 @@ def _post_process(self, albumentation_output: dict, annotation: dict) -> tuple: """ Process the output of albumentations transformation back to desired format. """ - output_annotation = {'image_id': annotation['image_id']} - image = PILImage.fromarray(albumentation_output['image']) + output_annotation = {} + image = albumentation_output["image"] - bboxes = albumentation_output.get('bboxes') + bboxes = albumentation_output.get("bboxes") if bboxes is not None: - output_annotation['boxes'] = torch.tensor(bboxes) - if 'area' in annotation and 'masks' not in albumentation_output: - output_annotation['area'] = output_annotation['boxes'][:, 2] * output_annotation['boxes'][:, 3] + output_annotation["boxes"] = torch.tensor(bboxes) + if "area" in annotation and "masks" not in albumentation_output: + output_annotation["area"] = output_annotation["boxes"][:, 2] * output_annotation["boxes"][:, 3] - labels = albumentation_output.get('labels') + labels = albumentation_output.get("labels") if labels is not None: - output_annotation['labels'] = torch.tensor(labels) + output_annotation["labels"] = torch.tensor(labels) - masks = albumentation_output.get('masks') + masks = albumentation_output.get("masks") if masks is not None: - output_annotation['masks'] = torch.tensor(masks) - if 'area' in annotation: - output_annotation['area'] = torch.sum(output_annotation['masks'], dim=[1, 2]) + if isinstance(masks[0], np.ndarray): + output_annotation["masks"] = torch.tensor(np.array(masks)) + else: + output_annotation["masks"] = torch.stack(masks) + if "area" in annotation: + output_annotation["area"] = torch.sum(output_annotation["masks"], dim=[1, 2]) # Copy other metadata from original annotation for key, value in annotation.items(): output_annotation.setdefault(key, value) - return image, output_annotation \ No newline at end of file + return image, output_annotation diff --git a/tests/darwin/torch/dataset_test.py b/tests/darwin/torch/dataset_test.py index 05eb8f5ce..cfb2c03e2 100644 --- a/tests/darwin/torch/dataset_test.py +++ b/tests/darwin/torch/dataset_test.py @@ -210,7 +210,7 @@ def test_loads_instance_segmentation_dataset_from_polygon_annotations( label = {k: _maybe_tensor_to_list(v) for k, v in label.items()} - assert label["boxes"] == [[4.0, 33.0, 41.0, 50.0]] + assert label["boxes"] == [[4.0, 33.0, 37.0, 17.0]] assert label["area"] == [576.0] assert label["labels"] == [1] assert label["image_id"] == [0] @@ -231,7 +231,7 @@ def test_loads_instance_segmentation_dataset_from_complex_polygon_annotations( label = {k: _maybe_tensor_to_list(v) for k, v in label.items()} - assert label["boxes"] == [[1.0, 1.0, 41.0, 50.0]] + assert label["boxes"] == [[1.0, 1.0, 40.0, 49.0]] assert label["area"] == [592.0] assert label["labels"] == [1] assert label["image_id"] == [0] From 0a180cea13f50c5e8fd6a8c3512fba1af362bea5 Mon Sep 17 00:00:00 2001 From: Christoffer Date: Fri, 22 Sep 2023 13:13:04 +0200 Subject: [PATCH 05/11] should work for 
classificaiton now as well --- darwin/torch/transforms.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/darwin/torch/transforms.py b/darwin/torch/transforms.py index 7d7f95459..319936ad3 100644 --- a/darwin/torch/transforms.py +++ b/darwin/torch/transforms.py @@ -292,8 +292,10 @@ def from_dict(cls, alb_dict: dict) -> "AlbumentationsTransform": except Exception as e: raise ValueError(f"Invalid albumentations dictionary. Error: {e}") - def __call__(self, image, annotation: dict) -> tuple: + def __call__(self, image, annotation: dict = None) -> tuple: np_image = np.array(image) + if annotation is None: + annotation = {} albu_data = self._pre_process(np_image, annotation) transformed_data = self.transform(**albu_data) image, transformed_annotation = self._post_process(transformed_data, annotation) From aab52e2e2c40fcd1babd6834940673df3bfee5eb Mon Sep 17 00:00:00 2001 From: Christoffer Date: Tue, 26 Sep 2023 10:28:50 +0200 Subject: [PATCH 06/11] cleaner up --- darwin/torch/dataset.py | 2 +- darwin/torch/transforms.py | 36 +++++++++++++++++++++++++++--------- pyproject.toml | 3 +-- 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/darwin/torch/dataset.py b/darwin/torch/dataset.py index 1b29d3bad..518d05527 100644 --- a/darwin/torch/dataset.py +++ b/darwin/torch/dataset.py @@ -397,7 +397,7 @@ def __init__(self, transform: Optional[Union[List[Callable], Callable]] = None, self.num_classes += 1 if transform is not None and isinstance(transform, list): transform = Compose(transform) - + self.transform: Optional[Callable] = transform self.convert_polygons = ConvertPolygonsToSemanticMask() diff --git a/darwin/torch/transforms.py b/darwin/torch/transforms.py index 319936ad3..470335b76 100644 --- a/darwin/torch/transforms.py +++ b/darwin/torch/transforms.py @@ -1,13 +1,29 @@ import random +from pathlib import Path from typing import Any, Dict, Optional, Tuple, Union -import albumentations as A import numpy as np import torch import torchvision.transforms as transforms import torchvision.transforms.functional as F from PIL import Image as PILImage +# Optional dependency +try: + import albumentations as A +except ImportError: + A = None + +from typing import TYPE_CHECKING, Type + +if TYPE_CHECKING: + from albumentations.pytorch import ToTensorV2 + AType = Type[ToTensorV2] +else: + AType = Type[None] + + + from darwin.torch.utils import convert_segmentation_to_mask, flatten_masks_by_category TargetKey = Union["boxes", "labels", "mask", "masks", "image_id", "area", "iscrowd"] @@ -197,13 +213,6 @@ def __call__(self, image: PILImage.Image, target: TargetType) -> Tuple[PILImage. if num_keypoints: keypoints = keypoints.view(num_keypoints, -1, 3) - # keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0]) - # boxes = boxes[keep] - # classes = classes[keep] - # masks = masks[keep] - # if keypoints is not None: - # keypoints = keypoints[keep] - target["boxes"] = boxes target["labels"] = classes target["masks"] = masks @@ -273,13 +282,16 @@ class AlbumentationsTransform: Wrapper class for Albumentations augmentations. """ + def __init__(self, transform: A.Compose): + self._check_albumentaion_dependency() self.transform = transform @classmethod def from_path(cls, config_path: str) -> "AlbumentationsTransform": + config_path = Path(config_path) try: - transform = A.load(config_path) + transform = A.load(str(config_path)) return cls(transform) except Exception as e: raise ValueError(f"Invalid config path: {config_path}. 
Error: {e}") @@ -353,3 +365,9 @@ def _post_process(self, albumentation_output: dict, annotation: dict) -> tuple: output_annotation.setdefault(key, value) return image, output_annotation + + def _check_albumentaion_dependency(self): + if A is None: + raise ImportError("The albumentations library is not installed. " + "To use this function, install it with pip install albumentations, " + "or install the ml extras of this package.") \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index aeda05d69..1228a331e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -94,7 +94,6 @@ types-pyyaml = "^6.0.12.9" python-dotenv = { version = "^1.0.0", python = ">3.8" } opencv-python-headless = { version = "^4.8.0.76", optional = true } pyyaml = "^6.0.1" -albumentations = "^1.3.1" [tool.poetry.extras] dev = [ @@ -108,7 +107,7 @@ dev = [ "flake8-pyproject", ] test = ["responses", "pytest", "flake8-pyproject"] -ml = ["torch", "torchvision", "scikit-learn"] +ml = ["torch", "torchvision", "scikit-learn", "albumentations"] medical = ["nibabel", "connected-components-3d"] ocv = ["opencv-python-headless"] From 2ccc966cf26c9fb8147979f1fc8fee7c06b039ef Mon Sep 17 00:00:00 2001 From: Christoffer Date: Tue, 26 Sep 2023 10:29:47 +0200 Subject: [PATCH 07/11] formatting --- darwin/torch/transforms.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/darwin/torch/transforms.py b/darwin/torch/transforms.py index 470335b76..516f04803 100644 --- a/darwin/torch/transforms.py +++ b/darwin/torch/transforms.py @@ -18,12 +18,12 @@ if TYPE_CHECKING: from albumentations.pytorch import ToTensorV2 + AType = Type[ToTensorV2] else: AType = Type[None] - from darwin.torch.utils import convert_segmentation_to_mask, flatten_masks_by_category TargetKey = Union["boxes", "labels", "mask", "masks", "image_id", "area", "iscrowd"] @@ -282,7 +282,6 @@ class AlbumentationsTransform: Wrapper class for Albumentations augmentations. """ - def __init__(self, transform: A.Compose): self._check_albumentaion_dependency() self.transform = transform @@ -366,8 +365,10 @@ def _post_process(self, albumentation_output: dict, annotation: dict) -> tuple: return image, output_annotation - def _check_albumentaion_dependency(self): + def _check_albumentaion_dependency(self): if A is None: - raise ImportError("The albumentations library is not installed. " - "To use this function, install it with pip install albumentations, " - "or install the ml extras of this package.") \ No newline at end of file + raise ImportError( + "The albumentations library is not installed. " + "To use this function, install it with pip install albumentations, " + "or install the ml extras of this package." 
+ ) From 81d48fd3b3c44575b15e6ef0f8d2631fc8e55b5e Mon Sep 17 00:00:00 2001 From: Christoffer Date: Tue, 26 Sep 2023 13:37:11 +0200 Subject: [PATCH 08/11] better error handling when albumentations is not installed --- darwin/torch/transforms.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/darwin/torch/transforms.py b/darwin/torch/transforms.py index 516f04803..75b682f30 100644 --- a/darwin/torch/transforms.py +++ b/darwin/torch/transforms.py @@ -11,6 +11,7 @@ # Optional dependency try: import albumentations as A + from albumentations import Compose except ImportError: A = None @@ -22,6 +23,7 @@ AType = Type[ToTensorV2] else: AType = Type[None] + Compose = Type[None] from darwin.torch.utils import convert_segmentation_to_mask, flatten_masks_by_category @@ -282,7 +284,7 @@ class AlbumentationsTransform: Wrapper class for Albumentations augmentations. """ - def __init__(self, transform: A.Compose): + def __init__(self, transform: Compose): self._check_albumentaion_dependency() self.transform = transform From e5a41da2a224bd318a033ce152e0dfd8c270edbf Mon Sep 17 00:00:00 2001 From: Christoffer Date: Thu, 28 Sep 2023 11:15:28 +0200 Subject: [PATCH 09/11] fixed potential clamp issues --- darwin/torch/dataset.py | 14 ++++++++++++-- darwin/torch/utils.py | 32 ++++++++++++++++++++++++++++---- 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/darwin/torch/dataset.py b/darwin/torch/dataset.py index 518d05527..4e6a72b01 100644 --- a/darwin/torch/dataset.py +++ b/darwin/torch/dataset.py @@ -333,8 +333,18 @@ def get_target(self, index: int) -> Dict[str, Any]: min_y: float = np.min([np.min(y_coord) for y_coord in y_coords]) max_x: float = np.max([np.max(x_coord) for x_coord in x_coords]) max_y: float = np.max([np.max(y_coord) for y_coord in y_coords]) - w: float = max_x - min_x + 1 - h: float = max_y - min_y + 1 + + # Clamp the coordinates to the image dimensions + min_x: float = max(0, min_x) + min_y: float = max(0, min_y) + max_x: float = min(target["width"] - 1, max_x) + max_y: float = min(target["height"] - 1, max_y) + + assert min_x < max_x and min_y < max_y + + # Convert to XYWH + w: float = max_x - (min_x + 1) + h: float = max_y - (min_y + 1) # Compute the area of the polygon # TODO fix with addictive/subtractive paths in complex polygons poly_area: float = np.sum([polygon_area(x_coord, y_coord) for x_coord, y_coord in zip(x_coords, y_coords)]) diff --git a/darwin/torch/utils.py b/darwin/torch/utils.py index 51715b89c..e12d4976c 100644 --- a/darwin/torch/utils.py +++ b/darwin/torch/utils.py @@ -194,10 +194,34 @@ def detectron2_register_dataset( return catalog_name -def clamp_bbox_to_image_size(annotations, width, height): +def clamp_bbox_to_image_size(annotations, width, height, format="xywh"): + """ + Clamps bounding boxes in annotations to the given image dimensions. + + :param annotations: Dictionary containing bounding box coordinates in 'boxes' key. + :param width: Width of the image. + :param height: Height of the image. + :param format: Format of the bounding boxes, either "xywh" or "xyxy". + :return: Annotations with clamped bounding boxes. + + The function modifies the input annotations dictionary to clamp the bounding box coordinates + based on the specified format, ensuring they lie within the image dimensions. 
+ """ boxes = annotations['boxes'] - boxes[:, 0::2].clamp_(min=0, max=width) - boxes[:, 1::2].clamp_(min=0, max=height) + + if format == "xyxy": + boxes[:, 0::2].clamp_(min=0, max=width - 1) + boxes[:, 1::2].clamp_(min=0, max=height - 1) + + elif format == "xywh": + boxes[:, 0].clamp_(min=0, max=width - boxes[:, 2] - 1) + boxes[:, 1].clamp_(min=0, max=height - boxes[:, 3] - 1) + boxes[:, 2].clamp_(min=0, max=width - boxes[:, 0] - 1) + boxes[:, 3].clamp_(min=0, max=height - boxes[:, 1] - 1) + + else: + raise ValueError(f"Unsupported bounding box format: {format}") + annotations['boxes'] = boxes - return annotations + From dd59fca1b3fcda07510ad8a655cec978074a0b95 Mon Sep 17 00:00:00 2001 From: Christoffer Date: Thu, 28 Sep 2023 12:50:08 +0200 Subject: [PATCH 10/11] adjusted tests to reflect the xyxy to xywh changes of the instance seg test --- darwin/torch/dataset.py | 8 +++---- darwin/torch/transforms.py | 26 ++++++++++++++------- darwin/torch/utils.py | 36 +++++++++++++++--------------- darwin/utils/utils.py | 4 ++-- tests/darwin/torch/dataset_test.py | 11 ++++----- tests/darwin/torch/utils_test.py | 23 ++++++++++++++++++- 6 files changed, 70 insertions(+), 38 deletions(-) diff --git a/darwin/torch/dataset.py b/darwin/torch/dataset.py index 4e6a72b01..ebc3b4d17 100644 --- a/darwin/torch/dataset.py +++ b/darwin/torch/dataset.py @@ -343,8 +343,9 @@ def get_target(self, index: int) -> Dict[str, Any]: assert min_x < max_x and min_y < max_y # Convert to XYWH - w: float = max_x - (min_x + 1) - h: float = max_y - (min_y + 1) + w: float = max_x - min_x + h: float = max_y - min_y + # Compute the area of the polygon # TODO fix with addictive/subtractive paths in complex polygons poly_area: float = np.sum([polygon_area(x_coord, y_coord) for x_coord, y_coord in zip(x_coords, y_coords)]) @@ -400,14 +401,13 @@ class SemanticSegmentationDataset(LocalDataset): """ def __init__(self, transform: Optional[Union[List[Callable], Callable]] = None, **kwargs): - super().__init__(annotation_type="polygon", **kwargs) if not "__background__" in self.classes: self.classes.insert(0, "__background__") self.num_classes += 1 if transform is not None and isinstance(transform, list): transform = Compose(transform) - + self.transform: Optional[Callable] = transform self.convert_polygons = ConvertPolygonsToSemanticMask() diff --git a/darwin/torch/transforms.py b/darwin/torch/transforms.py index 75b682f30..edf95087a 100644 --- a/darwin/torch/transforms.py +++ b/darwin/torch/transforms.py @@ -50,7 +50,9 @@ class RandomHorizontalFlip(transforms.RandomHorizontalFlip): Allows for horizontal flipping of an image, randomly. """ - def forward(self, image: torch.Tensor, target: Optional[TargetType] = None) -> Union[torch.Tensor, Tuple[torch.Tensor, TargetType]]: + def forward( + self, image: torch.Tensor, target: Optional[TargetType] = None + ) -> Union[torch.Tensor, Tuple[torch.Tensor, TargetType]]: """ May or may not horizontally flip an image depending on a random factor. @@ -93,7 +95,9 @@ class RandomVerticalFlip(transforms.RandomVerticalFlip): Allows for vertical flipping of an image, randomly. """ - def forward(self, image: torch.Tensor, target: Optional[TargetType] = None) -> Union[torch.Tensor, Tuple[torch.Tensor, TargetType]]: + def forward( + self, image: torch.Tensor, target: Optional[TargetType] = None + ) -> Union[torch.Tensor, Tuple[torch.Tensor, TargetType]]: """ May or may not vertically flip an image depending on a random factor. 
@@ -135,7 +139,9 @@ class ColorJitter(transforms.ColorJitter): Jitters the colors of the given transformation. """ - def __call__(self, image: PILImage.Image, target: Optional[TargetType] = None) -> Union[PILImage.Image, Tuple[PILImage.Image, TargetType]]: + def __call__( + self, image: PILImage.Image, target: Optional[TargetType] = None + ) -> Union[PILImage.Image, Tuple[PILImage.Image, TargetType]]: transform = self.get_params(self.brightness, self.contrast, self.saturation, self.hue) image = transform(image) if target is None: @@ -148,7 +154,9 @@ class ToTensor(transforms.ToTensor): Converts given ``PILImage`` to a ``Tensor``. """ - def __call__(self, image: PILImage.Image, target: Optional[TargetType] = None) -> Union[torch.Tensor, Tuple[torch.Tensor, TargetType]]: + def __call__( + self, image: PILImage.Image, target: Optional[TargetType] = None + ) -> Union[torch.Tensor, Tuple[torch.Tensor, TargetType]]: image_tensor: torch.Tensor = F.to_tensor(image) if target is None: return image_tensor @@ -160,7 +168,9 @@ class ToPILImage(transforms.ToPILImage): Converts given ``Tensor`` to a ``PILImage``. """ - def __call__(self, image: torch.Tensor, target: Optional[TargetType] = None) -> Union[PILImage.Image, Tuple[PILImage.Image, TargetType]]: + def __call__( + self, image: torch.Tensor, target: Optional[TargetType] = None + ) -> Union[PILImage.Image, Tuple[PILImage.Image, TargetType]]: pil_image: PILImage.Image = F.to_pil_image(image) if target is None: return pil_image @@ -172,7 +182,9 @@ class Normalize(transforms.Normalize): Normalizes the given ``Tensor``. """ - def __call__(self, tensor: torch.Tensor, target: Optional[TargetType] = None) -> Union[torch.Tensor, Tuple[torch.Tensor, TargetType]]: + def __call__( + self, tensor: torch.Tensor, target: Optional[TargetType] = None + ) -> Union[torch.Tensor, Tuple[torch.Tensor, TargetType]]: tensor = F.normalize(tensor, self.mean, self.std, self.inplace) if target is None: @@ -198,8 +210,6 @@ def __call__(self, image: PILImage.Image, target: TargetType) -> Tuple[PILImage. boxes = [obj["bbox"] for obj in annotations] # guard against no boxes via resizing boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4) - boxes[:, 0::2].clamp_(min=0, max=w) - boxes[:, 1::2].clamp_(min=0, max=h) classes = [obj["category_id"] for obj in annotations] classes = torch.tensor(classes, dtype=torch.int64) diff --git a/darwin/torch/utils.py b/darwin/torch/utils.py index e12d4976c..3080c6ac6 100644 --- a/darwin/torch/utils.py +++ b/darwin/torch/utils.py @@ -194,34 +194,34 @@ def detectron2_register_dataset( return catalog_name -def clamp_bbox_to_image_size(annotations, width, height, format="xywh"): +def clamp_bbox_to_image_size(annotations, img_width, img_height, format="xywh"): """ Clamps bounding boxes in annotations to the given image dimensions. - + :param annotations: Dictionary containing bounding box coordinates in 'boxes' key. - :param width: Width of the image. - :param height: Height of the image. + :param img_width: Width of the image. + :param img_height: Height of the image. :param format: Format of the bounding boxes, either "xywh" or "xyxy". :return: Annotations with clamped bounding boxes. - + The function modifies the input annotations dictionary to clamp the bounding box coordinates based on the specified format, ensuring they lie within the image dimensions. 
""" - boxes = annotations['boxes'] - + boxes = annotations["boxes"] + if format == "xyxy": - boxes[:, 0::2].clamp_(min=0, max=width - 1) - boxes[:, 1::2].clamp_(min=0, max=height - 1) - + boxes[:, 0::2].clamp_(min=0, max=img_width - 1) + boxes[:, 1::2].clamp_(min=0, max=img_height - 1) + elif format == "xywh": - boxes[:, 0].clamp_(min=0, max=width - boxes[:, 2] - 1) - boxes[:, 1].clamp_(min=0, max=height - boxes[:, 3] - 1) - boxes[:, 2].clamp_(min=0, max=width - boxes[:, 0] - 1) - boxes[:, 3].clamp_(min=0, max=height - boxes[:, 1] - 1) - + # First, clamp the x and y coordinates + boxes[:, 0].clamp_(min=0, max=img_width - 1) + boxes[:, 1].clamp_(min=0, max=img_height - 1) + # Then, clamp the width and height + boxes[:, 2].clamp_(min=torch.tensor(0), max=img_width - boxes[:, 0] - 1) # -1 since we images are zero-indexed + boxes[:, 3].clamp_(min=torch.tensor(0), max=img_height - boxes[:, 1] - 1) # -1 since we images are zero-indexed else: raise ValueError(f"Unsupported bounding box format: {format}") - - annotations['boxes'] = boxes - return annotations + annotations["boxes"] = boxes + return annotations diff --git a/darwin/utils/utils.py b/darwin/utils/utils.py index e4907f50e..38e69c740 100644 --- a/darwin/utils/utils.py +++ b/darwin/utils/utils.py @@ -919,8 +919,8 @@ def convert_polygons_to_sequences( path: List[Union[int, float]] = [] for point in polygon: # Clip coordinates to the image size - x = max(min(point["x"], width - 1) if width else point["x"], 0) - y = max(min(point["y"], height - 1) if height else point["y"], 0) + x = max(min(point["x"], width -1) if width else point["x"], 0) + y = max(min(point["y"], height -1) if height else point["y"], 0) if rounding: path.append(round(x)) path.append(round(y)) diff --git a/tests/darwin/torch/dataset_test.py b/tests/darwin/torch/dataset_test.py index cfb2c03e2..70d22c34f 100644 --- a/tests/darwin/torch/dataset_test.py +++ b/tests/darwin/torch/dataset_test.py @@ -129,8 +129,9 @@ def test_loads_object_detection_dataset_from_bounding_box_annotations( assert image.size() == (3, 50, 50) label = {k: v.numpy().tolist() for k, v in label.items()} + assert label == { - "boxes": [[4, 33, 17, 36]], + "boxes": [[4, 33, 17, 16]], # we need to account for xywh format and clamping "area": [612], "labels": [1], "image_id": [0], @@ -149,7 +150,7 @@ def test_loads_object_detection_dataset_from_polygon_annotations( label = {k: v.numpy().tolist() for k, v in label.items()} assert label == { - "boxes": [[4, 33, 17, 36]], + "boxes": [[4, 33, 17, 16]], # we need to account for xywh format and clamping "area": [612], "labels": [1], "image_id": [0], @@ -168,7 +169,7 @@ def test_loads_object_detection_dataset_from_complex_polygon_annotations( label = {k: v.numpy().tolist() for k, v in label.items()} assert label == { - "boxes": [[1, 1, 39, 49]], + "boxes": [[1, 1, 39, 48]], "area": [1911], "labels": [1], "image_id": [0], @@ -210,7 +211,7 @@ def test_loads_instance_segmentation_dataset_from_polygon_annotations( label = {k: _maybe_tensor_to_list(v) for k, v in label.items()} - assert label["boxes"] == [[4.0, 33.0, 37.0, 17.0]] + assert label["boxes"] == [[4.0, 33.0, 36.0, 16.0]] assert label["area"] == [576.0] assert label["labels"] == [1] assert label["image_id"] == [0] @@ -231,7 +232,7 @@ def test_loads_instance_segmentation_dataset_from_complex_polygon_annotations( label = {k: _maybe_tensor_to_list(v) for k, v in label.items()} - assert label["boxes"] == [[1.0, 1.0, 40.0, 49.0]] + assert label["boxes"] == [[1.0, 1.0, 39.0, 48.0]] assert label["area"] == [592.0] 
assert label["labels"] == [1] assert label["image_id"] == [0] diff --git a/tests/darwin/torch/utils_test.py b/tests/darwin/torch/utils_test.py index 598c598c4..4fca1354e 100644 --- a/tests/darwin/torch/utils_test.py +++ b/tests/darwin/torch/utils_test.py @@ -3,7 +3,7 @@ import numpy as np import torch -from darwin.torch.utils import flatten_masks_by_category +from darwin.torch.utils import clamp_bbox_to_image_size, flatten_masks_by_category from tests.fixtures import * @@ -67,3 +67,24 @@ def test_should_handle_multiple_overlaps(self, multiple_overlap_masks) -> None: expected_counts = torch.as_tensor([7, 2], dtype=torch.uint8) assert torch.equal(unique, expected_unique) assert torch.equal(counts, expected_counts) + +class TestClampBboxToImageSize: + def test_clamp_bbox_xyxy(self): + annotations = {'boxes': torch.tensor([[5.0, 5.0, 15.0, 15.0], [-5.0, -5.0, 25.0, 25.0]])} + width = 20 + height = 20 + + clamped_annotations = clamp_bbox_to_image_size(annotations, width, height, format="xyxy") + expected_boxes = torch.tensor([[5.0, 5.0, 15.0, 15.0], [0.0, 0.0, 19.0, 19.0]]) + + assert torch.equal(clamped_annotations['boxes'], expected_boxes) + + def test_clamp_bbox_xywh(self): + annotations = {'boxes': torch.tensor([[5.0, 5.0, 15.0, 15.0], [-5.0, -5.0, 30.0, 30.0]])} + width = 20 + height = 20 + + clamped_annotations = clamp_bbox_to_image_size(annotations, width, height, format="xywh") + expected_boxes = torch.tensor([[5.0, 5.0, 14.0, 14.0], [0.0, 0.0, 19.0, 19.0]]) + + assert torch.equal(clamped_annotations['boxes'], expected_boxes) From e02474fa94ccb3d6b157cb0c1d2724ae020f0e45 Mon Sep 17 00:00:00 2001 From: Christoffer Date: Thu, 28 Sep 2023 13:03:18 +0200 Subject: [PATCH 11/11] added the check for empty boxes --- darwin/torch/transforms.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/darwin/torch/transforms.py b/darwin/torch/transforms.py index edf95087a..65331a551 100644 --- a/darwin/torch/transforms.py +++ b/darwin/torch/transforms.py @@ -232,6 +232,14 @@ def __call__(self, image: PILImage.Image, target: TargetType) -> Tuple[PILImage. if keypoints is not None: target["keypoints"] = keypoints + # Remove boxes with widht or height zero + keep = (boxes[:, 3] > 0) & (boxes[:, 2] > 0) + boxes = boxes[keep] + classes = classes[keep] + masks = masks[keep] + if keypoints is not None: + keypoints = keypoints[keep] + # conversion to coco api area = torch.tensor([obj["area"] for obj in annotations]) iscrowd = torch.tensor([obj.get("iscrowd", 0) for obj in annotations])