21 changes: 17 additions & 4 deletions darwin/torch/dataset.py
@@ -15,7 +15,7 @@
ConvertPolygonsToInstanceMasks,
ConvertPolygonsToSemanticMask,
)
from darwin.torch.utils import polygon_area
from darwin.torch.utils import clamp_bbox_to_image_size, polygon_area
from darwin.utils import convert_polygons_to_sequences


@@ -333,8 +333,19 @@ def get_target(self, index: int) -> Dict[str, Any]:
min_y: float = np.min([np.min(y_coord) for y_coord in y_coords])
max_x: float = np.max([np.max(x_coord) for x_coord in x_coords])
max_y: float = np.max([np.max(y_coord) for y_coord in y_coords])
w: float = max_x - min_x + 1
h: float = max_y - min_y + 1

# Clamp the coordinates to the image dimensions
min_x: float = max(0, min_x)
min_y: float = max(0, min_y)
max_x: float = min(target["width"] - 1, max_x)
max_y: float = min(target["height"] - 1, max_y)

assert min_x < max_x and min_y < max_y

# Convert to XYWH
w: float = max_x - min_x
h: float = max_y - min_y

# Compute the area of the polygon
# TODO fix with additive/subtractive paths in complex polygons
poly_area: float = np.sum([polygon_area(x_coord, y_coord) for x_coord, y_coord in zip(x_coords, y_coords)])
@@ -390,7 +401,6 @@ class SemanticSegmentationDataset(LocalDataset):
"""

def __init__(self, transform: Optional[Union[List[Callable], Callable]] = None, **kwargs):

super().__init__(annotation_type="polygon", **kwargs)
if not "__background__" in self.classes:
self.classes.insert(0, "__background__")
@@ -546,6 +556,9 @@ def __getitem__(self, index: int):
img: PILImage.Image = self.get_image(index)
target: Dict[str, Any] = self.get_target(index)

width, height = img.size
target = clamp_bbox_to_image_size(target, width, height)

if self.transform is not None:
img_tensor, target = self.transform(img, target)
else:
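
For reference, a minimal standalone sketch of the clamp-then-convert step added to get_target above. The helper name and the flat coordinate lists are illustrative only, not part of this PR:

import numpy as np

def clamped_xywh(x_coords, y_coords, width, height):
    # Clamp the polygon extremes to the image, then convert the XYXY
    # extremes to an XYWH box (note: no "+ 1" any more).
    min_x, min_y = max(0, np.min(x_coords)), max(0, np.min(y_coords))
    max_x, max_y = min(width - 1, np.max(x_coords)), min(height - 1, np.max(y_coords))
    assert min_x < max_x and min_y < max_y
    return [min_x, min_y, max_x - min_x, max_y - min_y]

# A polygon spilling past the right edge of a 50x50 image:
clamped_xywh([4, 60, 20], [33, 40, 48], 50, 50)  # -> [4, 33, 45, 15]
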
136 changes: 125 additions & 11 deletions darwin/torch/transforms.py
@@ -1,14 +1,33 @@
import random
from pathlib import Path
from typing import Any, Dict, Optional, Tuple, Union

import numpy as np
import torch
import torchvision.transforms as transforms
import torchvision.transforms.functional as F
from PIL import Image as PILImage

from darwin.torch.utils import convert_segmentation_to_mask, flatten_masks_by_category
# Optional dependency
try:
import albumentations as A
from albumentations import Compose
except ImportError:
A = None

from typing import TYPE_CHECKING, Type

if TYPE_CHECKING:
from albumentations.pytorch import ToTensorV2

AType = Type[ToTensorV2]
else:
AType = Type[None]
Compose = Type[None]


from darwin.torch.utils import convert_segmentation_to_mask, flatten_masks_by_category

TargetKey = Union["boxes", "labels", "mask", "masks", "image_id", "area", "iscrowd"]
TargetType = Dict[TargetKey, torch.Tensor]

@@ -191,9 +210,6 @@ def __call__(self, image: PILImage.Image, target: TargetType) -> Tuple[PILImage.
boxes = [obj["bbox"] for obj in annotations]
# guard against no boxes via resizing
boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
boxes[:, 2:] += boxes[:, :2]
boxes[:, 0::2].clamp_(min=0, max=w)
boxes[:, 1::2].clamp_(min=0, max=h)

classes = [obj["category_id"] for obj in annotations]
classes = torch.tensor(classes, dtype=torch.int64)
@@ -209,20 +225,21 @@ def __call__(self, image: PILImage.Image, target: TargetType) -> Tuple[PILImage.
if num_keypoints:
keypoints = keypoints.view(num_keypoints, -1, 3)

keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])
boxes = boxes[keep]
classes = classes[keep]
masks = masks[keep]
if keypoints is not None:
keypoints = keypoints[keep]

target["boxes"] = boxes
target["labels"] = classes
target["masks"] = masks
target["image_id"] = image_id
if keypoints is not None:
target["keypoints"] = keypoints

# Remove boxes with width or height zero
keep = (boxes[:, 3] > 0) & (boxes[:, 2] > 0)
boxes = boxes[keep]
classes = classes[keep]
masks = masks[keep]
if keypoints is not None:
keypoints = keypoints[keep]

# conversion to coco api
area = torch.tensor([obj["area"] for obj in annotations])
iscrowd = torch.tensor([obj.get("iscrowd", 0) for obj in annotations])
@@ -278,3 +295,100 @@ def __call__(self, image: PILImage.Image, annotation: Dict[str, Any]) -> Tuple[P
target = torch.zeros((h, w), dtype=torch.uint8)
target = PILImage.fromarray(target.numpy())
return image, target


class AlbumentationsTransform:
"""
Wrapper class for Albumentations augmentations.
"""

def __init__(self, transform: Compose):
self._check_albumentaion_dependency()
self.transform = transform

@classmethod
def from_path(cls, config_path: str) -> "AlbumentationsTransform":
config_path = Path(config_path)
try:
transform = A.load(str(config_path))
return cls(transform)
except Exception as e:
raise ValueError(f"Invalid config path: {config_path}. Error: {e}")

@classmethod
def from_dict(cls, alb_dict: dict) -> "AlbumentationsTransform":
try:
transform = A.from_dict(alb_dict)
return cls(transform)
except Exception as e:
raise ValueError(f"Invalid albumentations dictionary. Error: {e}")

def __call__(self, image, annotation: dict = None) -> tuple:
np_image = np.array(image)
if annotation is None:
annotation = {}
albu_data = self._pre_process(np_image, annotation)
transformed_data = self.transform(**albu_data)
image, transformed_annotation = self._post_process(transformed_data, annotation)

return image, transformed_annotation

def _pre_process(self, image: np.ndarray, annotation: dict) -> dict:
"""
Prepare image and annotation for albumentations transformation.
"""
albumentation_dict = {"image": image}

boxes = annotation.get("boxes")
if boxes is not None:
albumentation_dict["bboxes"] = boxes.numpy().tolist()

labels = annotation.get("labels")
if labels is not None:
albumentation_dict["labels"] = labels.tolist()

masks = annotation.get("masks")
if masks is not None:
albumentation_dict["masks"] = masks.numpy()

return albumentation_dict

def _post_process(self, albumentation_output: dict, annotation: dict) -> tuple:
"""
Process the output of albumentations transformation back to desired format.
"""
output_annotation = {}
image = albumentation_output["image"]

bboxes = albumentation_output.get("bboxes")
if bboxes is not None:
output_annotation["boxes"] = torch.tensor(bboxes)
if "area" in annotation and "masks" not in albumentation_output:
output_annotation["area"] = output_annotation["boxes"][:, 2] * output_annotation["boxes"][:, 3]

labels = albumentation_output.get("labels")
if labels is not None:
output_annotation["labels"] = torch.tensor(labels)

masks = albumentation_output.get("masks")
if masks is not None:
if isinstance(masks[0], np.ndarray):
output_annotation["masks"] = torch.tensor(np.array(masks))
else:
output_annotation["masks"] = torch.stack(masks)
if "area" in annotation:
output_annotation["area"] = torch.sum(output_annotation["masks"], dim=[1, 2])

# Copy other metadata from original annotation
for key, value in annotation.items():
output_annotation.setdefault(key, value)

return image, output_annotation

def _check_albumentaion_dependency(self):
if A is None:
raise ImportError(
"The albumentations library is not installed. "
"To use this function, install it with pip install albumentations, "
"or install the ml extras of this package."
)
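
A minimal usage sketch for the new wrapper (not part of the PR): the wrapped Compose has to declare bbox_params so that the "bboxes" and "labels" keys emitted by _pre_process are transformed together with the image; the "coco" (XYWH) box format is assumed here to match the dataset targets.

import albumentations as A
from darwin.torch.transforms import AlbumentationsTransform

pipeline = A.Compose(
    [A.HorizontalFlip(p=0.5), A.RandomBrightnessContrast(p=0.2)],
    bbox_params=A.BboxParams(format="coco", label_fields=["labels"]),
)
tfm = AlbumentationsTransform(pipeline)
# image is a PIL image and target the dict produced by the dataset:
# image_aug, target_aug = tfm(image, target)
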
33 changes: 33 additions & 0 deletions darwin/torch/utils.py
@@ -192,3 +192,36 @@ def detectron2_register_dataset(
if evaluator_type:
MetadataCatalog.get(catalog_name).set(evaluator_type=evaluator_type)
return catalog_name


def clamp_bbox_to_image_size(annotations, img_width, img_height, format="xywh"):
"""
Clamps bounding boxes in annotations to the given image dimensions.

:param annotations: Dictionary containing bounding box coordinates in 'boxes' key.
:param img_width: Width of the image.
:param img_height: Height of the image.
:param format: Format of the bounding boxes, either "xywh" or "xyxy".
:return: Annotations with clamped bounding boxes.

The function modifies the input annotations dictionary to clamp the bounding box coordinates
based on the specified format, ensuring they lie within the image dimensions.
"""
boxes = annotations["boxes"]

if format == "xyxy":
boxes[:, 0::2].clamp_(min=0, max=img_width - 1)
boxes[:, 1::2].clamp_(min=0, max=img_height - 1)

elif format == "xywh":
# First, clamp the x and y coordinates
boxes[:, 0].clamp_(min=0, max=img_width - 1)
boxes[:, 1].clamp_(min=0, max=img_height - 1)
# Then, clamp the width and height
boxes[:, 2].clamp_(min=torch.tensor(0), max=img_width - boxes[:, 0] - 1)  # -1 since images are zero-indexed
boxes[:, 3].clamp_(min=torch.tensor(0), max=img_height - boxes[:, 1] - 1)  # -1 since images are zero-indexed
else:
raise ValueError(f"Unsupported bounding box format: {format}")

annotations["boxes"] = boxes
return annotations
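
A quick usage sketch for clamp_bbox_to_image_size (illustrative values, assuming the default XYWH layout used by the torch datasets):

import torch
from darwin.torch.utils import clamp_bbox_to_image_size

# Two XYWH boxes on a 100x100 image; the second one runs past the right edge.
target = {"boxes": torch.tensor([[10.0, 10.0, 20.0, 20.0], [90.0, 5.0, 30.0, 10.0]])}
target = clamp_bbox_to_image_size(target, 100, 100, format="xywh")
# target["boxes"] -> [[10., 10., 20., 20.], [90., 5., 9., 10.]]
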
4 changes: 2 additions & 2 deletions darwin/utils/utils.py
@@ -919,8 +919,8 @@ def convert_polygons_to_sequences(
path: List[Union[int, float]] = []
for point in polygon:
# Clip coordinates to the image size
x = max(min(point["x"], width - 1) if width else point["x"], 0)
y = max(min(point["y"], height - 1) if height else point["y"], 0)
x = max(min(point["x"], width -1) if width else point["x"], 0)
y = max(min(point["y"], height -1) if height else point["y"], 0)
if rounding:
path.append(round(x))
path.append(round(y))
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -107,7 +107,7 @@ dev = [
"flake8-pyproject",
]
test = ["responses", "pytest", "flake8-pyproject"]
ml = ["torch", "torchvision", "scikit-learn"]
ml = ["torch", "torchvision", "scikit-learn", "albumentations"]
medical = ["nibabel", "connected-components-3d"]
ocv = ["opencv-python-headless"]

11 changes: 6 additions & 5 deletions tests/darwin/torch/dataset_test.py
@@ -129,8 +129,9 @@ def test_loads_object_detection_dataset_from_bounding_box_annotations(
assert image.size() == (3, 50, 50)

label = {k: v.numpy().tolist() for k, v in label.items()}

assert label == {
"boxes": [[4, 33, 17, 36]],
"boxes": [[4, 33, 17, 16]], # we need to account for xywh format and clamping
"area": [612],
"labels": [1],
"image_id": [0],
@@ -149,7 +150,7 @@ def test_loads_object_detection_dataset_from_polygon_annotations(

label = {k: v.numpy().tolist() for k, v in label.items()}
assert label == {
"boxes": [[4, 33, 17, 36]],
"boxes": [[4, 33, 17, 16]], # we need to account for xywh format and clamping
"area": [612],
"labels": [1],
"image_id": [0],
@@ -168,7 +169,7 @@ def test_loads_object_detection_dataset_from_complex_polygon_annotations(

label = {k: v.numpy().tolist() for k, v in label.items()}
assert label == {
"boxes": [[1, 1, 39, 49]],
"boxes": [[1, 1, 39, 48]],
"area": [1911],
"labels": [1],
"image_id": [0],
@@ -210,7 +211,7 @@ def test_loads_instance_segmentation_dataset_from_polygon_annotations(

label = {k: _maybe_tensor_to_list(v) for k, v in label.items()}

assert label["boxes"] == [[4.0, 33.0, 41.0, 50.0]]
assert label["boxes"] == [[4.0, 33.0, 36.0, 16.0]]
assert label["area"] == [576.0]
assert label["labels"] == [1]
assert label["image_id"] == [0]
@@ -231,7 +232,7 @@ def test_loads_instance_segmentation_dataset_from_complex_polygon_annotations(

label = {k: _maybe_tensor_to_list(v) for k, v in label.items()}

assert label["boxes"] == [[1.0, 1.0, 41.0, 50.0]]
assert label["boxes"] == [[1.0, 1.0, 39.0, 48.0]]
assert label["area"] == [592.0]
assert label["labels"] == [1]
assert label["image_id"] == [0]
23 changes: 22 additions & 1 deletion tests/darwin/torch/utils_test.py
@@ -3,7 +3,7 @@
import numpy as np
import torch

from darwin.torch.utils import flatten_masks_by_category
from darwin.torch.utils import clamp_bbox_to_image_size, flatten_masks_by_category
from tests.fixtures import *


@@ -67,3 +67,24 @@ def test_should_handle_multiple_overlaps(self, multiple_overlap_masks) -> None:
expected_counts = torch.as_tensor([7, 2], dtype=torch.uint8)
assert torch.equal(unique, expected_unique)
assert torch.equal(counts, expected_counts)

class TestClampBboxToImageSize:
def test_clamp_bbox_xyxy(self):
annotations = {'boxes': torch.tensor([[5.0, 5.0, 15.0, 15.0], [-5.0, -5.0, 25.0, 25.0]])}
width = 20
height = 20

clamped_annotations = clamp_bbox_to_image_size(annotations, width, height, format="xyxy")
expected_boxes = torch.tensor([[5.0, 5.0, 15.0, 15.0], [0.0, 0.0, 19.0, 19.0]])

assert torch.equal(clamped_annotations['boxes'], expected_boxes)

def test_clamp_bbox_xywh(self):
annotations = {'boxes': torch.tensor([[5.0, 5.0, 15.0, 15.0], [-5.0, -5.0, 30.0, 30.0]])}
width = 20
height = 20

clamped_annotations = clamp_bbox_to_image_size(annotations, width, height, format="xywh")
expected_boxes = torch.tensor([[5.0, 5.0, 14.0, 14.0], [0.0, 0.0, 19.0, 19.0]])

assert torch.equal(clamped_annotations['boxes'], expected_boxes)