From 574ed68325900daf368e839073cadd970530247d Mon Sep 17 00:00:00 2001 From: Christoffer Date: Wed, 20 Sep 2023 14:41:23 +0200 Subject: [PATCH 01/11] added albumentation transform, clamp on bbox in obj det and changed instance seg to x,y,w,h format --- darwin/torch/dataset.py | 5 ++- darwin/torch/transforms.py | 68 ++++++++++++++++++++++++++++++++++++-- darwin/torch/utils.py | 9 +++++ 3 files changed, 79 insertions(+), 3 deletions(-) diff --git a/darwin/torch/dataset.py b/darwin/torch/dataset.py index 3d5b8883e..1b29d3bad 100644 --- a/darwin/torch/dataset.py +++ b/darwin/torch/dataset.py @@ -15,7 +15,7 @@ ConvertPolygonsToInstanceMasks, ConvertPolygonsToSemanticMask, ) -from darwin.torch.utils import polygon_area +from darwin.torch.utils import clamp_bbox_to_image_size, polygon_area from darwin.utils import convert_polygons_to_sequences @@ -546,6 +546,9 @@ def __getitem__(self, index: int): img: PILImage.Image = self.get_image(index) target: Dict[str, Any] = self.get_target(index) + width, height = img.size + target = clamp_bbox_to_image_size(target, width, height) + if self.transform is not None: img_tensor, target = self.transform(img, target) else: diff --git a/darwin/torch/transforms.py b/darwin/torch/transforms.py index 6e466a377..a91e379e6 100644 --- a/darwin/torch/transforms.py +++ b/darwin/torch/transforms.py @@ -8,7 +8,6 @@ from darwin.torch.utils import convert_segmentation_to_mask, flatten_masks_by_category - TargetKey = Union["boxes", "labels", "mask", "masks", "image_id", "area", "iscrowd"] TargetType = Dict[TargetKey, torch.Tensor] @@ -191,7 +190,6 @@ def __call__(self, image: PILImage.Image, target: TargetType) -> Tuple[PILImage. boxes = [obj["bbox"] for obj in annotations] # guard against no boxes via resizing boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4) - boxes[:, 2:] += boxes[:, :2] boxes[:, 0::2].clamp_(min=0, max=w) boxes[:, 1::2].clamp_(min=0, max=h) @@ -278,3 +276,69 @@ def __call__(self, image: PILImage.Image, annotation: Dict[str, Any]) -> Tuple[P target = torch.zeros((h, w), dtype=torch.uint8) target = PILImage.fromarray(target.numpy()) return image, target + + +class AlbumentationsTransform(object): + """ + Applies albumentation augmentations + """ + + def __init__(self, transform): + self.transform = transform + + @classmethod + def from_path(cls, config_path): + transform = A.load(config_path) + return cls(transform) + + @classmethod + def from_dict(cls, alb_dict): + transform = A.from_dict(alb_dict) + return cls(transform) + + def __call__(self, image, annotation): + + np_image = np.array(image) + albu_data = self.pre_process(np_image, annotation) + transformed_data = self.transform(**albu_data) + image, transformed_annotation = self.post_process(transformed_data, annotation) + + return TF.pil_to_tensor(image), transformed_annotation + + def pre_process(self, image, darwin_annotations): + + albumentation_dict = {"image": image} + width, height = image.shape[:2] + + if "boxes" in darwin_annotations: + boxes = darwin_annotations['boxes'].numpy() + # Clip the bounding box values to ensure they are within the image + boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, width) + boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, height) + albumentation_dict['bboxes'] = boxes.tolist() + + if "labels" in darwin_annotations: + albumentation_dict['labels'] = darwin_annotations['labels'].tolist() + + if "masks" in darwin_annotations: + albumentation_dict["mask"] = darwin_annotations['masks'].tolist() + + return albumentation_dict + + def post_process(self, 
albumentation_output, darwin_annotations): + + darwin_annotation = {'image_id': darwin_annotations['image_id']} + image = Image.fromarray(albumentation_output['image']) + + if "bboxes" in albumentation_output: + darwin_annotation['boxes'] = torch.tensor(albumentation_output['bboxes']) + + if "labels" in albumentation_output: + darwin_annotation['labels'] = torch.tensor(albumentation_output['labels']) + + if "boxes" in albumentation_output and "area" in darwin_annotations and not "masks" in darwin_annotations: + bboxes =transformed_annotation["boxes"] + transformed_annotation['area'] = bboxes[:,2] * bboxes[:,3] + + + return image, darwin_annotation \ No newline at end of file diff --git a/darwin/torch/utils.py b/darwin/torch/utils.py index d75bba866..51715b89c 100644 --- a/darwin/torch/utils.py +++ b/darwin/torch/utils.py @@ -192,3 +192,12 @@ def detectron2_register_dataset( if evaluator_type: MetadataCatalog.get(catalog_name).set(evaluator_type=evaluator_type) return catalog_name + + +def clamp_bbox_to_image_size(annotations, width, height): + boxes = annotations['boxes'] + boxes[:, 0::2].clamp_(min=0, max=width) + boxes[:, 1::2].clamp_(min=0, max=height) + annotations['boxes'] = boxes + + return annotations From 9c0256b8ffebc8035c845c2883b29ae8886a6cca Mon Sep 17 00:00:00 2001 From: Christoffer Date: Wed, 20 Sep 2023 15:02:39 +0200 Subject: [PATCH 02/11] removed xmin < xmax check --- darwin/torch/transforms.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/darwin/torch/transforms.py b/darwin/torch/transforms.py index a91e379e6..bfac8adf8 100644 --- a/darwin/torch/transforms.py +++ b/darwin/torch/transforms.py @@ -207,12 +207,12 @@ def __call__(self, image: PILImage.Image, target: TargetType) -> Tuple[PILImage. if num_keypoints: keypoints = keypoints.view(num_keypoints, -1, 3) - keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0]) - boxes = boxes[keep] - classes = classes[keep] - masks = masks[keep] - if keypoints is not None: - keypoints = keypoints[keep] + #keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0]) + #boxes = boxes[keep] + #classes = classes[keep] + #masks = masks[keep] + #if keypoints is not None: + # keypoints = keypoints[keep] target["boxes"] = boxes target["labels"] = classes From d1b636c735e7b3559e86ad805f7d75d3c15ccfa8 Mon Sep 17 00:00:00 2001 From: Christoffer Date: Thu, 21 Sep 2023 12:09:52 +0200 Subject: [PATCH 03/11] now albumentations supports instance segmentation and bbox --- darwin/torch/transforms.py | 121 +++++++++++++++++++++---------------- pyproject.toml | 1 + 2 files changed, 70 insertions(+), 52 deletions(-) diff --git a/darwin/torch/transforms.py b/darwin/torch/transforms.py index bfac8adf8..2b3045e9f 100644 --- a/darwin/torch/transforms.py +++ b/darwin/torch/transforms.py @@ -1,6 +1,8 @@ import random from typing import Any, Dict, Optional, Tuple, Union +import albumentations as A +import numpy as np import torch import torchvision.transforms as transforms import torchvision.transforms.functional as F @@ -278,67 +280,82 @@ def __call__(self, image: PILImage.Image, annotation: Dict[str, Any]) -> Tuple[P return image, target -class AlbumentationsTransform(object): +class AlbumentationsTransform: """ - Applies albumentation augmentations + Wrapper class for Albumentations augmentations. 
""" - - def __init__(self, transform): + + def __init__(self, transform: A.Compose): self.transform = transform - - @classmethod - def from_path(cls, config_path): - transform = A.load(config_path) - return cls(transform) - + @classmethod - def from_dict(cls, alb_dict): - transform = A.from_dict(alb_dict) - return cls(transform) + def from_path(cls, config_path: str) -> 'AlbumentationsTransform': + try: + transform = A.load(config_path) + return cls(transform) + except Exception as e: + raise ValueError(f"Invalid config path: {config_path}. Error: {e}") - def __call__(self, image, annotation): - + @classmethod + def from_dict(cls, alb_dict: dict) -> 'AlbumentationsTransform': + try: + transform = A.from_dict(alb_dict) + return cls(transform) + except Exception as e: + raise ValueError(f"Invalid albumentations dictionary. Error: {e}") + + def __call__(self, image, annotation: dict) -> tuple: np_image = np.array(image) - albu_data = self.pre_process(np_image, annotation) + albu_data = self._pre_process(np_image, annotation) transformed_data = self.transform(**albu_data) - image, transformed_annotation = self.post_process(transformed_data, annotation) - - return TF.pil_to_tensor(image), transformed_annotation + image, transformed_annotation = self._post_process(transformed_data, annotation) + return F.pil_to_tensor(image), transformed_annotation - def pre_process(self, image, darwin_annotations): - + def _pre_process(self, image: np.ndarray, annotation: dict) -> dict: + """ + Prepare image and annotation for albumentations transformation. + """ albumentation_dict = {"image": image} - width, height = image.shape[:2] - if "boxes" in darwin_annotations: - boxes = darwin_annotations['boxes'].numpy() - # Clip the bounding box values to ensure they are within the image - boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, width) - boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, height) - albumentation_dict['bboxes'] = boxes.tolist() - - if "labels" in darwin_annotations: - albumentation_dict['labels'] = darwin_annotations['labels'].tolist() - - if "masks" in darwin_annotations: - albumentation_dict["mask"] = darwin_annotations['masks'].tolist() - - return albumentation_dict + boxes = annotation.get('boxes') + if boxes is not None: + albumentation_dict['bboxes'] = boxes.numpy().tolist() - def post_process(self, albumentation_output, darwin_annotations): - - darwin_annotation = {'image_id': darwin_annotations['image_id']} - image = Image.fromarray(albumentation_output['image']) - - if "bboxes" in albumentation_output: - darwin_annotation['boxes'] = torch.tensor(albumentation_output['bboxes']) - - if "labels" in albumentation_output: - darwin_annotation['labels'] = torch.tensor(albumentation_output['labels']) - - if "boxes" in albumentation_output and "area" in darwin_annotations and not "masks" in darwin_annotations: - bboxes =transformed_annotation["boxes"] - transformed_annotation['area'] = bboxes[:,2] * bboxes[:,3] - + labels = annotation.get('labels') + if labels is not None: + albumentation_dict['labels'] = labels.tolist() - return image, darwin_annotation \ No newline at end of file + masks = annotation.get('masks') + if masks is not None: + albumentation_dict["masks"] = masks.numpy() + + return albumentation_dict + + def _post_process(self, albumentation_output: dict, annotation: dict) -> tuple: + """ + Process the output of albumentations transformation back to desired format. 
+ """ + output_annotation = {'image_id': annotation['image_id']} + image = PILImage.fromarray(albumentation_output['image']) + + bboxes = albumentation_output.get('bboxes') + if bboxes is not None: + output_annotation['boxes'] = torch.tensor(bboxes) + if 'area' in annotation and 'masks' not in albumentation_output: + output_annotation['area'] = output_annotation['boxes'][:, 2] * output_annotation['boxes'][:, 3] + + labels = albumentation_output.get('labels') + if labels is not None: + output_annotation['labels'] = torch.tensor(labels) + + masks = albumentation_output.get('masks') + if masks is not None: + output_annotation['masks'] = torch.tensor(masks) + if 'area' in annotation: + output_annotation['area'] = torch.sum(output_annotation['masks'], dim=[1, 2]) + + # Copy other metadata from original annotation + for key, value in annotation.items(): + output_annotation.setdefault(key, value) + + return image, output_annotation \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 240bd9406..aeda05d69 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -94,6 +94,7 @@ types-pyyaml = "^6.0.12.9" python-dotenv = { version = "^1.0.0", python = ">3.8" } opencv-python-headless = { version = "^4.8.0.76", optional = true } pyyaml = "^6.0.1" +albumentations = "^1.3.1" [tool.poetry.extras] dev = [ From 36b82a2fac956da31c4eb6be1645ec38a1117f2b Mon Sep 17 00:00:00 2001 From: Christoffer Date: Thu, 21 Sep 2023 18:03:19 +0200 Subject: [PATCH 04/11] updated tests --- darwin/torch/transforms.py | 82 ++++++++++++++---------------- tests/darwin/torch/dataset_test.py | 4 +- 2 files changed, 39 insertions(+), 47 deletions(-) diff --git a/darwin/torch/transforms.py b/darwin/torch/transforms.py index 2b3045e9f..7d7f95459 100644 --- a/darwin/torch/transforms.py +++ b/darwin/torch/transforms.py @@ -32,9 +32,7 @@ class RandomHorizontalFlip(transforms.RandomHorizontalFlip): Allows for horizontal flipping of an image, randomly. """ - def forward( - self, image: torch.Tensor, target: Optional[TargetType] = None - ) -> Union[torch.Tensor, Tuple[torch.Tensor, TargetType]]: + def forward(self, image: torch.Tensor, target: Optional[TargetType] = None) -> Union[torch.Tensor, Tuple[torch.Tensor, TargetType]]: """ May or may not horizontally flip an image depending on a random factor. @@ -77,9 +75,7 @@ class RandomVerticalFlip(transforms.RandomVerticalFlip): Allows for vertical flipping of an image, randomly. """ - def forward( - self, image: torch.Tensor, target: Optional[TargetType] = None - ) -> Union[torch.Tensor, Tuple[torch.Tensor, TargetType]]: + def forward(self, image: torch.Tensor, target: Optional[TargetType] = None) -> Union[torch.Tensor, Tuple[torch.Tensor, TargetType]]: """ May or may not vertically flip an image depending on a random factor. @@ -121,9 +117,7 @@ class ColorJitter(transforms.ColorJitter): Jitters the colors of the given transformation. """ - def __call__( - self, image: PILImage.Image, target: Optional[TargetType] = None - ) -> Union[PILImage.Image, Tuple[PILImage.Image, TargetType]]: + def __call__(self, image: PILImage.Image, target: Optional[TargetType] = None) -> Union[PILImage.Image, Tuple[PILImage.Image, TargetType]]: transform = self.get_params(self.brightness, self.contrast, self.saturation, self.hue) image = transform(image) if target is None: @@ -136,9 +130,7 @@ class ToTensor(transforms.ToTensor): Converts given ``PILImage`` to a ``Tensor``. 
""" - def __call__( - self, image: PILImage.Image, target: Optional[TargetType] = None - ) -> Union[torch.Tensor, Tuple[torch.Tensor, TargetType]]: + def __call__(self, image: PILImage.Image, target: Optional[TargetType] = None) -> Union[torch.Tensor, Tuple[torch.Tensor, TargetType]]: image_tensor: torch.Tensor = F.to_tensor(image) if target is None: return image_tensor @@ -150,9 +142,7 @@ class ToPILImage(transforms.ToPILImage): Converts given ``Tensor`` to a ``PILImage``. """ - def __call__( - self, image: torch.Tensor, target: Optional[TargetType] = None - ) -> Union[PILImage.Image, Tuple[PILImage.Image, TargetType]]: + def __call__(self, image: torch.Tensor, target: Optional[TargetType] = None) -> Union[PILImage.Image, Tuple[PILImage.Image, TargetType]]: pil_image: PILImage.Image = F.to_pil_image(image) if target is None: return pil_image @@ -164,9 +154,7 @@ class Normalize(transforms.Normalize): Normalizes the given ``Tensor``. """ - def __call__( - self, tensor: torch.Tensor, target: Optional[TargetType] = None - ) -> Union[torch.Tensor, Tuple[torch.Tensor, TargetType]]: + def __call__(self, tensor: torch.Tensor, target: Optional[TargetType] = None) -> Union[torch.Tensor, Tuple[torch.Tensor, TargetType]]: tensor = F.normalize(tensor, self.mean, self.std, self.inplace) if target is None: @@ -209,11 +197,11 @@ def __call__(self, image: PILImage.Image, target: TargetType) -> Tuple[PILImage. if num_keypoints: keypoints = keypoints.view(num_keypoints, -1, 3) - #keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0]) - #boxes = boxes[keep] - #classes = classes[keep] - #masks = masks[keep] - #if keypoints is not None: + # keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0]) + # boxes = boxes[keep] + # classes = classes[keep] + # masks = masks[keep] + # if keypoints is not None: # keypoints = keypoints[keep] target["boxes"] = boxes @@ -289,7 +277,7 @@ def __init__(self, transform: A.Compose): self.transform = transform @classmethod - def from_path(cls, config_path: str) -> 'AlbumentationsTransform': + def from_path(cls, config_path: str) -> "AlbumentationsTransform": try: transform = A.load(config_path) return cls(transform) @@ -297,7 +285,7 @@ def from_path(cls, config_path: str) -> 'AlbumentationsTransform': raise ValueError(f"Invalid config path: {config_path}. Error: {e}") @classmethod - def from_dict(cls, alb_dict: dict) -> 'AlbumentationsTransform': + def from_dict(cls, alb_dict: dict) -> "AlbumentationsTransform": try: transform = A.from_dict(alb_dict) return cls(transform) @@ -309,23 +297,24 @@ def __call__(self, image, annotation: dict) -> tuple: albu_data = self._pre_process(np_image, annotation) transformed_data = self.transform(**albu_data) image, transformed_annotation = self._post_process(transformed_data, annotation) - return F.pil_to_tensor(image), transformed_annotation + + return image, transformed_annotation def _pre_process(self, image: np.ndarray, annotation: dict) -> dict: """ Prepare image and annotation for albumentations transformation. 
""" albumentation_dict = {"image": image} - - boxes = annotation.get('boxes') + + boxes = annotation.get("boxes") if boxes is not None: - albumentation_dict['bboxes'] = boxes.numpy().tolist() + albumentation_dict["bboxes"] = boxes.numpy().tolist() - labels = annotation.get('labels') + labels = annotation.get("labels") if labels is not None: - albumentation_dict['labels'] = labels.tolist() + albumentation_dict["labels"] = labels.tolist() - masks = annotation.get('masks') + masks = annotation.get("masks") if masks is not None: albumentation_dict["masks"] = masks.numpy() @@ -335,27 +324,30 @@ def _post_process(self, albumentation_output: dict, annotation: dict) -> tuple: """ Process the output of albumentations transformation back to desired format. """ - output_annotation = {'image_id': annotation['image_id']} - image = PILImage.fromarray(albumentation_output['image']) + output_annotation = {} + image = albumentation_output["image"] - bboxes = albumentation_output.get('bboxes') + bboxes = albumentation_output.get("bboxes") if bboxes is not None: - output_annotation['boxes'] = torch.tensor(bboxes) - if 'area' in annotation and 'masks' not in albumentation_output: - output_annotation['area'] = output_annotation['boxes'][:, 2] * output_annotation['boxes'][:, 3] + output_annotation["boxes"] = torch.tensor(bboxes) + if "area" in annotation and "masks" not in albumentation_output: + output_annotation["area"] = output_annotation["boxes"][:, 2] * output_annotation["boxes"][:, 3] - labels = albumentation_output.get('labels') + labels = albumentation_output.get("labels") if labels is not None: - output_annotation['labels'] = torch.tensor(labels) + output_annotation["labels"] = torch.tensor(labels) - masks = albumentation_output.get('masks') + masks = albumentation_output.get("masks") if masks is not None: - output_annotation['masks'] = torch.tensor(masks) - if 'area' in annotation: - output_annotation['area'] = torch.sum(output_annotation['masks'], dim=[1, 2]) + if isinstance(masks[0], np.ndarray): + output_annotation["masks"] = torch.tensor(np.array(masks)) + else: + output_annotation["masks"] = torch.stack(masks) + if "area" in annotation: + output_annotation["area"] = torch.sum(output_annotation["masks"], dim=[1, 2]) # Copy other metadata from original annotation for key, value in annotation.items(): output_annotation.setdefault(key, value) - return image, output_annotation \ No newline at end of file + return image, output_annotation diff --git a/tests/darwin/torch/dataset_test.py b/tests/darwin/torch/dataset_test.py index 05eb8f5ce..cfb2c03e2 100644 --- a/tests/darwin/torch/dataset_test.py +++ b/tests/darwin/torch/dataset_test.py @@ -210,7 +210,7 @@ def test_loads_instance_segmentation_dataset_from_polygon_annotations( label = {k: _maybe_tensor_to_list(v) for k, v in label.items()} - assert label["boxes"] == [[4.0, 33.0, 41.0, 50.0]] + assert label["boxes"] == [[4.0, 33.0, 37.0, 17.0]] assert label["area"] == [576.0] assert label["labels"] == [1] assert label["image_id"] == [0] @@ -231,7 +231,7 @@ def test_loads_instance_segmentation_dataset_from_complex_polygon_annotations( label = {k: _maybe_tensor_to_list(v) for k, v in label.items()} - assert label["boxes"] == [[1.0, 1.0, 41.0, 50.0]] + assert label["boxes"] == [[1.0, 1.0, 40.0, 49.0]] assert label["area"] == [592.0] assert label["labels"] == [1] assert label["image_id"] == [0] From 0a180cea13f50c5e8fd6a8c3512fba1af362bea5 Mon Sep 17 00:00:00 2001 From: Christoffer Date: Fri, 22 Sep 2023 13:13:04 +0200 Subject: [PATCH 05/11] should work for 
classificaiton now as well --- darwin/torch/transforms.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/darwin/torch/transforms.py b/darwin/torch/transforms.py index 7d7f95459..319936ad3 100644 --- a/darwin/torch/transforms.py +++ b/darwin/torch/transforms.py @@ -292,8 +292,10 @@ def from_dict(cls, alb_dict: dict) -> "AlbumentationsTransform": except Exception as e: raise ValueError(f"Invalid albumentations dictionary. Error: {e}") - def __call__(self, image, annotation: dict) -> tuple: + def __call__(self, image, annotation: dict = None) -> tuple: np_image = np.array(image) + if annotation is None: + annotation = {} albu_data = self._pre_process(np_image, annotation) transformed_data = self.transform(**albu_data) image, transformed_annotation = self._post_process(transformed_data, annotation) From aab52e2e2c40fcd1babd6834940673df3bfee5eb Mon Sep 17 00:00:00 2001 From: Christoffer Date: Tue, 26 Sep 2023 10:28:50 +0200 Subject: [PATCH 06/11] cleaner up --- darwin/torch/dataset.py | 2 +- darwin/torch/transforms.py | 36 +++++++++++++++++++++++++++--------- pyproject.toml | 3 +-- 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/darwin/torch/dataset.py b/darwin/torch/dataset.py index 1b29d3bad..518d05527 100644 --- a/darwin/torch/dataset.py +++ b/darwin/torch/dataset.py @@ -397,7 +397,7 @@ def __init__(self, transform: Optional[Union[List[Callable], Callable]] = None, self.num_classes += 1 if transform is not None and isinstance(transform, list): transform = Compose(transform) - + self.transform: Optional[Callable] = transform self.convert_polygons = ConvertPolygonsToSemanticMask() diff --git a/darwin/torch/transforms.py b/darwin/torch/transforms.py index 319936ad3..470335b76 100644 --- a/darwin/torch/transforms.py +++ b/darwin/torch/transforms.py @@ -1,13 +1,29 @@ import random +from pathlib import Path from typing import Any, Dict, Optional, Tuple, Union -import albumentations as A import numpy as np import torch import torchvision.transforms as transforms import torchvision.transforms.functional as F from PIL import Image as PILImage +# Optional dependency +try: + import albumentations as A +except ImportError: + A = None + +from typing import TYPE_CHECKING, Type + +if TYPE_CHECKING: + from albumentations.pytorch import ToTensorV2 + AType = Type[ToTensorV2] +else: + AType = Type[None] + + + from darwin.torch.utils import convert_segmentation_to_mask, flatten_masks_by_category TargetKey = Union["boxes", "labels", "mask", "masks", "image_id", "area", "iscrowd"] @@ -197,13 +213,6 @@ def __call__(self, image: PILImage.Image, target: TargetType) -> Tuple[PILImage. if num_keypoints: keypoints = keypoints.view(num_keypoints, -1, 3) - # keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0]) - # boxes = boxes[keep] - # classes = classes[keep] - # masks = masks[keep] - # if keypoints is not None: - # keypoints = keypoints[keep] - target["boxes"] = boxes target["labels"] = classes target["masks"] = masks @@ -273,13 +282,16 @@ class AlbumentationsTransform: Wrapper class for Albumentations augmentations. """ + def __init__(self, transform: A.Compose): + self._check_albumentaion_dependency() self.transform = transform @classmethod def from_path(cls, config_path: str) -> "AlbumentationsTransform": + config_path = Path(config_path) try: - transform = A.load(config_path) + transform = A.load(str(config_path)) return cls(transform) except Exception as e: raise ValueError(f"Invalid config path: {config_path}. 
Error: {e}") @@ -353,3 +365,9 @@ def _post_process(self, albumentation_output: dict, annotation: dict) -> tuple: output_annotation.setdefault(key, value) return image, output_annotation + + def _check_albumentaion_dependency(self): + if A is None: + raise ImportError("The albumentations library is not installed. " + "To use this function, install it with pip install albumentations, " + "or install the ml extras of this package.") \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index aeda05d69..1228a331e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -94,7 +94,6 @@ types-pyyaml = "^6.0.12.9" python-dotenv = { version = "^1.0.0", python = ">3.8" } opencv-python-headless = { version = "^4.8.0.76", optional = true } pyyaml = "^6.0.1" -albumentations = "^1.3.1" [tool.poetry.extras] dev = [ @@ -108,7 +107,7 @@ dev = [ "flake8-pyproject", ] test = ["responses", "pytest", "flake8-pyproject"] -ml = ["torch", "torchvision", "scikit-learn"] +ml = ["torch", "torchvision", "scikit-learn", "albumentations"] medical = ["nibabel", "connected-components-3d"] ocv = ["opencv-python-headless"] From 2ccc966cf26c9fb8147979f1fc8fee7c06b039ef Mon Sep 17 00:00:00 2001 From: Christoffer Date: Tue, 26 Sep 2023 10:29:47 +0200 Subject: [PATCH 07/11] formatting --- darwin/torch/transforms.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/darwin/torch/transforms.py b/darwin/torch/transforms.py index 470335b76..516f04803 100644 --- a/darwin/torch/transforms.py +++ b/darwin/torch/transforms.py @@ -18,12 +18,12 @@ if TYPE_CHECKING: from albumentations.pytorch import ToTensorV2 + AType = Type[ToTensorV2] else: AType = Type[None] - from darwin.torch.utils import convert_segmentation_to_mask, flatten_masks_by_category TargetKey = Union["boxes", "labels", "mask", "masks", "image_id", "area", "iscrowd"] @@ -282,7 +282,6 @@ class AlbumentationsTransform: Wrapper class for Albumentations augmentations. """ - def __init__(self, transform: A.Compose): self._check_albumentaion_dependency() self.transform = transform @@ -366,8 +365,10 @@ def _post_process(self, albumentation_output: dict, annotation: dict) -> tuple: return image, output_annotation - def _check_albumentaion_dependency(self): + def _check_albumentaion_dependency(self): if A is None: - raise ImportError("The albumentations library is not installed. " - "To use this function, install it with pip install albumentations, " - "or install the ml extras of this package.") \ No newline at end of file + raise ImportError( + "The albumentations library is not installed. " + "To use this function, install it with pip install albumentations, " + "or install the ml extras of this package." 
+ ) From 81d48fd3b3c44575b15e6ef0f8d2631fc8e55b5e Mon Sep 17 00:00:00 2001 From: Christoffer Date: Tue, 26 Sep 2023 13:37:11 +0200 Subject: [PATCH 08/11] better error handling when albumentations is not installed --- darwin/torch/transforms.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/darwin/torch/transforms.py b/darwin/torch/transforms.py index 516f04803..75b682f30 100644 --- a/darwin/torch/transforms.py +++ b/darwin/torch/transforms.py @@ -11,6 +11,7 @@ # Optional dependency try: import albumentations as A + from albumentations import Compose except ImportError: A = None @@ -22,6 +23,7 @@ AType = Type[ToTensorV2] else: AType = Type[None] + Compose = Type[None] from darwin.torch.utils import convert_segmentation_to_mask, flatten_masks_by_category @@ -282,7 +284,7 @@ class AlbumentationsTransform: Wrapper class for Albumentations augmentations. """ - def __init__(self, transform: A.Compose): + def __init__(self, transform: Compose): self._check_albumentaion_dependency() self.transform = transform From e5a41da2a224bd318a033ce152e0dfd8c270edbf Mon Sep 17 00:00:00 2001 From: Christoffer Date: Thu, 28 Sep 2023 11:15:28 +0200 Subject: [PATCH 09/11] fixed potential clamp issues --- darwin/torch/dataset.py | 14 ++++++++++++-- darwin/torch/utils.py | 32 ++++++++++++++++++++++++++++---- 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/darwin/torch/dataset.py b/darwin/torch/dataset.py index 518d05527..4e6a72b01 100644 --- a/darwin/torch/dataset.py +++ b/darwin/torch/dataset.py @@ -333,8 +333,18 @@ def get_target(self, index: int) -> Dict[str, Any]: min_y: float = np.min([np.min(y_coord) for y_coord in y_coords]) max_x: float = np.max([np.max(x_coord) for x_coord in x_coords]) max_y: float = np.max([np.max(y_coord) for y_coord in y_coords]) - w: float = max_x - min_x + 1 - h: float = max_y - min_y + 1 + + # Clamp the coordinates to the image dimensions + min_x: float = max(0, min_x) + min_y: float = max(0, min_y) + max_x: float = min(target["width"] - 1, max_x) + max_y: float = min(target["height"] - 1, max_y) + + assert min_x < max_x and min_y < max_y + + # Convert to XYWH + w: float = max_x - (min_x + 1) + h: float = max_y - (min_y + 1) # Compute the area of the polygon # TODO fix with addictive/subtractive paths in complex polygons poly_area: float = np.sum([polygon_area(x_coord, y_coord) for x_coord, y_coord in zip(x_coords, y_coords)]) diff --git a/darwin/torch/utils.py b/darwin/torch/utils.py index 51715b89c..e12d4976c 100644 --- a/darwin/torch/utils.py +++ b/darwin/torch/utils.py @@ -194,10 +194,34 @@ def detectron2_register_dataset( return catalog_name -def clamp_bbox_to_image_size(annotations, width, height): +def clamp_bbox_to_image_size(annotations, width, height, format="xywh"): + """ + Clamps bounding boxes in annotations to the given image dimensions. + + :param annotations: Dictionary containing bounding box coordinates in 'boxes' key. + :param width: Width of the image. + :param height: Height of the image. + :param format: Format of the bounding boxes, either "xywh" or "xyxy". + :return: Annotations with clamped bounding boxes. + + The function modifies the input annotations dictionary to clamp the bounding box coordinates + based on the specified format, ensuring they lie within the image dimensions. 
+ """ boxes = annotations['boxes'] - boxes[:, 0::2].clamp_(min=0, max=width) - boxes[:, 1::2].clamp_(min=0, max=height) + + if format == "xyxy": + boxes[:, 0::2].clamp_(min=0, max=width - 1) + boxes[:, 1::2].clamp_(min=0, max=height - 1) + + elif format == "xywh": + boxes[:, 0].clamp_(min=0, max=width - boxes[:, 2] - 1) + boxes[:, 1].clamp_(min=0, max=height - boxes[:, 3] - 1) + boxes[:, 2].clamp_(min=0, max=width - boxes[:, 0] - 1) + boxes[:, 3].clamp_(min=0, max=height - boxes[:, 1] - 1) + + else: + raise ValueError(f"Unsupported bounding box format: {format}") + annotations['boxes'] = boxes - return annotations + From dd59fca1b3fcda07510ad8a655cec978074a0b95 Mon Sep 17 00:00:00 2001 From: Christoffer Date: Thu, 28 Sep 2023 12:50:08 +0200 Subject: [PATCH 10/11] adjusted tests to reflect the xyxy to xywh changes of the instance seg test --- darwin/torch/dataset.py | 8 +++---- darwin/torch/transforms.py | 26 ++++++++++++++------- darwin/torch/utils.py | 36 +++++++++++++++--------------- darwin/utils/utils.py | 4 ++-- tests/darwin/torch/dataset_test.py | 11 ++++----- tests/darwin/torch/utils_test.py | 23 ++++++++++++++++++- 6 files changed, 70 insertions(+), 38 deletions(-) diff --git a/darwin/torch/dataset.py b/darwin/torch/dataset.py index 4e6a72b01..ebc3b4d17 100644 --- a/darwin/torch/dataset.py +++ b/darwin/torch/dataset.py @@ -343,8 +343,9 @@ def get_target(self, index: int) -> Dict[str, Any]: assert min_x < max_x and min_y < max_y # Convert to XYWH - w: float = max_x - (min_x + 1) - h: float = max_y - (min_y + 1) + w: float = max_x - min_x + h: float = max_y - min_y + # Compute the area of the polygon # TODO fix with addictive/subtractive paths in complex polygons poly_area: float = np.sum([polygon_area(x_coord, y_coord) for x_coord, y_coord in zip(x_coords, y_coords)]) @@ -400,14 +401,13 @@ class SemanticSegmentationDataset(LocalDataset): """ def __init__(self, transform: Optional[Union[List[Callable], Callable]] = None, **kwargs): - super().__init__(annotation_type="polygon", **kwargs) if not "__background__" in self.classes: self.classes.insert(0, "__background__") self.num_classes += 1 if transform is not None and isinstance(transform, list): transform = Compose(transform) - + self.transform: Optional[Callable] = transform self.convert_polygons = ConvertPolygonsToSemanticMask() diff --git a/darwin/torch/transforms.py b/darwin/torch/transforms.py index 75b682f30..edf95087a 100644 --- a/darwin/torch/transforms.py +++ b/darwin/torch/transforms.py @@ -50,7 +50,9 @@ class RandomHorizontalFlip(transforms.RandomHorizontalFlip): Allows for horizontal flipping of an image, randomly. """ - def forward(self, image: torch.Tensor, target: Optional[TargetType] = None) -> Union[torch.Tensor, Tuple[torch.Tensor, TargetType]]: + def forward( + self, image: torch.Tensor, target: Optional[TargetType] = None + ) -> Union[torch.Tensor, Tuple[torch.Tensor, TargetType]]: """ May or may not horizontally flip an image depending on a random factor. @@ -93,7 +95,9 @@ class RandomVerticalFlip(transforms.RandomVerticalFlip): Allows for vertical flipping of an image, randomly. """ - def forward(self, image: torch.Tensor, target: Optional[TargetType] = None) -> Union[torch.Tensor, Tuple[torch.Tensor, TargetType]]: + def forward( + self, image: torch.Tensor, target: Optional[TargetType] = None + ) -> Union[torch.Tensor, Tuple[torch.Tensor, TargetType]]: """ May or may not vertically flip an image depending on a random factor. 
@@ -135,7 +139,9 @@ class ColorJitter(transforms.ColorJitter): Jitters the colors of the given transformation. """ - def __call__(self, image: PILImage.Image, target: Optional[TargetType] = None) -> Union[PILImage.Image, Tuple[PILImage.Image, TargetType]]: + def __call__( + self, image: PILImage.Image, target: Optional[TargetType] = None + ) -> Union[PILImage.Image, Tuple[PILImage.Image, TargetType]]: transform = self.get_params(self.brightness, self.contrast, self.saturation, self.hue) image = transform(image) if target is None: @@ -148,7 +154,9 @@ class ToTensor(transforms.ToTensor): Converts given ``PILImage`` to a ``Tensor``. """ - def __call__(self, image: PILImage.Image, target: Optional[TargetType] = None) -> Union[torch.Tensor, Tuple[torch.Tensor, TargetType]]: + def __call__( + self, image: PILImage.Image, target: Optional[TargetType] = None + ) -> Union[torch.Tensor, Tuple[torch.Tensor, TargetType]]: image_tensor: torch.Tensor = F.to_tensor(image) if target is None: return image_tensor @@ -160,7 +168,9 @@ class ToPILImage(transforms.ToPILImage): Converts given ``Tensor`` to a ``PILImage``. """ - def __call__(self, image: torch.Tensor, target: Optional[TargetType] = None) -> Union[PILImage.Image, Tuple[PILImage.Image, TargetType]]: + def __call__( + self, image: torch.Tensor, target: Optional[TargetType] = None + ) -> Union[PILImage.Image, Tuple[PILImage.Image, TargetType]]: pil_image: PILImage.Image = F.to_pil_image(image) if target is None: return pil_image @@ -172,7 +182,9 @@ class Normalize(transforms.Normalize): Normalizes the given ``Tensor``. """ - def __call__(self, tensor: torch.Tensor, target: Optional[TargetType] = None) -> Union[torch.Tensor, Tuple[torch.Tensor, TargetType]]: + def __call__( + self, tensor: torch.Tensor, target: Optional[TargetType] = None + ) -> Union[torch.Tensor, Tuple[torch.Tensor, TargetType]]: tensor = F.normalize(tensor, self.mean, self.std, self.inplace) if target is None: @@ -198,8 +210,6 @@ def __call__(self, image: PILImage.Image, target: TargetType) -> Tuple[PILImage. boxes = [obj["bbox"] for obj in annotations] # guard against no boxes via resizing boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4) - boxes[:, 0::2].clamp_(min=0, max=w) - boxes[:, 1::2].clamp_(min=0, max=h) classes = [obj["category_id"] for obj in annotations] classes = torch.tensor(classes, dtype=torch.int64) diff --git a/darwin/torch/utils.py b/darwin/torch/utils.py index e12d4976c..3080c6ac6 100644 --- a/darwin/torch/utils.py +++ b/darwin/torch/utils.py @@ -194,34 +194,34 @@ def detectron2_register_dataset( return catalog_name -def clamp_bbox_to_image_size(annotations, width, height, format="xywh"): +def clamp_bbox_to_image_size(annotations, img_width, img_height, format="xywh"): """ Clamps bounding boxes in annotations to the given image dimensions. - + :param annotations: Dictionary containing bounding box coordinates in 'boxes' key. - :param width: Width of the image. - :param height: Height of the image. + :param img_width: Width of the image. + :param img_height: Height of the image. :param format: Format of the bounding boxes, either "xywh" or "xyxy". :return: Annotations with clamped bounding boxes. - + The function modifies the input annotations dictionary to clamp the bounding box coordinates based on the specified format, ensuring they lie within the image dimensions. 
""" - boxes = annotations['boxes'] - + boxes = annotations["boxes"] + if format == "xyxy": - boxes[:, 0::2].clamp_(min=0, max=width - 1) - boxes[:, 1::2].clamp_(min=0, max=height - 1) - + boxes[:, 0::2].clamp_(min=0, max=img_width - 1) + boxes[:, 1::2].clamp_(min=0, max=img_height - 1) + elif format == "xywh": - boxes[:, 0].clamp_(min=0, max=width - boxes[:, 2] - 1) - boxes[:, 1].clamp_(min=0, max=height - boxes[:, 3] - 1) - boxes[:, 2].clamp_(min=0, max=width - boxes[:, 0] - 1) - boxes[:, 3].clamp_(min=0, max=height - boxes[:, 1] - 1) - + # First, clamp the x and y coordinates + boxes[:, 0].clamp_(min=0, max=img_width - 1) + boxes[:, 1].clamp_(min=0, max=img_height - 1) + # Then, clamp the width and height + boxes[:, 2].clamp_(min=torch.tensor(0), max=img_width - boxes[:, 0] - 1) # -1 since we images are zero-indexed + boxes[:, 3].clamp_(min=torch.tensor(0), max=img_height - boxes[:, 1] - 1) # -1 since we images are zero-indexed else: raise ValueError(f"Unsupported bounding box format: {format}") - - annotations['boxes'] = boxes - return annotations + annotations["boxes"] = boxes + return annotations diff --git a/darwin/utils/utils.py b/darwin/utils/utils.py index e4907f50e..38e69c740 100644 --- a/darwin/utils/utils.py +++ b/darwin/utils/utils.py @@ -919,8 +919,8 @@ def convert_polygons_to_sequences( path: List[Union[int, float]] = [] for point in polygon: # Clip coordinates to the image size - x = max(min(point["x"], width - 1) if width else point["x"], 0) - y = max(min(point["y"], height - 1) if height else point["y"], 0) + x = max(min(point["x"], width -1) if width else point["x"], 0) + y = max(min(point["y"], height -1) if height else point["y"], 0) if rounding: path.append(round(x)) path.append(round(y)) diff --git a/tests/darwin/torch/dataset_test.py b/tests/darwin/torch/dataset_test.py index cfb2c03e2..70d22c34f 100644 --- a/tests/darwin/torch/dataset_test.py +++ b/tests/darwin/torch/dataset_test.py @@ -129,8 +129,9 @@ def test_loads_object_detection_dataset_from_bounding_box_annotations( assert image.size() == (3, 50, 50) label = {k: v.numpy().tolist() for k, v in label.items()} + assert label == { - "boxes": [[4, 33, 17, 36]], + "boxes": [[4, 33, 17, 16]], # we need to account for xywh format and clamping "area": [612], "labels": [1], "image_id": [0], @@ -149,7 +150,7 @@ def test_loads_object_detection_dataset_from_polygon_annotations( label = {k: v.numpy().tolist() for k, v in label.items()} assert label == { - "boxes": [[4, 33, 17, 36]], + "boxes": [[4, 33, 17, 16]], # we need to account for xywh format and clamping "area": [612], "labels": [1], "image_id": [0], @@ -168,7 +169,7 @@ def test_loads_object_detection_dataset_from_complex_polygon_annotations( label = {k: v.numpy().tolist() for k, v in label.items()} assert label == { - "boxes": [[1, 1, 39, 49]], + "boxes": [[1, 1, 39, 48]], "area": [1911], "labels": [1], "image_id": [0], @@ -210,7 +211,7 @@ def test_loads_instance_segmentation_dataset_from_polygon_annotations( label = {k: _maybe_tensor_to_list(v) for k, v in label.items()} - assert label["boxes"] == [[4.0, 33.0, 37.0, 17.0]] + assert label["boxes"] == [[4.0, 33.0, 36.0, 16.0]] assert label["area"] == [576.0] assert label["labels"] == [1] assert label["image_id"] == [0] @@ -231,7 +232,7 @@ def test_loads_instance_segmentation_dataset_from_complex_polygon_annotations( label = {k: _maybe_tensor_to_list(v) for k, v in label.items()} - assert label["boxes"] == [[1.0, 1.0, 40.0, 49.0]] + assert label["boxes"] == [[1.0, 1.0, 39.0, 48.0]] assert label["area"] == [592.0] 
assert label["labels"] == [1] assert label["image_id"] == [0] diff --git a/tests/darwin/torch/utils_test.py b/tests/darwin/torch/utils_test.py index 598c598c4..4fca1354e 100644 --- a/tests/darwin/torch/utils_test.py +++ b/tests/darwin/torch/utils_test.py @@ -3,7 +3,7 @@ import numpy as np import torch -from darwin.torch.utils import flatten_masks_by_category +from darwin.torch.utils import clamp_bbox_to_image_size, flatten_masks_by_category from tests.fixtures import * @@ -67,3 +67,24 @@ def test_should_handle_multiple_overlaps(self, multiple_overlap_masks) -> None: expected_counts = torch.as_tensor([7, 2], dtype=torch.uint8) assert torch.equal(unique, expected_unique) assert torch.equal(counts, expected_counts) + +class TestClampBboxToImageSize: + def test_clamp_bbox_xyxy(self): + annotations = {'boxes': torch.tensor([[5.0, 5.0, 15.0, 15.0], [-5.0, -5.0, 25.0, 25.0]])} + width = 20 + height = 20 + + clamped_annotations = clamp_bbox_to_image_size(annotations, width, height, format="xyxy") + expected_boxes = torch.tensor([[5.0, 5.0, 15.0, 15.0], [0.0, 0.0, 19.0, 19.0]]) + + assert torch.equal(clamped_annotations['boxes'], expected_boxes) + + def test_clamp_bbox_xywh(self): + annotations = {'boxes': torch.tensor([[5.0, 5.0, 15.0, 15.0], [-5.0, -5.0, 30.0, 30.0]])} + width = 20 + height = 20 + + clamped_annotations = clamp_bbox_to_image_size(annotations, width, height, format="xywh") + expected_boxes = torch.tensor([[5.0, 5.0, 14.0, 14.0], [0.0, 0.0, 19.0, 19.0]]) + + assert torch.equal(clamped_annotations['boxes'], expected_boxes) From e02474fa94ccb3d6b157cb0c1d2724ae020f0e45 Mon Sep 17 00:00:00 2001 From: Christoffer Date: Thu, 28 Sep 2023 13:03:18 +0200 Subject: [PATCH 11/11] added the check for empty boxes --- darwin/torch/transforms.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/darwin/torch/transforms.py b/darwin/torch/transforms.py index edf95087a..65331a551 100644 --- a/darwin/torch/transforms.py +++ b/darwin/torch/transforms.py @@ -232,6 +232,14 @@ def __call__(self, image: PILImage.Image, target: TargetType) -> Tuple[PILImage. if keypoints is not None: target["keypoints"] = keypoints + # Remove boxes with widht or height zero + keep = (boxes[:, 3] > 0) & (boxes[:, 2] > 0) + boxes = boxes[keep] + classes = classes[keep] + masks = masks[keep] + if keypoints is not None: + keypoints = keypoints[keep] + # conversion to coco api area = torch.tensor([obj["area"] for obj in annotations]) iscrowd = torch.tensor([obj.get("iscrowd", 0) for obj in annotations])