Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[IO-1405][external] YoloV8 Segmentation dataset support #643

Merged
merged 5 commits into from
Aug 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions darwin/exporter/formats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,6 @@
"semantic_mask_grey",
"semantic_mask_index",
"yolo",
"yolo_segmented",
"nifti",
]
Empty file.
46 changes: 46 additions & 0 deletions darwin/exporter/formats/helpers/yolo_class_builder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from pathlib import Path
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Extracted some of the standard YOLO functions into a shared helper so that both the YOLO and YOLO segmentation exporters can use them.

from typing import Callable, Dict, Iterable, List

from darwin.datatypes import AnnotationFile

ClassIndex = Dict[str, int]


def build_class_index(
    annotation_files: Iterable[AnnotationFile],
    include_types: Iterable[str] = ("bounding_box", "polygon", "complex_polygon"),
) -> ClassIndex:
    """
    Builds a mapping from class name to a zero-based integer index.

    Parameters
    ----------
    annotation_files : Iterable[AnnotationFile]
        The annotation files whose annotations are scanned for class names.
    include_types : Iterable[str]
        The annotation types whose classes are indexed. Annotations of any
        other type are ignored. Defaults to bounding boxes, polygons and
        complex polygons.

    Returns
    -------
    ClassIndex
        Every distinct matching class name mapped to its position in the
        alphabetically sorted list of names.
    """
    # The default is an immutable tuple so it cannot be shared-and-mutated
    # across calls. Copying into a frozenset also makes one-shot iterables
    # safe, since the membership test below runs once per annotation.
    wanted_types = frozenset(include_types)

    class_names = set()
    for annotation_file in annotation_files:
        for annotation in annotation_file.annotations:
            annotation_class = annotation.annotation_class
            if annotation_class.annotation_type in wanted_types:
                class_names.add(annotation_class.name)

    return {name: index for index, name in enumerate(sorted(class_names))}


def export_file(
    annotation_file: AnnotationFile,
    class_index: ClassIndex,
    output_dir: Path,
    build_function: Callable[[AnnotationFile, ClassIndex], str],
) -> None:
    """
    Writes the text produced by ``build_function`` for one annotation file.

    Parameters
    ----------
    annotation_file : AnnotationFile
        The annotation file to export. Its ``path.name`` determines the name
        of the output file.
    class_index : ClassIndex
        The class index passed through to ``build_function``.
    output_dir : Path
        The folder the ``.txt`` file is written to. It is created if missing.
    build_function : Callable[[AnnotationFile, ClassIndex], str]
        Produces the text content to be written for the annotation file.

    Returns
    -------
    None
    """
    txt = build_function(annotation_file, class_index)

    # `.with_suffix(".txt")` is not used because a name that does not end in
    # ".json" must keep its existing suffix (img.png -> img.png.txt).
    # Only a trailing ".json" is swapped: replacing every occurrence would
    # mangle names that contain ".json" elsewhere (my.jsonl.json).
    json_suffix = ".json"
    filename = annotation_file.path.name
    if filename.endswith(json_suffix):
        filename_to_write = filename[: -len(json_suffix)] + ".txt"
    else:
        filename_to_write = filename + ".txt"
    output_file_path = output_dir / filename_to_write

    output_file_path.parent.mkdir(parents=True, exist_ok=True)
    # Explicit encoding so the output does not depend on the platform locale.
    with open(output_file_path, "w", encoding="utf-8") as f:
        f.write(txt)


def save_class_index(class_index: ClassIndex, output_dir: Path) -> None:
    """
    Writes the class names to ``darknet.labels`` inside ``output_dir``.

    Parameters
    ----------
    class_index : ClassIndex
        Mapping of class name to integer index.
    output_dir : Path
        The folder that receives the ``darknet.labels`` file.

    Returns
    -------
    None
    """
    # One name per line, ordered by index, so line N holds the class with index N.
    ordered_names = sorted(class_index, key=class_index.__getitem__)

    # Class names may contain non-ASCII characters; an explicit encoding keeps
    # the file identical regardless of the platform's default locale.
    with open(output_dir / "darknet.labels", "w", encoding="utf-8") as f:
        f.writelines(f"{class_name}\n" for class_name in ordered_names)
52 changes: 14 additions & 38 deletions darwin/exporter/formats/yolo.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
from pathlib import Path
from typing import Dict, Iterable
from typing import Iterable

import darwin.datatypes as dt

ClassIndex = Dict[str, int]
from darwin.exporter.formats.helpers.yolo_class_builder import (
ClassIndex,
build_class_index,
export_file,
save_class_index,
)


def export(annotation_files: Iterable[dt.AnnotationFile], output_dir: Path) -> None:
Expand All @@ -21,42 +25,22 @@ def export(annotation_files: Iterable[dt.AnnotationFile], output_dir: Path) -> N

annotation_files = list(annotation_files)

class_index = _build_class_index(annotation_files)
class_index = build_class_index(annotation_files)

for annotation_file in annotation_files:
_export_file(annotation_file, class_index, output_dir)

_save_class_index(class_index, output_dir)


def _export_file(annotation_file: dt.AnnotationFile, class_index: ClassIndex, output_dir: Path) -> None:
    """
    Writes the YOLO text for one annotation file into ``output_dir``.

    Parameters
    ----------
    annotation_file : dt.AnnotationFile
        The annotation file to export. Its ``path.name`` determines the name
        of the output file.
    class_index : ClassIndex
        The class index passed to ``_build_txt``.
    output_dir : Path
        The folder the ``.txt`` file is written to. It is created if missing.

    Returns
    -------
    None
    """
    # Pull-request review note: these functions now live in the shared helper file.
    txt = _build_txt(annotation_file, class_index)

    # `.with_suffix(".txt")` is not used because a name that does not end in
    # ".json" must keep its existing suffix (img.png -> img.png.txt).
    # Only a trailing ".json" is swapped: replacing every occurrence would
    # mangle names that contain ".json" elsewhere in the name.
    json_suffix = ".json"
    filename = annotation_file.path.name
    if filename.endswith(json_suffix):
        filename_to_write = filename[: -len(json_suffix)] + ".txt"
    else:
        filename_to_write = filename + ".txt"
    output_file_path = output_dir / filename_to_write

    output_file_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_file_path, "w") as f:
        f.write(txt)
export_file(annotation_file, class_index, output_dir, _build_txt)


def _build_class_index(annotation_files: Iterable[dt.AnnotationFile]) -> ClassIndex:
    """
    Maps every exportable class name to its alphabetical, zero-based position.

    Only classes of annotations typed ``bounding_box``, ``polygon`` or
    ``complex_polygon`` are included.
    """
    exportable_types = ["bounding_box", "polygon", "complex_polygon"]

    names = set()
    for current_file in annotation_files:
        for item in current_file.annotations:
            item_class = item.annotation_class
            if item_class.annotation_type in exportable_types:
                names.add(item_class.name)

    ordered_names = sorted(names)
    return dict(zip(ordered_names, range(len(ordered_names))))
save_class_index(class_index, output_dir)


def _build_txt(annotation_file: dt.AnnotationFile, class_index: ClassIndex) -> str:
yolo_lines = []
for annotation in annotation_file.annotations:
annotation_type = annotation.annotation_class.annotation_type

if isinstance(annotation, dt.VideoAnnotation):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixes type error - possibly YOLO can support Video, but our usage of it in the exporter doesn't and never has.

raise ValueError("YOLO format does not support video annotations for export or conversion.")

if annotation_type == "bounding_box":
data = annotation.data
elif annotation_type in ["polygon", "complex_polygon"]:
Expand Down Expand Up @@ -86,11 +70,3 @@ def _build_txt(annotation_file: dt.AnnotationFile, class_index: ClassIndex) -> s

yolo_lines.append(f"{i} {x} {y} {w} {h}")
return "\n".join(yolo_lines)


def _save_class_index(class_index: ClassIndex, output_dir: Path) -> None:
    """
    Writes the class names, one per line and ordered by index, to
    ``darknet.labels`` inside ``output_dir``.
    """
    labels_path = output_dir / "darknet.labels"
    names_by_index = sorted(class_index, key=lambda name: class_index[name])

    with open(labels_path, "w") as labels_file:
        for name in names_by_index:
            labels_file.write(f"{name}\n")
262 changes: 262 additions & 0 deletions darwin/exporter/formats/yolo_segmented.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,262 @@
from collections import namedtuple
from enum import Enum, auto
from logging import getLogger
from multiprocessing.pool import CLOSE
from pathlib import Path
from typing import Iterable, List

from darwin.datatypes import AnnotationFile, VideoAnnotation
from darwin.exporter.formats.helpers.yolo_class_builder import (
ClassIndex,
build_class_index,
export_file,
save_class_index,
)

logger = getLogger(__name__)

# Controls whether exported shapes are explicitly closed. When True, the
# handlers in this module append a closing coordinate to each shape's point
# list: the first point for polygons, the starting corner for bounding boxes.
CLOSE_VERTICES: bool = False # Set true if polygons need to be closed
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are relatively confident that we don't need to close vertices on a polygon, but switching this constant to True will make the exporter close vertices by adding the first coordinate at the end.



# Lightweight (x, y) pair for a single vertex. The handlers in this module
# store coordinates in it after dividing by the image width and height.
Point = namedtuple("Point", ["x", "y"])


def export(annotation_files: Iterable[AnnotationFile], output_dir: Path) -> None:
    """
    Exports the given annotation files in YoloV8 segmentation format.

    One text file is written per annotation file, followed by the class
    labels file.

    Parameters
    ----------
    annotation_files : Iterable[AnnotationFile]
        The ``AnnotationFile`` objects to be exported.
    output_dir : Path
        The folder where the exported YoloV8 files will be written.

    Returns
    -------
    None
    """
    # Materialise once: the files are walked twice, first to build the class
    # index and then to export each file.
    files_to_export = list(annotation_files)

    supported_types = ["bounding_box", "polygon"]
    class_index: ClassIndex = build_class_index(files_to_export, supported_types)

    for current_file in files_to_export:
        export_file(current_file, class_index, output_dir, _build_text)

    save_class_index(class_index, output_dir)


def normalise(value: float, height_or_width: int) -> float:
    """
    Expresses a pixel value as a fraction of an image dimension.

    Parameters
    ----------
    value : float
        The value to be normalised.
    height_or_width : int
        The image height or width that ``value`` is divided by.

    Returns
    -------
    float
        ``value`` divided by ``height_or_width``.
    """
    proportion = value / height_or_width
    return proportion


class YoloSegmentedAnnotationType(Enum):
    """
    The kinds of annotation the YoloV8 segmentation exporter distinguishes.
    """

    # Anything the exporter cannot or will not export.
    UNKNOWN = 1
    # A rectangle described by x, y, w and h.
    BOUNDING_BOX = 2
    # A simple polygon described by a list of points.
    POLYGON = 3


def _determine_annotation_type(data: dict, annotation_index: int) -> YoloSegmentedAnnotationType:
    """
    Classifies an annotation by the keys present in its data.

    Parameters
    ----------
    data : dict
        The annotation's data.
    annotation_index : int
        Position of the annotation in its file, used only in log messages.

    Returns
    -------
    YoloSegmentedAnnotationType
        ``BOUNDING_BOX`` when ``x``, ``y``, ``w`` and ``h`` are all present,
        ``POLYGON`` when a ``points`` entry is present and is not a list of
        lists, and ``UNKNOWN`` otherwise (including complex polygons).
    """
    if all(key in data for key in ("x", "y", "w", "h")):
        return YoloSegmentedAnnotationType.BOUNDING_BOX

    if "points" not in data:
        return YoloSegmentedAnnotationType.UNKNOWN

    # NOTE(review): assumes polygon data exposes its vertices under "points";
    # confirm against the annotation data schema.
    polygon_points = data["points"]

    # The emptiness check avoids an IndexError on an empty point list; such an
    # annotation is reported as a polygon and rejected later for having too
    # few points.
    if polygon_points and isinstance(polygon_points[0], list):
        logger.warning(f"Skipped annotation at index {annotation_index} because it's a complex polygon")
        return YoloSegmentedAnnotationType.UNKNOWN

    return YoloSegmentedAnnotationType.POLYGON


def _handle_bounding_box(data: dict, im_w: int, im_h: int, annotation_index: int, points: List[Point]) -> bool:
    """
    Converts a bounding box into a normalised four-cornered polygon.

    Parameters
    ----------
    data : dict
        The annotation's data; must contain ``x``, ``y``, ``w`` and ``h``.
    im_w : int
        The image width, used to normalise x coordinates.
    im_h : int
        The image height, used to normalise y coordinates.
    annotation_index : int
        Position of the annotation in its file, used only in log messages.
    points : List[Point]
        Output list. The corners are appended to it, and it is left untouched
        when the conversion fails.

    Returns
    -------
    bool
        ``True`` when the corners were appended, ``False`` when a required key
        was missing from ``data``.
    """
    logger.debug(f"Exporting bounding box at index {annotation_index}.")

    try:
        left, top = data["x"], data["y"]
        right, bottom = left + data["w"], top + data["h"]
    except KeyError as exc:
        logger.warning(
            f"Skipped annotation at index {annotation_index} because an "
            "expected key was not found in the data.",
            exc_info=exc,
        )
        return False

    # Walk the perimeter (top-left, top-right, bottom-right, bottom-left) so
    # that consecutive vertices share an edge. Jumping straight to the
    # opposite corner would describe a self-intersecting shape, not a box.
    corners = [(left, top), (right, top), (right, bottom), (left, bottom)]
    if CLOSE_VERTICES:
        corners.append((left, top))

    logger.debug("Coordinates for bounding box: " + ", ".join(f"({x}, {y})" for x, y in corners))

    # x is always scaled by the width and y by the height.
    normalised = [Point(x=normalise(x, im_w), y=normalise(y, im_h)) for x, y in corners]

    logger.debug(
        "Normalized coordinates for bounding box: " + ", ".join(f"({p.x}, {p.y})" for p in normalised)
    )

    points.extend(normalised)
    return True


def _handle_polygon(data: dict, im_w: int, im_h: int, annotation_index: int, points: List[Point]) -> bool:
    """
    Converts a simple polygon into normalised points.

    Parameters
    ----------
    data : dict
        The annotation's data; must contain a ``points`` list whose items
        each have ``x`` and ``y``.
    im_w : int
        The image width, used to normalise x coordinates.
    im_h : int
        The image height, used to normalise y coordinates.
    annotation_index : int
        Position of the annotation in its file, used only in log messages.
    points : List[Point]
        Output list. The polygon's points are appended to it, and it is left
        untouched when the conversion fails.

    Returns
    -------
    bool
        ``True`` when the points were appended, ``False`` when the annotation
        had to be skipped because of missing keys or an unexpected error.
    """
    logger.debug(f"Exporting polygon at index {annotation_index}.")

    # Collected locally so a failure part-way through never leaves the
    # caller's list half filled.
    polygon: List[Point] = []
    last_point = None
    try:
        for point_index, point in enumerate(data["points"]):
            last_point = point_index
            polygon.append(Point(x=point["x"] / im_w, y=point["y"] / im_h))

    except KeyError as exc:
        # `is None` rather than truthiness: index 0 is a valid failing point.
        if last_point is None:
            detail = "Error occured while enumerating points."
        else:
            detail = f"Error occured while calculating point at index {last_point}."
        logger.warning(
            f"Skipped annotation at index {annotation_index} because an "
            f"expected key was not found in the data. {detail}",
            exc_info=exc,
        )
        return False

    except Exception as exc:
        # Best effort: one malformed annotation must not abort the export, but
        # it must be reported as a failure so the caller skips it.
        logger.error(
            f"An unexpected error occured while exporting annotation at index {annotation_index}.",
            exc_info=exc,
        )
        return False

    if CLOSE_VERTICES and polygon:
        polygon.append(polygon[0])

    points.extend(polygon)
    return True


def _build_text(annotation_file: AnnotationFile, class_index: ClassIndex) -> str:
    """
    Builds the YoloV8 format as segments

    Each exported annotation becomes one line: the class index followed by
    the space-separated, normalised ``x y`` pairs of its outline.

    Parameters
    ----------
    annotation_file : AnnotationFile
        The ``AnnotationFile`` to be exported.
    class_index : ClassIndex
        The class index.

    Returns
    -------
    str
        The YoloV8 format as segments

    Raises
    ------
    ValueError
        If the annotation file has no image width or height.
    """
    # Pull-request review note: logging here is deliberately extensive and
    # mostly at DEBUG level, so users do not normally see it but can opt in
    # for diagnostics if the export does not work as intended.
    yolo_lines: List[str] = []

    im_w = annotation_file.image_width
    im_h = annotation_file.image_height

    if not im_w or not im_h:
        raise ValueError(
            "Annotation file has no image width or height. "
            "YoloV8 Segments are encoded as a proportion of height and width. "
            "This file cannot be YoloV8 encoded without image dimensions."
        )

    handlers = {
        YoloSegmentedAnnotationType.BOUNDING_BOX: _handle_bounding_box,
        YoloSegmentedAnnotationType.POLYGON: _handle_polygon,
    }

    for annotation_index, annotation in enumerate(annotation_file.annotations):
        # Sanity checks
        if isinstance(annotation, VideoAnnotation):
            logger.warning(
                f"Skipped annotation at index {annotation_index} because video annotations don't contain the needed data."
            )
            continue

        data = annotation.data
        if data is None:
            logger.warning(f"Skipped annotation at index {annotation_index} because it's data fields are empty.")
            continue

        # Process annotations
        annotation_type = _determine_annotation_type(data, annotation_index)
        if annotation_type == YoloSegmentedAnnotationType.UNKNOWN:
            continue

        handler = handlers.get(annotation_type)
        if handler is None:
            # Defensive: only reachable if a type is added without a handler.
            logger.warning(
                f"Skipped annotation at index {annotation_index} because it's annotation type is not supported."
            )
            continue

        points: List[Point] = []
        if not handler(data, im_w, im_h, annotation_index, points):
            continue

        if len(points) < 3:
            logger.warning(
                f"Skipped annotation at index {annotation_index} because it "
                "has less than 3 points. Any valid polygon must have at least"
                " 3 points."
            )
            continue

        # The annotation type is inferred from the shape of the data, not from
        # the class type used to build the index, so the class may be absent.
        # Skip it like any other unsupported annotation rather than aborting
        # the whole export with a KeyError.
        class_name = annotation.annotation_class.name
        if class_name not in class_index:
            logger.warning(
                f"Skipped annotation at index {annotation_index} because its class "
                f"'{class_name}' is not in the class index."
            )
            continue

        # Create the line for the annotation
        coordinates = " ".join(f"{p.x} {p.y}" for p in points)
        yolo_lines.append(f"{class_index[class_name]} {coordinates}")

    return "\n".join(yolo_lines)
Loading
Loading