-
Notifications
You must be signed in to change notification settings - Fork 40
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[IO-1405][external] YoloV8 Segmentation dataset support #643
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,5 +13,6 @@ | |
"semantic_mask_grey", | ||
"semantic_mask_index", | ||
"yolo", | ||
"yolo_segmented", | ||
"nifti", | ||
] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
from pathlib import Path | ||
from typing import Callable, Dict, Iterable, List | ||
|
||
from darwin.datatypes import AnnotationFile | ||
|
||
# Maps an annotation class name to the integer class id written into the YOLO files.
ClassIndex = Dict[str, int]
|
||
|
||
def build_class_index(
    annotation_files: Iterable[AnnotationFile],
    include_types: Iterable[str] = ("bounding_box", "polygon", "complex_polygon"),
) -> ClassIndex:
    """
    Builds the class-name to class-id mapping shared by the YOLO exporters.

    Parameters
    ----------
    annotation_files : Iterable[AnnotationFile]
        The ``AnnotationFile``\\s whose annotations are scanned for class names.
    include_types : Iterable[str]
        Annotation types whose classes take part in the index. Any iterable is
        accepted (list, tuple, set, generator); it is consumed exactly once.

    Returns
    -------
    ClassIndex
        Class names mapped to consecutive ids starting at 0, assigned in
        sorted order of the class names so the result is deterministic.
    """
    # The default is an immutable tuple (no shared mutable default), and the
    # types are materialised once so membership tests are O(1) and one-shot
    # iterables are not exhausted by the first lookup.
    allowed_types = frozenset(include_types)

    class_names = set()
    for annotation_file in annotation_files:
        for annotation in annotation_file.annotations:
            annotation_class = annotation.annotation_class
            if annotation_class.annotation_type in allowed_types:
                class_names.add(annotation_class.name)

    return {name: index for index, name in enumerate(sorted(class_names))}
|
||
|
||
def export_file(
    annotation_file: AnnotationFile,
    class_index: ClassIndex,
    output_dir: Path,
    build_function: Callable[[AnnotationFile, ClassIndex], str],
) -> None:
    """
    Renders one annotation file to text and writes it next to the others in ``output_dir``.

    Parameters
    ----------
    annotation_file : AnnotationFile
        The ``AnnotationFile`` to export; only its ``path.name`` is used here.
    class_index : ClassIndex
        The class index handed through to ``build_function``.
    output_dir : Path
        Destination folder. It is created (with parents) when missing.
    build_function : Callable[[AnnotationFile, ClassIndex], str]
        Produces the full text content of the output file.

    Returns
    -------
    None
    """
    txt = build_function(annotation_file, class_index)

    # Only a trailing ".json" is swapped for ".txt". A plain str.replace would
    # also rewrite ".json" occurring in the middle of the name, and the name is
    # handled as a string so every other dot-separated part is kept verbatim.
    source_name = annotation_file.path.name
    json_suffix = ".json"
    if source_name.endswith(json_suffix):
        base_name = source_name[: -len(json_suffix)]
    else:
        base_name = source_name
    output_file_path = output_dir / f"{base_name}.txt"

    output_file_path.parent.mkdir(parents=True, exist_ok=True)
    # Explicit encoding keeps the output identical across platforms.
    with open(output_file_path, "w", encoding="utf-8") as f:
        f.write(txt)
|
||
|
||
def save_class_index(class_index: ClassIndex, output_dir: Path) -> None:
    """
    Writes the class names to ``darknet.labels``, one per line, ordered by class id.

    Parameters
    ----------
    class_index : ClassIndex
        Mapping of class name to class id.
    output_dir : Path
        Folder that receives ``darknet.labels``. It is created (with parents)
        when missing, so an export without any annotation file still succeeds.

    Returns
    -------
    None
    """
    ordered_names = [name for name, _ in sorted(class_index.items(), key=lambda item: item[1])]

    output_dir.mkdir(parents=True, exist_ok=True)
    # Class names are free text, so the encoding is pinned instead of relying
    # on the platform default.
    with open(output_dir / "darknet.labels", "w", encoding="utf-8") as f:
        f.writelines(f"{name}\n" for name in ordered_names)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,13 @@ | ||
from pathlib import Path | ||
from typing import Dict, Iterable | ||
from typing import Iterable | ||
|
||
import darwin.datatypes as dt | ||
|
||
ClassIndex = Dict[str, int] | ||
from darwin.exporter.formats.helpers.yolo_class_builder import ( | ||
ClassIndex, | ||
build_class_index, | ||
export_file, | ||
save_class_index, | ||
) | ||
|
||
|
||
def export(annotation_files: Iterable[dt.AnnotationFile], output_dir: Path) -> None: | ||
|
@@ -21,42 +25,22 @@ def export(annotation_files: Iterable[dt.AnnotationFile], output_dir: Path) -> N | |
|
||
annotation_files = list(annotation_files) | ||
|
||
class_index = _build_class_index(annotation_files) | ||
class_index = build_class_index(annotation_files) | ||
|
||
for annotation_file in annotation_files: | ||
_export_file(annotation_file, class_index, output_dir) | ||
|
||
_save_class_index(class_index, output_dir) | ||
|
||
|
||
def _export_file(annotation_file: dt.AnnotationFile, class_index: ClassIndex, output_dir: Path) -> None: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These functions are now in the shared helper file. |
||
txt = _build_txt(annotation_file, class_index) | ||
|
||
# Just using `.with_suffix(".txt")` would remove all suffixes, so we need to | ||
# do it manually. | ||
|
||
filename = annotation_file.path.name | ||
filename_to_write = filename.replace(".json", ".txt") if ".json" in filename else filename + ".txt" | ||
output_file_path = output_dir / filename_to_write | ||
|
||
output_file_path.parent.mkdir(parents=True, exist_ok=True) | ||
with open(output_file_path, "w") as f: | ||
f.write(txt) | ||
export_file(annotation_file, class_index, output_dir, _build_txt) | ||
|
||
|
||
def _build_class_index(annotation_files: Iterable[dt.AnnotationFile]) -> ClassIndex: | ||
classes = set() | ||
for annotation_file in annotation_files: | ||
for annotation in annotation_file.annotations: | ||
if annotation.annotation_class.annotation_type in ["bounding_box", "polygon", "complex_polygon"]: | ||
classes.add(annotation.annotation_class.name) | ||
return {k: v for (v, k) in enumerate(sorted(classes))} | ||
save_class_index(class_index, output_dir) | ||
|
||
|
||
def _build_txt(annotation_file: dt.AnnotationFile, class_index: ClassIndex) -> str: | ||
yolo_lines = [] | ||
for annotation in annotation_file.annotations: | ||
annotation_type = annotation.annotation_class.annotation_type | ||
|
||
if isinstance(annotation, dt.VideoAnnotation): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Fixes type error - possibly YOLO can support Video, but our usage of it in the exporter doesn't and never has. |
||
raise ValueError("YOLO format does not support video annotations for export or conversion.") | ||
|
||
if annotation_type == "bounding_box": | ||
data = annotation.data | ||
elif annotation_type in ["polygon", "complex_polygon"]: | ||
|
@@ -86,11 +70,3 @@ def _build_txt(annotation_file: dt.AnnotationFile, class_index: ClassIndex) -> s | |
|
||
yolo_lines.append(f"{i} {x} {y} {w} {h}") | ||
return "\n".join(yolo_lines) | ||
|
||
|
||
def _save_class_index(class_index: ClassIndex, output_dir: Path) -> None: | ||
sorted_items = sorted(class_index.items(), key=lambda item: item[1]) | ||
|
||
with open(output_dir / "darknet.labels", "w") as f: | ||
for class_name, _ in sorted_items: | ||
f.write(f"{class_name}\n") |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,262 @@ | ||
from collections import namedtuple | ||
from enum import Enum, auto | ||
from logging import getLogger | ||
from multiprocessing.pool import CLOSE | ||
from pathlib import Path | ||
from typing import Iterable, List | ||
|
||
from darwin.datatypes import AnnotationFile, VideoAnnotation | ||
from darwin.exporter.formats.helpers.yolo_class_builder import ( | ||
ClassIndex, | ||
build_class_index, | ||
export_file, | ||
save_class_index, | ||
) | ||
|
||
logger = getLogger(__name__) | ||
|
||
# When True, the bounding-box and polygon handlers in this module append the
# first vertex again at the end of the point list so the shape is explicitly closed.
CLOSE_VERTICES: bool = False  # Set true if polygons need to be closed
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We are relatively confident that we don't need to close vertices on a polygon, but switching this constant to |
||
|
||
|
||
# Simple (x, y) pair; this module fills it with coordinates that were already
# divided by the image width / height.
Point = namedtuple("Point", ["x", "y"])
|
||
|
||
def export(annotation_files: Iterable[AnnotationFile], output_dir: Path) -> None:
    """
    Exports the given annotation files in the YoloV8 segmentation text format.

    Parameters
    ----------
    annotation_files : Iterable[AnnotationFile]
        The ``AnnotationFile``\\s to be exported.
    output_dir : Path
        The folder that receives one ``.txt`` file per annotation file plus
        the ``darknet.labels`` class list.

    Returns
    -------
    None
    """
    # Materialised up front because the files are walked twice: once to build
    # the class index and once to write the per-file output.
    files = list(annotation_files)

    supported_types = ["bounding_box", "polygon"]
    index: ClassIndex = build_class_index(files, supported_types)

    for current_file in files:
        export_file(current_file, index, output_dir, _build_text)

    save_class_index(index, output_dir)
|
||
|
||
def normalise(value: float, height_or_width: int) -> float:
    """
    Expresses a pixel coordinate as a fraction of an image dimension.

    Parameters
    ----------
    value : float
        The coordinate to be normalised.
    height_or_width : int
        The image height (for y values) or width (for x values).

    Returns
    -------
    float
        ``value`` divided by ``height_or_width``.
    """
    proportion = value / height_or_width
    return proportion
|
||
|
||
class YoloSegmentedAnnotationType(Enum):
    """
    Kinds of annotation data the YoloV8 segmentation exporter distinguishes.
    """

    # Data that is neither a bounding box nor a simple polygon.
    UNKNOWN = 1
    # Data carrying "x", "y", "w" and "h".
    BOUNDING_BOX = 2
    # Data carrying a flat "points" list.
    POLYGON = 3
|
||
|
||
def _determine_annotation_type(data: dict, annotation_index: int) -> YoloSegmentedAnnotationType:
    """
    Classifies annotation data by the keys it carries.

    Parameters
    ----------
    data : dict
        The annotation's data dictionary.
    annotation_index : int
        Position of the annotation in its file; only used in log messages.

    Returns
    -------
    YoloSegmentedAnnotationType
        ``BOUNDING_BOX`` when "x", "y", "w" and "h" are all present,
        ``POLYGON`` when "points" is a non-empty flat list, and ``UNKNOWN``
        for everything else (including complex polygons and empty point lists).
    """
    if all(key in data for key in ("x", "y", "w", "h")):
        return YoloSegmentedAnnotationType.BOUNDING_BOX

    if "points" not in data:
        return YoloSegmentedAnnotationType.UNKNOWN

    polygon_points = data["points"]
    if not polygon_points:
        # Indexing an empty list below would raise IndexError and abort the export.
        logger.warning(f"Skipped annotation at index {annotation_index} because it has no points")
        return YoloSegmentedAnnotationType.UNKNOWN

    # A list of lists means several paths, i.e. a complex polygon.
    if isinstance(polygon_points[0], list):
        logger.warning(f"Skipped annotation at index {annotation_index} because it's a complex polygon")
        return YoloSegmentedAnnotationType.UNKNOWN

    return YoloSegmentedAnnotationType.POLYGON
|
||
|
||
def _handle_bounding_box(data: dict, im_w: int, im_h: int, annotation_index: int, points: List[Point]) -> bool:
    """
    Converts a bounding box into the normalised corner points of a polygon.

    Parameters
    ----------
    data : dict
        Bounding box data with the keys "x", "y", "w" and "h".
    im_w : int
        Image width used to normalise x coordinates.
    im_h : int
        Image height used to normalise y coordinates.
    annotation_index : int
        Position of the annotation in its file; only used in log messages.
    points : List[Point]
        Output list; the corner points are appended to it on success and it
        is left untouched on failure.

    Returns
    -------
    bool
        ``True`` when the corners were appended, ``False`` when a required
        key was missing and the annotation has to be skipped.
    """
    logger.debug(f"Exporting bounding box at index {annotation_index}.")

    try:
        left, top = data["x"], data["y"]
        right, bottom = left + data["w"], top + data["h"]
    except KeyError as exc:
        logger.warning(
            f"Skipped annotation at index {annotation_index} because an "
            "expected key was not found in the data.",
            exc_info=exc,
        )
        return False

    # Walk the perimeter in order (top-left, top-right, bottom-right,
    # bottom-left). Jumping between opposite corners would describe a
    # self-intersecting "bow-tie" instead of a rectangle.
    corners = [(left, top), (right, top), (right, bottom), (left, bottom)]
    if CLOSE_VERTICES:
        corners.append(corners[0])

    logger.debug("Coordinates for bounding box: " + ", ".join(f"({x}, {y})" for x, y in corners))

    # x is always scaled by the width and y by the height.
    normalised = [Point(x=normalise(x, im_w), y=normalise(y, im_h)) for x, y in corners]

    logger.debug("Normalized coordinates for bounding box: " + ", ".join(f"({p.x}, {p.y})" for p in normalised))

    points.extend(normalised)
    return True
|
||
|
||
def _handle_polygon(data: dict, im_w: int, im_h: int, annotation_index: int, points: List[Point]) -> bool:
    """
    Converts a simple polygon into normalised points.

    Parameters
    ----------
    data : dict
        Polygon data whose "points" entry is a list of dicts with "x" and "y".
    im_w : int
        Image width used to normalise x coordinates.
    im_h : int
        Image height used to normalise y coordinates.
    annotation_index : int
        Position of the annotation in its file; only used in log messages.
    points : List[Point]
        Output list; the polygon points are appended to it on success and it
        is left untouched on failure.

    Returns
    -------
    bool
        ``True`` when the points were appended, ``False`` when the annotation
        could not be converted and has to be skipped.
    """
    logger.debug(f"Exporting polygon at index {annotation_index}.")

    # Collected locally so a failure half-way through never leaks a partial
    # polygon into the caller's list.
    polygon: List[Point] = []
    last_point = None
    try:
        for point_index, point in enumerate(data["points"]):
            last_point = point_index
            polygon.append(Point(x=point["x"] / im_w, y=point["y"] / im_h))

    except KeyError as exc:
        # `is not None` rather than truthiness: index 0 is a valid position.
        if last_point is not None:
            detail = f"Error occured while calculating point at index {last_point}."
        else:
            detail = "Error occured while enumerating points."
        logger.warning(
            f"Skipped annotation at index {annotation_index} because an "
            f"expected key was not found in the data. {detail}",
            exc_info=exc,
        )
        return False

    except Exception as exc:
        logger.error(
            f"An unexpected error occured while exporting annotation at index {annotation_index}.",
            exc_info=exc,
        )
        # Report the failure so the caller skips this annotation.
        return False

    if CLOSE_VERTICES and polygon:
        polygon.append(polygon[0])

    points.extend(polygon)
    return True
|
||
|
||
def _build_text(annotation_file: AnnotationFile, class_index: ClassIndex) -> str:
    """
    Builds the YoloV8 segmentation text for one annotation file.

    Every exported annotation becomes one line made of the class id followed
    by the space-separated, normalised ``x y`` pairs of its outline.
    Annotations that cannot be represented are skipped with a log message.

    Parameters
    ----------
    annotation_file : AnnotationFile
        The ``AnnotationFile`` to be exported.
    class_index : ClassIndex
        The class index.

    Returns
    -------
    str
        The YoloV8 segment lines joined by newlines (empty when nothing was
        exportable).

    Raises
    ------
    ValueError
        If the annotation file has no image width or height.
    """
    im_w = annotation_file.image_width
    im_h = annotation_file.image_height

    if not im_w or not im_h:
        raise ValueError(
            "Annotation file has no image width or height. "
            "YoloV8 Segments are encoded as a proportion of height and width. "
            "This file cannot be YoloV8 encoded without image dimensions."
        )

    # One converter per supported type; anything else has no entry and is skipped.
    handlers = {
        YoloSegmentedAnnotationType.BOUNDING_BOX: _handle_bounding_box,
        YoloSegmentedAnnotationType.POLYGON: _handle_polygon,
    }

    yolo_lines: List[str] = []

    for annotation_index, annotation in enumerate(annotation_file.annotations):
        # Sanity checks
        if isinstance(annotation, VideoAnnotation):
            logger.warning(
                f"Skipped annotation at index {annotation_index} because video annotations don't contain the needed data."
            )
            continue

        data = annotation.data
        if data is None:
            logger.warning(f"Skipped annotation at index {annotation_index} because its data fields are empty.")
            continue

        # Process annotations
        handler = handlers.get(_determine_annotation_type(data, annotation_index))
        if handler is None:
            continue

        points: List[Point] = []
        if not handler(data, im_w, im_h, annotation_index, points):
            continue

        if len(points) < 3:
            logger.warning(
                f"Skipped annotation at index {annotation_index} because it "
                "has less than 3 points. Any valid polygon must have at least"
                " 3 points."
            )
            continue

        # A class missing from the index must not abort the whole export.
        class_name = annotation.annotation_class.name
        class_id = class_index.get(class_name)
        if class_id is None:
            logger.warning(
                f"Skipped annotation at index {annotation_index} because its class "
                f"'{class_name}' is not part of the class index."
            )
            continue

        # Create the line for the annotation
        coordinates = " ".join(f"{p.x} {p.y}" for p in points)
        yolo_lines.append(f"{class_id} {coordinates}")

    return "\n".join(yolo_lines)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Extracted some normal YOLO functions out for common usage by YOLO and YOLO segmentation