darwin/dataset/local_dataset.py (11 additions & 35 deletions)
@@ -80,9 +80,7 @@ def __init__(
         self.original_annotations_path: Optional[List[Path]] = None
         self.keep_empty_annotations = keep_empty_annotations

-        release_path, annotations_dir, images_dir = self._initial_setup(
-            dataset_path, release_name
-        )
+        release_path, annotations_dir, images_dir = self._initial_setup(dataset_path, release_name)
         self._validate_inputs(partition, split_type, annotation_type)
         # Get the list of classes

Expand Down Expand Up @@ -122,9 +120,7 @@ def _validate_inputs(self, partition, split_type, annotation_type):
if split_type not in ["random", "stratified"]:
raise ValueError("split_type should be either 'random', 'stratified'")
if annotation_type not in ["tag", "polygon", "bounding_box"]:
raise ValueError(
"annotation_type should be either 'tag', 'bounding_box', or 'polygon'"
)
raise ValueError("annotation_type should be either 'tag', 'bounding_box', or 'polygon'")

def _setup_annotations_and_images(
self,
@@ -152,9 +148,7 @@ def _setup_annotations_and_images(
                 darwin_json, images_dir, with_folders, json_version, annotation_filepath
             )
             if image_path.exists():
-                if not keep_empty_annotations and is_stream_list_empty(
-                    darwin_json["annotations"]
-                ):
+                if not keep_empty_annotations and is_stream_list_empty(darwin_json["annotations"]):
                     continue
                 self.images_path.append(image_path)
                 self.annotations_path.append(annotation_filepath)
@@ -221,9 +215,7 @@ def get_height_and_width(self, index: int) -> Tuple[float, float]:
         parsed = parse_darwin_json(self.annotations_path[index], index)
         return parsed.image_height, parsed.image_width

-    def extend(
-        self, dataset: "LocalDataset", extend_classes: bool = False
-    ) -> "LocalDataset":
+    def extend(self, dataset: "LocalDataset", extend_classes: bool = False) -> "LocalDataset":
         """
         Extends the current dataset with another one.

@@ -318,10 +310,7 @@ def parse_json(self, index: int) -> Dict[str, Any]:
         # Filter out unused classes and annotations of a different type
         if self.classes is not None:
             annotations = [
-                a
-                for a in annotations
-                if a.annotation_class.name in self.classes
-                and self.annotation_type_supported(a)
+                a for a in annotations if a.annotation_class.name in self.classes and self.annotation_type_supported(a)
             ]
         return {
             "image_id": index,
@@ -338,20 +327,15 @@ def annotation_type_supported(self, annotation) -> bool:
         elif self.annotation_type == "bounding_box":
             is_bounding_box = annotation_type == "bounding_box"
             is_supported_polygon = (
-                annotation_type in ["polygon", "complex_polygon"]
-                and "bounding_box" in annotation.data
+                annotation_type in ["polygon", "complex_polygon"] and "bounding_box" in annotation.data
             )
             return is_bounding_box or is_supported_polygon
         elif self.annotation_type == "polygon":
             return annotation_type in ["polygon", "complex_polygon"]
         else:
-            raise ValueError(
-                "annotation_type should be either 'tag', 'bounding_box', or 'polygon'"
-            )
+            raise ValueError("annotation_type should be either 'tag', 'bounding_box', or 'polygon'")

-    def measure_mean_std(
-        self, multi_threaded: bool = True
-    ) -> Tuple[np.ndarray, np.ndarray]:
+    def measure_mean_std(self, multi_threaded: bool = True) -> Tuple[np.ndarray, np.ndarray]:
         """
         Computes mean and std of trained images, given the train loader.

@@ -374,9 +358,7 @@ def measure_mean_std(
             results = pool.map(self._return_mean, self.images_path)
             mean = np.sum(np.array(results), axis=0) / len(self.images_path)
             # Online image_classification deviation
-            results = pool.starmap(
-                self._return_std, [[item, mean] for item in self.images_path]
-            )
+            results = pool.starmap(self._return_std, [[item, mean] for item in self.images_path])
             std_sum = np.sum(np.array([item[0] for item in results]), axis=0)
             total_pixel_count = np.sum(np.array([item[1] for item in results]))
             std = np.sqrt(std_sum / total_pixel_count)
@@ -422,20 +404,14 @@ def _compute_weights(labels: List[int]) -> np.ndarray:
     @staticmethod
     def _return_mean(image_path: Path) -> np.ndarray:
         img = np.array(load_pil_image(image_path))
-        mean = np.array(
-            [np.mean(img[:, :, 0]), np.mean(img[:, :, 1]), np.mean(img[:, :, 2])]
-        )
+        mean = np.array([np.mean(img[:, :, 0]), np.mean(img[:, :, 1]), np.mean(img[:, :, 2])])
         return mean / 255.0

     # Loads an image with OpenCV and returns the channel wise std of the image.
     @staticmethod
     def _return_std(image_path: Path, mean: np.ndarray) -> Tuple[np.ndarray, float]:
         img = np.array(load_pil_image(image_path)) / 255.0
-        m2 = np.square(
-            np.array(
-                [img[:, :, 0] - mean[0], img[:, :, 1] - mean[1], img[:, :, 2] - mean[2]]
-            )
-        )
+        m2 = np.square(np.array([img[:, :, 0] - mean[0], img[:, :, 1] - mean[1], img[:, :, 2] - mean[2]]))
         return np.sum(np.sum(m2, axis=1), 1), m2.size / 3.0

     def __getitem__(self, index: int):
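The reformatted statistics helpers above implement a two-pass computation: pool.map collects each image's per-channel mean, those are averaged into mean, then pool.starmap collects per-image squared deviations and pixel counts so that std = sqrt(std_sum / total_pixel_count). A minimal single-threaded sketch of the same math, using small random arrays as hypothetical stand-ins for the loaded images:

import numpy as np

# Hypothetical stand-ins for the dataset's images (H x W x 3, values in [0, 1]).
images = [np.random.rand(4, 4, 3), np.random.rand(6, 6, 3)]

# Pass 1: average the per-image channel means (the role of _return_mean).
mean = np.mean([img.mean(axis=(0, 1)) for img in images], axis=0)

# Pass 2: accumulate squared deviations and pixel counts (the role of _return_std).
std_sum = np.sum([((img - mean) ** 2).sum(axis=(0, 1)) for img in images], axis=0)
total_pixel_count = sum(img.shape[0] * img.shape[1] for img in images)
std = np.sqrt(std_sum / total_pixel_count)  # per-channel standard deviation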
darwin/exporter/formats/darwin_1_0.py (10 additions & 32 deletions)
@@ -45,23 +45,17 @@ def _export_file(annotation_file: AnnotationFile, _: int, output_dir: Path) -> None:
     try:
         output: DictFreeForm = _build_json(annotation_file)
     except Exception as e:
-        raise ExportException_CouldNotBuildOutput(
-            f"Could not build output for {annotation_file.path}"
-        ) from e
+        raise ExportException_CouldNotBuildOutput(f"Could not build output for {annotation_file.path}") from e

     try:
         with open(output_file_path, "w") as f:
             op = json.dumps(
                 output,
-                option=json.OPT_INDENT_2
-                | json.OPT_SERIALIZE_NUMPY
-                | json.OPT_NON_STR_KEYS,
+                option=json.OPT_INDENT_2 | json.OPT_SERIALIZE_NUMPY | json.OPT_NON_STR_KEYS,
             ).decode("utf-8")
             f.write(op)
     except Exception as e:
-        raise ExportException_CouldNotWriteFile(
-            f"Could not write output for {annotation_file.path}"
-        ) from e
+        raise ExportException_CouldNotWriteFile(f"Could not write output for {annotation_file.path}") from e


 def _build_json(annotation_file: AnnotationFile) -> DictFreeForm:
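A note on the serialization call above: json.dumps is given an option bitmask and its result is decoded from bytes, which matches the orjson API rather than the standard library, so the module is presumably imported as `import orjson as json`. A minimal standalone sketch under that assumption:

import orjson

payload = {"image": {"filename": "example.png"}, "annotations": []}  # hypothetical output dict
op = orjson.dumps(
    payload,
    option=orjson.OPT_INDENT_2 | orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_NON_STR_KEYS,
).decode("utf-8")  # orjson.dumps returns bytes, hence the decode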
@@ -136,17 +130,11 @@ def _build_sub_annotation(sub: SubAnnotation) -> DictFreeForm:
 def _build_authorship(annotation: Union[VideoAnnotation, Annotation]) -> DictFreeForm:
     annotators = {}
     if annotation.annotators:
-        annotators = {
-            "annotators": [
-                _build_author(annotator) for annotator in annotation.annotators
-            ]
-        }
+        annotators = {"annotators": [_build_author(annotator) for annotator in annotation.annotators]}

     reviewers = {}
     if annotation.reviewers:
-        reviewers = {
-            "annotators": [_build_author(reviewer) for reviewer in annotation.reviewers]
-        }
+        reviewers = {"annotators": [_build_author(reviewer) for reviewer in annotation.reviewers]}

     return {**annotators, **reviewers}

@@ -155,19 +143,15 @@ def _build_video_annotation(annotation: VideoAnnotation) -> DictFreeForm:
     return {
         **annotation.get_data(
             only_keyframes=False,
-            post_processing=lambda annotation, _: _build_image_annotation(
-                annotation, skip_slots=True
-            ),
+            post_processing=lambda annotation, _: _build_image_annotation(annotation, skip_slots=True),
         ),
         "name": annotation.annotation_class.name,
         "slot_names": annotation.slot_names,
         **_build_authorship(annotation),
     }


-def _build_image_annotation(
-    annotation: Annotation, skip_slots: bool = False
-) -> DictFreeForm:
+def _build_image_annotation(annotation: Annotation, skip_slots: bool = False) -> DictFreeForm:
     json_subs = {}
     for sub in annotation.subs:
         json_subs.update(_build_sub_annotation(sub))
@@ -185,9 +169,7 @@ def _build_image_annotation(
     return {**base_json, "slot_names": annotation.slot_names}


-def _build_legacy_annotation_data(
-    annotation_class: AnnotationClass, data: DictFreeForm
-) -> DictFreeForm:
+def _build_legacy_annotation_data(annotation_class: AnnotationClass, data: DictFreeForm) -> DictFreeForm:
     v1_data = {}
     polygon_annotation_mappings = {"complex_polygon": "paths", "polygon": "path"}

@@ -250,9 +232,7 @@ def build_image_annotation(annotation_file: AnnotationFile) -> Dict[str, Any]:
     annotations: List[Dict[str, Any]] = []
     for annotation in annotation_file.annotations:
         payload = {
-            annotation.annotation_class.annotation_type: _build_annotation_data(
-                annotation
-            ),
+            annotation.annotation_class.annotation_type: _build_annotation_data(annotation),
             "name": annotation.annotation_class.name,
         }

@@ -280,8 +260,6 @@ def _build_annotation_data(annotation: Annotation) -> Dict[str, Any]:
         return {"path": annotation.data["paths"]}

     if annotation.annotation_class.annotation_type == "polygon":
-        return dict(
-            filter(lambda item: item[0] != "bounding_box", annotation.data.items())
-        )
+        return dict(filter(lambda item: item[0] != "bounding_box", annotation.data.items()))

     return dict(annotation.data)
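The polygon branch above strips the derived bounding_box key and exports the rest of the payload unchanged. A small illustration with hypothetical annotation data:

# Hypothetical polygon payload; only "bounding_box" is dropped by the filter.
data = {
    "path": [{"x": 0.0, "y": 0.0}, {"x": 1.0, "y": 0.0}, {"x": 1.0, "y": 1.0}],
    "bounding_box": {"x": 0.0, "y": 0.0, "w": 1.0, "h": 1.0},
}
filtered = dict(filter(lambda item: item[0] != "bounding_box", data.items()))
assert filtered == {"path": data["path"]}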
darwin/importer/formats/csv_tags_video.py (3 additions & 1 deletion)
@@ -51,7 +51,9 @@ def parse_path(path: Path) -> Optional[List[dt.AnnotationFile]]:
             file_annotation_map[filename].append(annotation)
     for filename in file_annotation_map:
         annotations = file_annotation_map[filename]
-        annotation_classes = {annotation.annotation_class for annotation in annotations}
+        annotation_classes = {
+            annotation.annotation_class for annotation in annotations
+        }
         filename_path = Path(filename)
         remote_path = str(filename_path.parent)
         if not remote_path.startswith("/"):
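For context, the lines following the change derive remote_path from the filename's parent directory; a bare filename stringifies to ".", not an absolute path, which is why the startswith("/") normalization that follows in the source exists. A quick illustration with hypothetical filenames:

from pathlib import Path

# A nested filename keeps its folder; a bare filename yields "." as its parent.
assert str(Path("videos/clip.mp4").parent) == "videos"
assert str(Path("clip.mp4").parent) == "."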
darwin/importer/importer.py (1 addition & 3 deletions)
@@ -603,9 +603,7 @@ def _warn_unsupported_annotations(parsed_files: List[AnnotationFile]) -> None:
             if annotation.annotation_class.annotation_type in UNSUPPORTED_CLASSES:
                 skipped_annotations.append(annotation)
         if len(skipped_annotations) > 0:
-            types = {
-                c.annotation_class.annotation_type for c in skipped_annotations
-            }  # noqa: C417
+            types = {c.annotation_class.annotation_type for c in skipped_annotations}  # noqa: C417
             console.print(
                 f"Import of annotation class types '{', '.join(types)}' is not yet supported. Skipping {len(skipped_annotations)} "
                 + "annotations from '{parsed_file.full_path}'.\n",
darwin/torch/dataset.py (6 additions & 26 deletions)
@@ -99,9 +99,7 @@ class ClassificationDataset(LocalDataset):
     be composed via torchvision.
     """

-    def __init__(
-        self, transform: Optional[Union[Callable, List]] = None, **kwargs
-    ) -> None:
+    def __init__(self, transform: Optional[Union[Callable, List]] = None, **kwargs) -> None:
         super().__init__(annotation_type="tag", **kwargs)

         if transform is not None and isinstance(transform, list):
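A hedged usage sketch for the class above. The keyword names (dataset_path, partition, split_type) come from the LocalDataset code earlier in this diff, but the values shown and the shape of the returned sample are assumptions, not confirmed by the changed lines:

from pathlib import Path

from darwin.torch.dataset import ClassificationDataset

dataset = ClassificationDataset(
    dataset_path=Path("datasets/my-team/my-dataset"),  # hypothetical local path
    partition="train",      # assumed partition name
    split_type="random",    # "random" or "stratified", per _validate_inputs
)
image, target = dataset[0]  # assumed (image, tag tensor) sample layout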
@@ -154,11 +152,7 @@ def get_target(self, index: int) -> Tensor:

         data = self.parse_json(index)
         annotations = data.pop("annotations")
-        tags = [
-            a.annotation_class.name
-            for a in annotations
-            if a.annotation_class.annotation_type == "tag"
-        ]
+        tags = [a.annotation_class.name for a in annotations if a.annotation_class.annotation_type == "tag"]

         if not self.is_multi_label:
             # Binary or multiclass must have a label per image
@@ -182,11 +176,7 @@ def check_if_multi_label(self) -> None:
         for idx in range(len(self)):
             target = self.parse_json(idx)
             annotations = target.pop("annotations")
-            tags = [
-                a.annotation_class.name
-                for a in annotations
-                if a.annotation_class.annotation_type == "tag"
-            ]
+            tags = [a.annotation_class.name for a in annotations if a.annotation_class.annotation_type == "tag"]

             if len(tags) > 1:
                 self.is_multi_label = True
@@ -334,15 +324,12 @@ def get_target(self, index: int) -> Dict[str, Any]:
                 path_key = "paths"

             if path_key not in annotation.data:
-                print(
-                    f"Warning: missing polygon in annotation {self.annotations_path[index]}"
-                )
+                print(f"Warning: missing polygon in annotation {self.annotations_path[index]}")
             # Extract the sequences of coordinates from the polygon annotation
             sequences = convert_polygons_to_sequences(
                 annotation.data[path_key],
                 height=target["height"],
                 width=target["width"],
-                rounding=False,
             )
             # Compute the bbox of the polygon
             x_coords = [s[0::2] for s in sequences]
@@ -366,12 +353,7 @@ def get_target(self, index: int) -> Dict[str, Any]:

         # Compute the area of the polygon
         # TODO fix with addictive/subtractive paths in complex polygons
-        poly_area: float = np.sum(
-            [
-                polygon_area(x_coord, y_coord)
-                for x_coord, y_coord in zip(x_coords, y_coords)
-            ]
-        )
+        poly_area: float = np.sum([polygon_area(x_coord, y_coord) for x_coord, y_coord in zip(x_coords, y_coords)])

         # Create and append the new entry for this annotation
         annotations.append(
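The reformatted poly_area line sums polygon_area over each path of the (possibly complex) polygon. As an assumption about what that helper computes, the shoelace formula is sketched below; the library's actual implementation may differ, and as the TODO notes, subtractive paths (holes) are not yet handled:

import numpy as np

def polygon_area_sketch(x: np.ndarray, y: np.ndarray) -> float:
    # Shoelace formula: 0.5 * |sum_i (x_i * y_{i-1} - y_i * x_{i-1})|
    return 0.5 * float(np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1))))

# Unit square has area 1.
assert polygon_area_sketch(np.array([0, 1, 1, 0]), np.array([0, 0, 1, 1])) == 1.0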
@@ -423,9 +405,7 @@ class SemanticSegmentationDataset(LocalDataset):
         Object used to convert polygons to semantic masks.
     """

-    def __init__(
-        self, transform: Optional[Union[List[Callable], Callable]] = None, **kwargs
-    ):
+    def __init__(self, transform: Optional[Union[List[Callable], Callable]] = None, **kwargs):
         super().__init__(annotation_type="polygon", **kwargs)
         if "__background__" not in self.classes:
             self.classes.insert(0, "__background__")