diff --git a/darwin/dataset/local_dataset.py b/darwin/dataset/local_dataset.py index b2c4dff89..2286fb1a8 100644 --- a/darwin/dataset/local_dataset.py +++ b/darwin/dataset/local_dataset.py @@ -350,14 +350,14 @@ def annotation_type_supported(self, annotation) -> bool: ) def measure_mean_std( - self, multi_threaded: bool = True + self, multi_processed: bool = True ) -> Tuple[np.ndarray, np.ndarray]: """ Computes mean and std of trained images, given the train loader. Parameters ---------- - multi_threaded : bool, default: True + multi_processed : bool, default: True Uses multiprocessing to download the dataset in parallel. Returns @@ -367,7 +367,7 @@ def measure_mean_std( std : ndarray[double] Standard deviation (for each channel) of all pixels of the images in the input folder. """ - if multi_threaded: + if multi_processed: # Set up a pool of workers with mp.Pool(mp.cpu_count()) as pool: # Online mean diff --git a/darwin/dataset/remote_dataset.py b/darwin/dataset/remote_dataset.py index 7f9cc0467..9f7d967c5 100644 --- a/darwin/dataset/remote_dataset.py +++ b/darwin/dataset/remote_dataset.py @@ -183,7 +183,7 @@ def pull( *, release: Optional[Release] = None, blocking: bool = True, - multi_threaded: bool = True, + multi_processed: bool = True, only_annotations: bool = False, force_replace: bool = False, remove_extra: bool = False, @@ -203,7 +203,7 @@ def pull( The release to pull. blocking : bool, default: True If False, the dataset is not downloaded and a generator function is returned instead. - multi_threaded : bool, default: True + multi_processed : bool, default: True Uses multiprocessing to download the dataset in parallel. If blocking is False this has no effect. only_annotations : bool, default: False Download only the annotations and no corresponding images. @@ -364,7 +364,7 @@ def pull( successes, errors = exhaust_generator( progress=progress(), count=count, - multi_threaded=multi_threaded, + multi_processed=multi_processed, worker_count=max_workers, ) if errors: diff --git a/darwin/dataset/utils.py b/darwin/dataset/utils.py index 7aa3fa1bc..7e9ecfee9 100644 --- a/darwin/dataset/utils.py +++ b/darwin/dataset/utils.py @@ -223,12 +223,12 @@ def _f(x: Any) -> Any: def exhaust_generator( progress: Generator, count: int, - multi_threaded: bool, + multi_processed: bool, worker_count: Optional[int] = None, ) -> Tuple[List[Dict[str, Any]], List[Exception]]: """ - Exhausts the generator passed as parameter. Can be done multi threaded if desired. + Exhausts the generator passed as parameter. Can be done multi processed if desired. Creates and returns a coco record from the given annotation. Uses ``BoxMode.XYXY_ABS`` from ``detectron2.structures`` if available, defaults to ``box_mode = 0`` @@ -260,7 +260,7 @@ def exhaust_generator( """ successes = [] errors = [] - if multi_threaded: + if multi_processed: progress_bar: ProgressBar = ProgressBar(total=count) responses = [] diff --git a/darwin/importer/importer.py b/darwin/importer/importer.py index 01827c7a9..b9ebb8a0f 100644 --- a/darwin/importer/importer.py +++ b/darwin/importer/importer.py @@ -730,11 +730,11 @@ def _get_overwrite_value(append: bool) -> str: def _parse_empty_masks( - annotation: dt.Annotation, - raster_layer: dt.Annotation, - raster_layer_dense_rle_ids: Optional[Set[str]] = None, - raster_layer_dense_rle_ids_frames: Optional[Dict[int, Set[str]]] = None, - ): + annotation: dt.Annotation, + raster_layer: dt.Annotation, + raster_layer_dense_rle_ids: Optional[Set[str]] = None, + raster_layer_dense_rle_ids_frames: Optional[Dict[int, Set[str]]] = None, +): """ Check if the mask is empty (i.e. masks that do not have a corresponding raster layer) if so, skip import of the mask. This function is used for both dt.Annotation and dt.VideoAnnotation objects. @@ -749,13 +749,17 @@ def _parse_empty_masks( tuple[Optional[Set[str]], Optional[Dict[int, Set[str]]]]: raster_layer_dense_rle_ids, raster_layer_dense_rle_ids_frames """ # For dt.VideoAnnotation, create dense_rle ids for each frame. - if raster_layer_dense_rle_ids_frames is None and isinstance(annotation, dt.VideoAnnotation): + if raster_layer_dense_rle_ids_frames is None and isinstance( + annotation, dt.VideoAnnotation + ): assert isinstance(raster_layer, dt.VideoAnnotation) # build a dict of frame_index: set of dense_rle_ids (for each frame in VideoAnnotation object) raster_layer_dense_rle_ids_frames = {} for frame_index, _rl in raster_layer.frames.items(): - raster_layer_dense_rle_ids_frames[frame_index] = set(_rl.data["dense_rle"][::2]) + raster_layer_dense_rle_ids_frames[frame_index] = set( + _rl.data["dense_rle"][::2] + ) # check every frame # - if the 'annotation_class_id' is in raster_layer's mask_annotation_ids_mapping dict @@ -764,22 +768,26 @@ def _parse_empty_masks( for frame_index, _annotation in annotation.frames.items(): _annotation_id = _annotation.id if ( - frame_index in raster_layer_dense_rle_ids_frames and - raster_layer.frames[frame_index].data["mask_annotation_ids_mapping"][_annotation_id] + frame_index in raster_layer_dense_rle_ids_frames + and raster_layer.frames[frame_index].data[ + "mask_annotation_ids_mapping" + ][_annotation_id] not in raster_layer_dense_rle_ids_frames[frame_index] ): # skip import of the mask, and remove it from mask_annotation_ids_mapping logger.warning( f"Skipping import of mask annotation '{_annotation.annotation_class.name}' as it does not have a corresponding raster layer" ) - del raster_layer.frames[frame_index]["mask_annotation_ids_mapping"][_annotation_id] + del raster_layer.frames[frame_index]["mask_annotation_ids_mapping"][ + _annotation_id + ] return raster_layer_dense_rle_ids, raster_layer_dense_rle_ids_frames # For dt.Annotation, create dense_rle ids. elif raster_layer_dense_rle_ids is None and isinstance(annotation, dt.Annotation): assert isinstance(raster_layer, dt.Annotation) - # build a set of dense_rle_ids (for the Annotation object) + # build a set of dense_rle_ids (for the Annotation object) raster_layer_dense_rle_ids = set(raster_layer.data["dense_rle"][::2]) # check the annotation (i.e. mask) @@ -800,6 +808,7 @@ def _parse_empty_masks( return raster_layer_dense_rle_ids, raster_layer_dense_rle_ids_frames + def _import_annotations( client: "Client", # TODO: This is unused, should it be? id: Union[str, int], @@ -861,11 +870,14 @@ def _import_annotations( None, ) if raster_layer: - raster_layer_dense_rle_ids, raster_layer_dense_rle_ids_frames = _parse_empty_masks( + ( + raster_layer_dense_rle_ids, + raster_layer_dense_rle_ids_frames, + ) = _parse_empty_masks( annotation, raster_layer, raster_layer_dense_rle_ids, - raster_layer_dense_rle_ids_frames + raster_layer_dense_rle_ids_frames, ) actors: List[dt.DictFreeForm] = []