Skip to content

Commit

Permalink
[ANN-760][external] nifti import to multi slot item (#549)
Browse files Browse the repository at this point in the history
* removed pixdim and added original affine logic

* removed pixdims

* removed print

* fixed issue locally, haven't done full testing

* working version

* PR changes

* updated darwin-py string to reflect new pip install mechanism

* fix for handle_video bug

* removed print statement

* fixed bad merge

* introduced slot_names and is_mpr flag allowing users to upload a nifti annotation to a single slot or group of slots

* fixed for anisotropic files uploaded

* added pixdim logic

* changes made to nifti multi-slot

* removed image mode for nifti imports

* added a multi-slot test

* fixed import that was commented out

* changed data.zip

* points to the right file

* changes made to add new error unit test

* changes made to add new error unit test

* removed name to local directory

* nathan's requested change

* reverted nathan's comment

---------

Co-authored-by: Nooshin Ghavami <nooshinghavami@Nooshins-MacBook-Pro.local>
Co-authored-by: Nooshin Ghavami <nooshinghavami@Nooshins-MBP.broadband>
  • Loading branch information
3 people committed May 10, 2023
1 parent 68c9a54 commit 6cdf6ed
Show file tree
Hide file tree
Showing 6 changed files with 166 additions and 53 deletions.
8 changes: 3 additions & 5 deletions darwin/exporter/formats/nifti.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import ast
import json as native_json
from asyncore import loop
from pathlib import Path
Expand Down Expand Up @@ -161,9 +162,6 @@ def get_view_idx(frame_idx, groups):


def get_view_idx_from_slot_name(slot_name):
    """Return the view index for a slot name.

    Slot names "0.1", "0.2" and "0.3" map to view indices 0, 1 and 2
    respectively; any unknown slot name falls back to view 0.
    """
    # Fix: the original performed the dict lookup twice, discarding the
    # first result (a no-op), and carried commented-out mpr scaffolding.
    view_idx_by_slot = {"0.1": 0, "0.2": 1, "0.3": 2}
    return view_idx_by_slot.get(slot_name, 0)
Expand All @@ -176,7 +174,7 @@ def process_metadata(metadata):
original_affine = process_affine(metadata.get("original_affine"))
# If the original affine is in the medical payload of metadata then use it
if isinstance(pixdim, str):
pixdim = eval(pixdim)
pixdim = ast.literal_eval(pixdim)
if isinstance(pixdim, tuple) or isinstance(pixdim, list):
if len(pixdim) == 4:
pixdim = pixdim[1:]
Expand All @@ -195,7 +193,7 @@ def process_metadata(metadata):

def process_affine(affine):
if isinstance(affine, str):
affine = np.squeeze(np.array([eval(l) for l in affine.split("\n")]))
affine = np.squeeze(np.array([ast.literal_eval(l) for l in affine.split("\n")]))
elif isinstance(affine, list):
affine = np.array(affine).astype(np.float)
else:
Expand Down
128 changes: 86 additions & 42 deletions darwin/importer/formats/nifti.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import sys
import warnings
import zipfile
from collections import OrderedDict, defaultdict
from pathlib import Path
from typing import Dict, List, Optional, Sequence, Union
from typing import Dict, List, Optional, Sequence, Union, Tuple

import orjson as json
from rich.console import Console
Expand All @@ -15,11 +16,11 @@
import nibabel as nib
except ImportError:
import_fail_string = """
You must install darwin-py with pip install darwin-py\[medical]
You must install darwin-py with pip install nibabel connected-components-3d
in order to import with using nifti format
"""
console.print(import_fail_string)
exit()
sys.exit(1)
import numpy as np
from jsonschema import validate
from upolygon import find_contours
Expand Down Expand Up @@ -68,13 +69,24 @@ def parse_path(path: Path) -> Optional[List[dt.AnnotationFile]]:
path,
class_map=nifti_annotation.get("class_map"),
mode=nifti_annotation.get("mode", "image"),
slot_names=nifti_annotation.get("slot_names", []),
is_mpr=nifti_annotation.get("is_mpr", False),
)
annotation_files.append(annotation_file)
return annotation_files


def _parse_nifti(nifti_path: Path, filename: Path, json_path: Path, class_map: Dict, mode: str) -> dt.AnnotationFile:
img: np.ndarray = process_nifti(nib.load(nifti_path))
def _parse_nifti(
nifti_path: Path,
filename: Path,
json_path: Path,
class_map: Dict,
mode: str,
slot_names: List[str],
is_mpr: bool,
) -> dt.AnnotationFile:

img, pixdims = process_nifti(nib.load(nifti_path))

shape = img.shape
processed_class_map = process_class_map(class_map)
Expand All @@ -86,37 +98,26 @@ def _parse_nifti(nifti_path: Path, filename: Path, json_path: Path, class_map: D
class_img = np.isin(img, class_idxs).astype(np.uint8)
cc_img, num_labels = cc3d.connected_components(class_img, return_N=True)
for instance_id in range(1, num_labels):
video_annotation = get_video_annotation(cc_img, class_idxs=[instance_id], class_name=class_name)
if video_annotation:
video_annotations.append(video_annotation)
elif mode == "image": # For each frame and each class produce a single frame video annotation
for i in range(shape[-1]):
slice_mask = img[:, :, i].astype(np.uint8)
for class_name, class_idxs in processed_class_map.items():
frame_annotations = {}
if class_name == "background":
continue
class_mask = np.isin(slice_mask, class_idxs).astype(np.uint8).copy()
polygon = mask_to_polygon(mask=class_mask, class_name=class_name)
if polygon is None:
continue
frame_annotations[i] = polygon
video_annotation = dt.make_video_annotation(
frame_annotations,
keyframes={i: True, i + 1: True},
segments=[[i, i + 1]],
interpolated=False,
slot_names=[],
_video_annotations = get_video_annotation(
cc_img,
class_idxs=[instance_id],
class_name=class_name,
slot_names=slot_names,
is_mpr=is_mpr,
pixdims=pixdims,
)
video_annotations.append(video_annotation)
if _video_annotations:
video_annotations += _video_annotations
elif mode == "video": # For each class produce a single video annotation
for class_name, class_idxs in processed_class_map.items():
if class_name == "background":
continue
video_annotation = get_video_annotation(img, class_idxs=class_idxs, class_name=class_name)
if video_annotation is None:
_video_annotations = get_video_annotation(
img, class_idxs=class_idxs, class_name=class_name, slot_names=slot_names, is_mpr=is_mpr, pixdims=pixdims
)
if _video_annotations is None:
continue
video_annotations.append(video_annotation)
video_annotations += _video_annotations
annotation_classes = set(
[dt.AnnotationClass(class_name, "polygon", "polygon") for class_name in class_map.values()]
)
Expand All @@ -126,19 +127,53 @@ def _parse_nifti(nifti_path: Path, filename: Path, json_path: Path, class_map: D
remote_path="/",
annotation_classes=annotation_classes,
annotations=video_annotations,
slots=[dt.Slot(name=None, type="dicom", source_files=[{"url": None, "file_name": str(filename)}])],
slots=[
dt.Slot(name=slot_name, type="dicom", source_files=[{"url": None, "file_name": str(filename)}])
for slot_name in slot_names
],
)


def get_video_annotation(volume: np.ndarray, class_name: str, class_idxs: List[int]) -> Optional[dt.VideoAnnotation]:
def get_video_annotation(
    volume: np.ndarray,
    class_name: str,
    class_idxs: List[int],
    slot_names: List[str],
    is_mpr: bool,
    pixdims: Tuple[float, ...],
) -> Optional[List[dt.VideoAnnotation]]:
    """Build video annotations for one class from a labelled volume.

    When ``is_mpr`` is False the volume is sliced along the last axis only
    (axial view) and annotated into ``slot_names`` as-is. When ``is_mpr`` is
    True, exactly three slot names are required — one per orthogonal view —
    and one set of annotations is produced per view/slot pair.

    Raises:
        ValueError: if ``is_mpr`` is True and ``slot_names`` does not
            contain exactly three entries.
    """
    if not is_mpr:
        return nifti_to_video_annotation(volume, class_name, class_idxs, slot_names, view_idx=2, pixdims=pixdims)
    if len(slot_names) == 3:
        video_annotations = []
        for view_idx, slot_name in enumerate(slot_names):
            _video_annotations = nifti_to_video_annotation(
                volume, class_name, class_idxs, [slot_name], view_idx=view_idx, pixdims=pixdims
            )
            video_annotations += _video_annotations
        return video_annotations
    # Fix: raise ValueError (still an Exception subclass, so existing
    # pytest.raises(Exception) callers keep passing) instead of bare Exception.
    raise ValueError("If is_mpr is True, slot_names must be of length 3")


def nifti_to_video_annotation(volume, class_name, class_idxs, slot_names, view_idx=2, pixdims=(1, 1, 1)):
frame_annotations = OrderedDict()
for i in range(volume.shape[-1]):
slice_mask = volume[:, :, i].astype(np.uint8)
for i in range(volume.shape[view_idx]):
if view_idx == 2:
slice_mask = volume[:, :, i].astype(np.uint8)
_pixdims = [pixdims[0], pixdims[1]]
elif view_idx == 1:
slice_mask = volume[:, i, :].astype(np.uint8)
_pixdims = [pixdims[0], pixdims[2]]
elif view_idx == 0:
slice_mask = volume[i, :, :].astype(np.uint8)
_pixdims = [pixdims[1], pixdims[2]]

class_mask = np.isin(slice_mask, class_idxs).astype(np.uint8).copy()
if class_mask.sum() == 0:
continue

polygon = mask_to_polygon(mask=class_mask, class_name=class_name)
polygon = mask_to_polygon(mask=class_mask, class_name=class_name, pixdims=_pixdims)
if polygon is None:
continue
frame_annotations[i] = polygon
Expand All @@ -154,12 +189,20 @@ def get_video_annotation(volume: np.ndarray, class_name: str, class_idxs: List[i
keyframes={f_id: True for f_id in all_frame_ids},
segments=segments,
interpolated=False,
slot_names=[],
slot_names=slot_names,
)
return video_annotation
return [video_annotation]


def mask_to_polygon(mask: np.ndarray, class_name: str) -> Optional[dt.Annotation]:
def mask_to_polygon(mask: np.ndarray, class_name: str, pixdims: List[float]) -> Optional[dt.Annotation]:
def adjust_for_pixdims(x, y, pixdims):
if pixdims[1] > pixdims[0]:
return {"x": y, "y": x * pixdims[1] / pixdims[0]}
elif pixdims[1] < pixdims[0]:
return {"x": y * pixdims[0] / pixdims[1], "y": x}
else:
return {"x": y, "y": x}

_labels, external_paths, _internal_paths = find_contours(mask)
# annotations = []
if len(external_paths) > 1:
Expand All @@ -168,7 +211,7 @@ def mask_to_polygon(mask: np.ndarray, class_name: str) -> Optional[dt.Annotation
# skip paths with less than 2 points
if len(external_path) // 2 <= 2:
continue
path = [{"x": y, "y": x} for x, y in zip(external_path[0::2], external_path[1::2])]
path = [adjust_for_pixdims(x, y, pixdims) for x, y in zip(external_path[0::2], external_path[1::2])]
paths.append(path)
if len(paths) > 1:
polygon = dt.make_complex_polygon(class_name, paths)
Expand All @@ -185,7 +228,7 @@ def mask_to_polygon(mask: np.ndarray, class_name: str) -> Optional[dt.Annotation
return None
polygon = dt.make_polygon(
class_name,
point_path=[{"x": y, "y": x} for x, y in zip(external_path[0::2], external_path[1::2])],
point_path=[adjust_for_pixdims(x, y, pixdims) for x, y in zip(external_path[0::2], external_path[1::2])],
)
else:
return None
Expand Down Expand Up @@ -297,4 +340,5 @@ def process_nifti(input_data: Union[Sequence[nib.nifti1.Nifti1Image], nib.nifti1
# TODO: Future feature to pass custom ornt could go here.
ornt = [[0.0, -1.0], [1.0, -1.0], [1.0, -1.0]]
data_array = nib.orientations.apply_orientation(img.get_fdata(), ornt)
return data_array
pixdims = img.header.get_zooms()
return data_array, pixdims
4 changes: 3 additions & 1 deletion darwin/importer/formats/nifti_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
"image": {"type": "string"},
"label": {"type": "string"},
"class_map": class_map,
"mode": {"type": "string", "enum": ["image", "video", "instances"]},
"mode": {"type": "string", "enum": ["video", "instances"]},
"is_mpr": {"type": "boolean"},
"slot_names": {"type": "array", "items": {"type": "string"}},
},
"required": ["image", "label", "class_map"],
"additionalProperties": False,
Expand Down
10 changes: 8 additions & 2 deletions darwin/importer/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,12 @@ def maybe_console(*args: Union[str, int, float]) -> None:
parsed_files = list(map(importer, tqdm(files) if is_console else files))

maybe_console("Finished.")

if not isinstance(parsed_files, list):
# Sometimes we have a list of lists of AnnotationFile, sometimes we have a list of AnnotationFile
# We flatten the list of lists
if isinstance(parsed_files, list):
if isinstance(parsed_files[0], list):
parsed_files = [item for sublist in parsed_files for item in sublist]
else:
parsed_files = [parsed_files]

parsed_files = [f for f in parsed_files if f is not None]
Expand Down Expand Up @@ -477,6 +481,8 @@ def import_annotations(
for parsed_file in track(files_to_track):

image_id, default_slot_name = remote_files[parsed_file.full_path]
if parsed_file.slots:
default_slot_name = parsed_file.slots[0].name

errors, succes = _import_annotations(
dataset.client,
Expand Down
69 changes: 66 additions & 3 deletions tests/darwin/importer/formats/import_nifti_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@
VideoAnnotation,
)
from darwin.importer.formats.nifti import parse_path


from tests.fixtures import *


def test_image_annotation_nifti_import(team_slug: str):
def test_image_annotation_nifti_import_single_slot(team_slug: str):
with tempfile.TemporaryDirectory() as tmpdir:
with ZipFile("tests/data.zip") as zfile:
zfile.extractall(tmpdir)
Expand All @@ -40,6 +42,62 @@ def test_image_annotation_nifti_import(team_slug: str):
assert output_json_string["annotations"][0]["frames"] == expected_json_string["annotations"][0]["frames"]


def test_image_annotation_nifti_import_multi_slot(team_slug: str):
    """End-to-end check of an is_mpr nifti import spread across three slots.

    Parses a nifti label with is_mpr=True and three slot names, then compares
    the produced frame annotations against the checked-in expected JSON.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        with ZipFile("tests/data.zip") as zfile:
            zfile.extractall(tmpdir)
        label_path = (
            Path(tmpdir) / team_slug / "nifti" / "releases" / "latest" / "annotations" / "vol0_brain.nii.gz"
        )
        input_dict = {
            "data": [
                {
                    "image": "vol0 (1).nii",
                    "label": str(label_path),
                    "class_map": {"1": "brain"},
                    "mode": "video",
                    "is_mpr": True,
                    "slot_names": ["0.3", "0.2", "0.1"],
                }
            ]
        }
        upload_json = Path(tmpdir) / "annotations.json"
        upload_json.write_text(json.dumps(input_dict, indent=4, sort_keys=True, default=str))
        annotation_files = parse_path(path=upload_json)
        annotation_file = annotation_files[0]
        output_json_string = json.loads(serialise_annotation_file(annotation_file, as_dict=False))
        # Fix: the original opened both files with bare open() and never
        # closed them; context managers release the handles deterministically.
        expected_path = Path(tmpdir) / team_slug / "nifti" / "vol0_annotation_file_multi_slot.json"
        with open(expected_path, "r") as expected_file:
            expected_json_string = json.load(expected_file)
        with open("test_output_for_nifti_import_test_multi_slot.json", "w") as output_file:
            json.dump(output_json_string, output_file, indent=4)
        assert output_json_string["annotations"][0]["frames"] == expected_json_string["annotations"][0]["frames"]


def test_image_annotation_nifti_import_incorrect_number_slot(team_slug: str):
    # parse_path must reject an is_mpr upload that supplies fewer than
    # three slot names (here only two).
    with tempfile.TemporaryDirectory() as tmpdir:
        with ZipFile("tests/data.zip") as zfile:
            zfile.extractall(tmpdir)
        label_path = (
            Path(tmpdir) / team_slug / "nifti" / "releases" / "latest" / "annotations" / "vol0_brain.nii.gz"
        )
        payload = {
            "data": [
                {
                    "image": "vol0 (1).nii",
                    "label": str(label_path),
                    "class_map": {"1": "brain"},
                    "mode": "video",
                    "is_mpr": True,
                    "slot_names": ["0.3", "0.2"],
                }
            ]
        }
        upload_json = Path(tmpdir) / "annotations.json"
        upload_json.write_text(json.dumps(payload, indent=4, sort_keys=True, default=str))
        with pytest.raises(Exception):
            parse_path(path=upload_json)


def serialise_annotation_file(annotation_file: AnnotationFile, as_dict) -> Union[str, dict]:
"""
Serialises an ``AnnotationFile`` into a string.
Expand Down Expand Up @@ -149,10 +207,12 @@ def serialise_sub_annotation(sub_annotation: SubAnnotation, as_dict: bool = True
"data": [
{
"image": "vol0 (1).nii",
"label": "tests/v7/nifti/releases/latest/annotations/vol0_brain.nii.gz",
"label": "tests/v7/v7-darwin-json-v1/nifti/releases/latest/annotations/vol0_brain.nii.gz",
"class_map": {
"1": "brain"
},
"is_mpr": true,
"slot_names": ["0.3", "0.2", "0.1"],
"mode": "video"
}
]
Expand All @@ -161,9 +221,12 @@ def serialise_sub_annotation(sub_annotation: SubAnnotation, as_dict: bool = True
with tempfile.TemporaryDirectory() as tmp_dir:
path = Path(tmp_dir) / "annotations.json"
path.write_text(input_json_string)
print(path)
annotation_files = parse_path(path=path)
if isinstance(annotation_files, list):
annotation_file = annotation_files[0]
output_json_string = serialise_annotation_file(annotation_file, as_dict=False)
with open(Path("tests") / "v7" / "nifti" / "vol0_annotation_file.json", "w") as f:
with open(
Path("tests") / "v7" / "v7-darwin-json-v1" / "nifti" / "vol0_annotation_file_multi_slot.json", "w"
) as f:
f.write(output_json_string)
Binary file modified tests/data.zip
Binary file not shown.

0 comments on commit 6cdf6ed

Please sign in to comment.