From a9b67aec38f8d9f3830a0956514cd0efc8e59bc4 Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Mon, 6 May 2024 17:44:13 +0100 Subject: [PATCH 1/6] Import warning for overwriting --- darwin/backend_v2.py | 17 +++++++++++++++ darwin/importer/importer.py | 41 +++++++++++++++++++++++++++++++++++-- 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/darwin/backend_v2.py b/darwin/backend_v2.py index ea0d2cfdd..9e4e84d7a 100644 --- a/darwin/backend_v2.py +++ b/darwin/backend_v2.py @@ -253,3 +253,20 @@ def register_items(self, payload: Dict[str, Any], team_slug: str) -> None: return self._client._post_raw( f"/v2/teams/{team_slug}/items/register_existing", payload ) + + def _get_remote_annotations( + self, + item_id: str, + team_slug: str, + ) -> List: + """ + Returns the annotations currently present on a remote dataset item. + + Parameters + ---------- + item_id: str + The UUID of the item. + team_slug: str + The team slug. + """ + return self._client._get(f"v2/teams/{team_slug}/items/{item_id}/annotations") diff --git a/darwin/importer/importer.py b/darwin/importer/importer.py index 62a4d2e82..f855cfab3 100644 --- a/darwin/importer/importer.py +++ b/darwin/importer/importer.py @@ -182,7 +182,7 @@ def _build_attribute_lookup(dataset: "RemoteDataset") -> Dict[str, Unknown]: def _get_remote_files( dataset: "RemoteDataset", filenames: List[str], chunk_size: int = 100 -) -> Dict[str, Tuple[int, str]]: +) -> Dict[str, Tuple[str, str]]: """ Fetches remote files from the datasets in chunks; by default 100 filenames at a time. @@ -790,7 +790,7 @@ def import_annotations( # noqa: C901 console.print("Fetching remote file list...", style="info") # This call will only filter by filename; so can return a superset of matched files across different paths # There is logic in this function to then include paths to narrow down to the single correct matching file - remote_files: Dict[str, Tuple[int, str]] = {} + remote_files: Dict[str, Tuple[str, str]] = {} # Try to fetch files in large chunks; in case the filenames are too large and exceed the url size # retry in smaller chunks @@ -939,6 +939,14 @@ def import_annotations( # noqa: C901 file for file in parsed_files if file not in files_to_not_track ] if files_to_track: + if not append: + # Remember to add a flag that can bypass this warning! + # Add unit test(s) for this functionality at the end + continue_to_overwrite = _overwrite_warning( + dataset.client, dataset, files_to_track, remote_files, console + ) + if not continue_to_overwrite: + return _warn_unsupported_annotations(files_to_track) for parsed_file in track(files_to_track): image_id, default_slot_name = remote_files[parsed_file.full_path] @@ -1355,3 +1363,32 @@ def _console_theme() -> Theme: "info": "bold deep_sky_blue1", } ) + + +def _overwrite_warning( + client: "Client", + dataset: "RemoteDataset", + files: List[dt.AnnotationFile], + remote_files: Dict[str, Tuple[str, str]], + console: Console, +) -> bool: + files_to_overwrite = [] + for file in files: + item_id = remote_files[file.full_path][0] + remote_annotations = client.api_v2._get_remote_annotations( + item_id, + dataset.team, + ) + if remote_annotations: + files_to_overwrite.append(file) + if files_to_overwrite: + console.print( + "The following dataset items already have annotations that will be overwritten by this import:", + style="warning", + ) + for file in files_to_overwrite: + console.print(f"- {file.full_path}", style="warning") + proceed = input("Do you want to proceed with the import? 
[y/N] ") + if proceed.lower() != "y": + return False + return True From 6e0148a4c2fbd11d23eb76e0801a8c3848c09e8a Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Mon, 6 May 2024 17:48:47 +0100 Subject: [PATCH 2/6] Removed duplicate prints when there are multiple imports to the same file --- darwin/importer/importer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/darwin/importer/importer.py b/darwin/importer/importer.py index f855cfab3..87161897b 100644 --- a/darwin/importer/importer.py +++ b/darwin/importer/importer.py @@ -1379,15 +1379,15 @@ def _overwrite_warning( item_id, dataset.team, ) - if remote_annotations: - files_to_overwrite.append(file) + if remote_annotations and file.full_path not in files_to_overwrite: + files_to_overwrite.append(file.full_path) if files_to_overwrite: console.print( - "The following dataset items already have annotations that will be overwritten by this import:", + f"The following {len(files_to_overwrite)} dataset items already have annotations that will be overwritten by this import:", style="warning", ) for file in files_to_overwrite: - console.print(f"- {file.full_path}", style="warning") + console.print(f"- {file}", style="warning") proceed = input("Do you want to proceed with the import? [y/N] ") if proceed.lower() != "y": return False From 0f6046cf3db583ced8108eb5a34b136d178061f0 Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Mon, 6 May 2024 17:59:10 +0100 Subject: [PATCH 3/6] Added SDK & CLI options to bypass the overwrite warning --- darwin/cli.py | 1 + darwin/cli_functions.py | 5 +++++ darwin/importer/importer.py | 6 +++++- darwin/options.py | 5 +++++ 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/darwin/cli.py b/darwin/cli.py index cafb80cbf..003f20802 100644 --- a/darwin/cli.py +++ b/darwin/cli.py @@ -166,6 +166,7 @@ def _run(args: Namespace, parser: ArgumentParser) -> None: args.delete_for_empty, args.import_annotators, args.import_reviewers, + args.overwrite, cpu_limit=args.cpu_limit, ) elif args.action == "convert": diff --git a/darwin/cli_functions.py b/darwin/cli_functions.py index f814217dd..07624518e 100644 --- a/darwin/cli_functions.py +++ b/darwin/cli_functions.py @@ -851,6 +851,7 @@ def dataset_import( delete_for_empty: bool = False, import_annotators: bool = False, import_reviewers: bool = False, + overwrite: bool = False, use_multi_cpu: bool = False, cpu_limit: Optional[int] = None, ) -> None: @@ -881,6 +882,9 @@ def dataset_import( import_reviewers : bool, default: False If ``True`` it will import the reviewers from the files to the dataset, if . If ``False`` it will not import the reviewers. + overwrite : bool, default: False + If ``True`` it will bypass a warning that the import will overwrite the current annotations if any are present. + If ``False`` this warning will be skipped and the import will overwrite the current annotations. use_multi_cpu : bool, default: False If ``True`` it will use all multiple CPUs to speed up the import process. 
cpu_limit : Optional[int], default: Core count - 2 @@ -904,6 +908,7 @@ def dataset_import( delete_for_empty, import_annotators, import_reviewers, + overwrite, use_multi_cpu, cpu_limit, ) diff --git a/darwin/importer/importer.py b/darwin/importer/importer.py index 87161897b..5e094277f 100644 --- a/darwin/importer/importer.py +++ b/darwin/importer/importer.py @@ -673,6 +673,7 @@ def import_annotations( # noqa: C901 delete_for_empty: bool = False, import_annotators: bool = False, import_reviewers: bool = False, + overwrite: bool = False, use_multi_cpu: bool = False, # Set to False to give time to resolve MP behaviours cpu_limit: Optional[int] = None, # 0 because it's set later in logic ) -> None: @@ -704,6 +705,9 @@ def import_annotations( # noqa: C901 import_reviewers : bool, default: False If ``True`` it will import the reviewers from the files to the dataset, if . If ``False`` it will not import the reviewers. + overwrite : bool, default: False + If ``True`` it will bypass a warning that the import will overwrite the current annotations if any are present. + If ``False`` this warning will be skipped and the import will overwrite the current annotations. use_multi_cpu : bool, default: True If ``True`` will use multiple available CPU cores to parse the annotation files. If ``False`` will use only the current Python process, which runs in one core. @@ -939,7 +943,7 @@ def import_annotations( # noqa: C901 file for file in parsed_files if file not in files_to_not_track ] if files_to_track: - if not append: + if not append and not overwrite: # Remember to add a flag that can bypass this warning! # Add unit test(s) for this functionality at the end continue_to_overwrite = _overwrite_warning( diff --git a/darwin/options.py b/darwin/options.py index 0a9b5151f..1411a8385 100644 --- a/darwin/options.py +++ b/darwin/options.py @@ -333,6 +333,11 @@ def __init__(self) -> None: action="store_true", help="Import reviewers metadata from the annotation files, where available", ) + parser_import.add_argument( + "--overwrite", + action="store_true", + help="Bypass warnings about overwiting existing annotations.", + ) # Cpu limit for multiprocessing tasks def cpu_default_types(input: Any) -> Optional[int]: # type: ignore From 998e5e85c876f30cb03839d93cc09a357712473f Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Mon, 6 May 2024 18:02:00 +0100 Subject: [PATCH 4/6] Code documentation --- darwin/importer/importer.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/darwin/importer/importer.py b/darwin/importer/importer.py index 5e094277f..facf0816c 100644 --- a/darwin/importer/importer.py +++ b/darwin/importer/importer.py @@ -944,8 +944,6 @@ def import_annotations( # noqa: C901 ] if files_to_track: if not append and not overwrite: - # Remember to add a flag that can bypass this warning! - # Add unit test(s) for this functionality at the end continue_to_overwrite = _overwrite_warning( dataset.client, dataset, files_to_track, remote_files, console ) @@ -1376,6 +1374,28 @@ def _overwrite_warning( remote_files: Dict[str, Tuple[str, str]], console: Console, ) -> bool: + """ + Determines if any dataset items targeted for import already have annotations that will be overwritten. + If they do, a warning is displayed to the user and they are prompted to confirm if they want to proceed with the import. + + Parameters + ---------- + client : Client + The Darwin Client object. + dataset : RemoteDataset + The dataset where the annotations will be imported. 
+ files : List[dt.AnnotationFile] + The list of annotation files that will be imported. + remote_files : Dict[str, Tuple[str, str]] + A dictionary of the remote files in the dataset. + console : Console + The console object. + + Returns + ------- + bool + True if the user wants to proceed with the import, False otherwise. + """ files_to_overwrite = [] for file in files: item_id = remote_files[file.full_path][0] From d4a07223086c6470e726703e07a158be8b9d52f0 Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Mon, 6 May 2024 18:27:50 +0100 Subject: [PATCH 5/6] Unit tests --- darwin/cli_functions.py | 2 +- darwin/importer/importer.py | 2 +- tests/darwin/importer/importer_test.py | 92 +++++++++++++++++++++++++- 3 files changed, 92 insertions(+), 4 deletions(-) diff --git a/darwin/cli_functions.py b/darwin/cli_functions.py index 07624518e..efde7456f 100644 --- a/darwin/cli_functions.py +++ b/darwin/cli_functions.py @@ -884,7 +884,7 @@ def dataset_import( If ``False`` it will not import the reviewers. overwrite : bool, default: False If ``True`` it will bypass a warning that the import will overwrite the current annotations if any are present. - If ``False`` this warning will be skipped and the import will overwrite the current annotations. + If ``False`` this warning will be skipped and the import will overwrite the current annotations without warning. use_multi_cpu : bool, default: False If ``True`` it will use all multiple CPUs to speed up the import process. cpu_limit : Optional[int], default: Core count - 2 diff --git a/darwin/importer/importer.py b/darwin/importer/importer.py index facf0816c..66ae30056 100644 --- a/darwin/importer/importer.py +++ b/darwin/importer/importer.py @@ -707,7 +707,7 @@ def import_annotations( # noqa: C901 If ``False`` it will not import the reviewers. overwrite : bool, default: False If ``True`` it will bypass a warning that the import will overwrite the current annotations if any are present. - If ``False`` this warning will be skipped and the import will overwrite the current annotations. + If ``False`` this warning will be skipped and the import will overwrite the current annotations without warning. use_multi_cpu : bool, default: True If ``True`` will use multiple available CPU cores to parse the annotation files. If ``False`` will use only the current Python process, which runs in one core. 
diff --git a/tests/darwin/importer/importer_test.py b/tests/darwin/importer/importer_test.py index 5e2534a30..d86a552c1 100644 --- a/tests/darwin/importer/importer_test.py +++ b/tests/darwin/importer/importer_test.py @@ -1,13 +1,13 @@ import json from pathlib import Path from typing import List, Tuple -from unittest.mock import Mock, _patch, patch +from unittest.mock import MagicMock, Mock, _patch, patch import pytest from rich.theme import Theme from darwin import datatypes as dt -from darwin.importer.importer import _parse_empty_masks +from darwin.importer.importer import _overwrite_warning, _parse_empty_masks def root_path(x: str) -> str: @@ -593,3 +593,91 @@ def test_console_theme() -> None: from darwin.importer.importer import _console_theme assert isinstance(_console_theme(), Theme) + + +def test_overwrite_warning_proceeds_with_import(): + annotations: List[dt.AnnotationLike] = [ + dt.Annotation( + dt.AnnotationClass("cat1", "polygon"), + { + "paths": [ + [ + {"x": -1, "y": -1}, + {"x": -1, "y": 1}, + {"x": 1, "y": 1}, + {"x": 1, "y": -1}, + {"x": -1, "y": -1}, + ] + ], + "bounding_box": {"x": -1, "y": -1, "w": 2, "h": 2}, + }, + ) + ] + client = MagicMock() + dataset = MagicMock() + files = [ + dt.AnnotationFile( + path=Path("/"), + filename="file1", + annotation_classes={a.annotation_class for a in annotations}, + annotations=annotations, + remote_path="/", + ), + dt.AnnotationFile( + path=Path("/"), + filename="file2", + annotation_classes={a.annotation_class for a in annotations}, + annotations=annotations, + remote_path="/", + ), + ] + remote_files = {"/file1": ("id1", "path1"), "/file2": ("id2", "path2")} + console = MagicMock() + + with patch("builtins.input", return_value="y"): + result = _overwrite_warning(client, dataset, files, remote_files, console) + assert result is True + + +def test_overwrite_warning_aborts_import(): + annotations: List[dt.AnnotationLike] = [ + dt.Annotation( + dt.AnnotationClass("cat1", "polygon"), + { + "paths": [ + [ + {"x": -1, "y": -1}, + {"x": -1, "y": 1}, + {"x": 1, "y": 1}, + {"x": 1, "y": -1}, + {"x": -1, "y": -1}, + ] + ], + "bounding_box": {"x": -1, "y": -1, "w": 2, "h": 2}, + }, + ) + ] + client = MagicMock() + dataset = MagicMock() + files = [ + dt.AnnotationFile( + path=Path("/"), + filename="file1", + annotation_classes={a.annotation_class for a in annotations}, + annotations=annotations, + remote_path="/", + ), + dt.AnnotationFile( + path=Path("/"), + filename="file2", + annotation_classes={a.annotation_class for a in annotations}, + annotations=annotations, + remote_path="/", + ), + ] + remote_files = {"/file1": ("id1", "path1"), "/file2": ("id2", "path2")} + console = MagicMock() + + with patch("builtins.input", return_value="n"): + result = _overwrite_warning(client, dataset, files, remote_files, console) + assert result is False From 6661ac2888083ebd0fdac855a1f032b4eab6d68b Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Mon, 6 May 2024 22:06:59 +0100 Subject: [PATCH 6/6] Fix for multiple files --- darwin/importer/importer.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/darwin/importer/importer.py b/darwin/importer/importer.py index 66ae30056..463a35791 100644 --- a/darwin/importer/importer.py +++ b/darwin/importer/importer.py @@ -905,6 +905,13 @@ def import_annotations( # noqa: C901 style="info", ) + if not append and not overwrite: + continue_to_overwrite = _overwrite_warning( + dataset.client, dataset, local_files, remote_files, console + ) + if not continue_to_overwrite: + 
return + # Need to re parse the files since we didn't save the annotations in memory for local_path in set(local_file.path for local_file in local_files): # noqa: C401 imported_files: Union[List[dt.AnnotationFile], dt.AnnotationFile, None] = ( @@ -943,12 +950,6 @@ def import_annotations( # noqa: C901 file for file in parsed_files if file not in files_to_not_track ] if files_to_track: - if not append and not overwrite: - continue_to_overwrite = _overwrite_warning( - dataset.client, dataset, files_to_track, remote_files, console - ) - if not continue_to_overwrite: - return _warn_unsupported_annotations(files_to_track) for parsed_file in track(files_to_track): image_id, default_slot_name = remote_files[parsed_file.full_path] @@ -1370,7 +1371,7 @@ def _console_theme() -> Theme: def _overwrite_warning( client: "Client", dataset: "RemoteDataset", - files: List[dt.AnnotationFile], + local_files: List[dt.AnnotationFile], remote_files: Dict[str, Tuple[str, str]], console: Console, ) -> bool: @@ -1385,7 +1386,7 @@ def _overwrite_warning( dataset : RemoteDataset The dataset where the annotations will be imported. files : List[dt.AnnotationFile] - The list of annotation files that will be imported. + The list of local annotation files to will be imported. remote_files : Dict[str, Tuple[str, str]] A dictionary of the remote files in the dataset. console : Console @@ -1397,14 +1398,13 @@ def _overwrite_warning( True if the user wants to proceed with the import, False otherwise. """ files_to_overwrite = [] - for file in files: - item_id = remote_files[file.full_path][0] + for local_file in local_files: remote_annotations = client.api_v2._get_remote_annotations( - item_id, + local_file.item_id, dataset.team, ) - if remote_annotations and file.full_path not in files_to_overwrite: - files_to_overwrite.append(file.full_path) + if remote_annotations and local_file.full_path not in files_to_overwrite: + files_to_overwrite.append(local_file.full_path) if files_to_overwrite: console.print( f"The following {len(files_to_overwrite)} dataset items already have annotations that will be overwritten by this import:",
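
A minimal usage sketch of the feature added by this series, assuming the usual darwin-py import flow (Client.local(), get_importer, import_annotations); the team slug, dataset slug, and annotation paths below are placeholders, not values taken from the patches.

    from darwin.client import Client
    from darwin.importer import get_importer, import_annotations

    client = Client.local()  # reads the API key from the local darwin config
    dataset = client.get_remote_dataset("my-team/my-dataset")
    parser = get_importer("darwin")  # parser for Darwin JSON annotation files

    # With append=False and overwrite=False (the default), the import now pauses
    # and asks for confirmation whenever a target item already has annotations.
    # Passing overwrite=True bypasses that prompt, e.g. in non-interactive scripts.
    import_annotations(
        dataset,
        parser,
        ["annotations/image1.json", "annotations/image2.json"],
        append=False,
        overwrite=True,
    )

The equivalent CLI invocation would be along the lines of:

    darwin dataset import my-team/my-dataset darwin annotations/ --overwrite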