From 13363e923a6fa5538fa157d1e4765ba06c225187 Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Thu, 16 Nov 2023 16:04:19 +0000 Subject: [PATCH 1/5] Added support for folder structures when importing CSV tags --- darwin/importer/formats/csv_tags_video.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/darwin/importer/formats/csv_tags_video.py b/darwin/importer/formats/csv_tags_video.py index 35f05e230..6fe8bb389 100644 --- a/darwin/importer/formats/csv_tags_video.py +++ b/darwin/importer/formats/csv_tags_video.py @@ -54,6 +54,8 @@ def parse_path(path: Path) -> Optional[List[dt.AnnotationFile]]: annotation_classes = set( [annotation.annotation_class for annotation in annotations] ) + remote_path = "/" + "/".join(filename.split("/")[:-1]) + filename = filename.split("/")[-1] files.append( dt.AnnotationFile( path, @@ -61,7 +63,7 @@ def parse_path(path: Path) -> Optional[List[dt.AnnotationFile]]: annotation_classes, annotations, is_video=True, - remote_path="/", + remote_path=remote_path, ) ) return files From d5f70320bf5bd77b401953d419d071483174a303 Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Tue, 21 Nov 2023 16:04:24 +0000 Subject: [PATCH 2/5] Added tests for CSV tags --- .../importer/formats/import_csv_tags_test.py | 52 ++++++++++++++ .../formats/import_csv_tags_video_test.py | 70 +++++++++++++++++++ 2 files changed, 122 insertions(+) create mode 100644 tests/darwin/importer/formats/import_csv_tags_test.py create mode 100644 tests/darwin/importer/formats/import_csv_tags_video_test.py diff --git a/tests/darwin/importer/formats/import_csv_tags_test.py b/tests/darwin/importer/formats/import_csv_tags_test.py new file mode 100644 index 000000000..a8fac1c1d --- /dev/null +++ b/tests/darwin/importer/formats/import_csv_tags_test.py @@ -0,0 +1,52 @@ +import csv +from pathlib import Path +from typing import List, Optional + +import pytest + +from darwin.datatypes import AnnotationFile +from darwin.importer.formats.csv_tags import parse_path + + +class 
TestParsePath: + @pytest.fixture + def file_path(self, tmp_path: Path): + path = tmp_path / "annotation.csv" + yield path + path.unlink() + + def test_it_returns_none_if_file_extension_is_not_csv(self): + path = Path("path/to/file.xml") + assert parse_path(path) is None + + def test_it_parses_csv_file_correctly(self, file_path: Path): + with file_path.open("w", newline="") as f: + writer = csv.writer(f) + writer.writerow(["image1.jpg", "tag1", "tag2"]) + writer.writerow(["image2.jpg", "tag3", "tag4"]) + + annotation_files: Optional[List[AnnotationFile]] = parse_path(file_path) + assert annotation_files is not None + + assert len(annotation_files) == 2 + assert annotation_files[0].filename == "image1.jpg" + assert len(annotation_files[0].annotations) == 2 + assert annotation_files[1].filename == "image2.jpg" + assert len(annotation_files[1].annotations) == 2 + + def test_folders_paths_are_parsed_correctly(self, file_path: Path): + with file_path.open("w", newline="") as f: + writer = csv.writer(f) + writer.writerow(["folder1/folder2/image1.jpg", "tag1", "tag2"]) + writer.writerow(["folder/image2.jpg", "tag3", "tag4"]) + + annotation_files: Optional[List[AnnotationFile]] = parse_path(file_path) + assert annotation_files is not None + + assert len(annotation_files) == 2 + assert annotation_files[0].filename == "image1.jpg" + assert annotation_files[0].remote_path == "/folder1/folder2" + assert len(annotation_files[0].annotations) == 2 + assert annotation_files[1].filename == "image2.jpg" + assert annotation_files[1].remote_path == "/folder" + assert len(annotation_files[1].annotations) == 2 diff --git a/tests/darwin/importer/formats/import_csv_tags_video_test.py b/tests/darwin/importer/formats/import_csv_tags_video_test.py new file mode 100644 index 000000000..2719ec5ef --- /dev/null +++ b/tests/darwin/importer/formats/import_csv_tags_video_test.py @@ -0,0 +1,70 @@ +import csv +from pathlib import Path +from typing import List, Optional + +import pytest + +import 
darwin.datatypes as dt +from darwin.importer.formats.csv_tags_video import parse_path + + +class TestParsePathVideo: + @pytest.fixture + def file_path(self, tmp_path: Path): + path = tmp_path / "annotation_video.csv" + yield path + path.unlink() + + def test_it_returns_none_if_file_extension_is_not_csv(self, file_path: Path): + path = Path("path/to/file.xml") + assert parse_path(path) is None + + def test_it_parses_csv_file_correctly(self, file_path: Path): + with file_path.open("w", newline="") as f: + writer = csv.writer(f) + writer.writerow(["video1.mp4", "tag1", "1", "10"]) + writer.writerow(["video2.mp4", "tag2", "5", "15"]) + + annotation_files: Optional[List[dt.AnnotationFile]] = parse_path(file_path) + assert annotation_files is not None + + assert len(annotation_files) == 2 + assert annotation_files[0].filename == "video1.mp4" + assert len(annotation_files[0].annotations) == 1 + assert annotation_files[1].filename == "video2.mp4" + assert len(annotation_files[1].annotations) == 1 + + def test_folders_paths_are_parsed_correctly(self, file_path: Path): + with file_path.open("w", newline="") as f: + writer = csv.writer(f) + writer.writerow(["folder1/folder2/video1.mp4", "tag1", "1", "10"]) + writer.writerow(["folder/video2.mp4", "tag2", "5", "15"]) + + annotation_files: Optional[List[dt.AnnotationFile]] = parse_path(file_path) + assert annotation_files is not None + + assert len(annotation_files) == 2 + assert annotation_files[0].filename == "video1.mp4" + assert annotation_files[0].remote_path == "/folder1/folder2" + assert len(annotation_files[0].annotations) == 1 + assert annotation_files[1].filename == "video2.mp4" + assert annotation_files[1].remote_path == "/folder" + assert len(annotation_files[1].annotations) == 1 + + def test_keyframes_are_recorded_correctly(self, file_path: Path): + with file_path.open("w", newline="") as f: + writer = csv.writer(f) + writer.writerow(["video1.mp4", "tag1", "1", "10"]) + + annotation_files: 
Optional[List[dt.AnnotationFile]] = parse_path(file_path) + assert annotation_files is not None + + assert len(annotation_files) == 1 + assert annotation_files[0].filename == "video1.mp4" + assert len(annotation_files[0].annotations) == 1 + + video_annotation = annotation_files[0].annotations[0] + assert isinstance(video_annotation, dt.VideoAnnotation) + + # Check that the keyframes are recorded correctly + assert video_annotation.keyframes == {i: i == 1 for i in range(1, 11)} From c504304fbd8eec9f87cffbf05f3f1680728f83b7 Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Tue, 21 Nov 2023 16:22:46 +0000 Subject: [PATCH 3/5] Fixed failing test --- tests/darwin/importer/formats/import_csv_tags_video_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/darwin/importer/formats/import_csv_tags_video_test.py b/tests/darwin/importer/formats/import_csv_tags_video_test.py index 2719ec5ef..573f740e6 100644 --- a/tests/darwin/importer/formats/import_csv_tags_video_test.py +++ b/tests/darwin/importer/formats/import_csv_tags_video_test.py @@ -15,7 +15,7 @@ def file_path(self, tmp_path: Path): yield path path.unlink() - def test_it_returns_none_if_file_extension_is_not_csv(self, file_path: Path): + def test_it_returns_none_if_file_extension_is_not_csv(self): path = Path("path/to/file.xml") assert parse_path(path) is None From 31abed4136103abd9dce309ac69afbf68be3e294 Mon Sep 17 00:00:00 2001 From: John Wilkie <124276291+JBWilkie@users.noreply.github.com> Date: Mon, 11 Dec 2023 16:28:39 +0000 Subject: [PATCH 4/5] Update csv_tags_video.py --- darwin/importer/formats/csv_tags_video.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/darwin/importer/formats/csv_tags_video.py b/darwin/importer/formats/csv_tags_video.py index 0c6c76902..d972653cd 100644 --- a/darwin/importer/formats/csv_tags_video.py +++ b/darwin/importer/formats/csv_tags_video.py @@ -52,10 +52,13 @@ def parse_path(path: Path) -> Optional[List[dt.AnnotationFile]]: for 
filename in file_annotation_map: annotations = file_annotation_map[filename] annotation_classes = set( - [annotation.annotation_class for annotation in annotations] + annotation.annotation_class for annotation in annotations ) - remote_path = "/" + "/".join(filename.split("/")[:-1]) - filename = filename.split("/")[-1] + filename_path = Path(filename) + remote_path = str(filename_path.parent) + if not remote_path.startswith("/"): + remote_path = "/" + remote_path + filename = filename_path.name files.append( dt.AnnotationFile( path, From 2d6c772ee859d841957a6122bec70cf5086bf616 Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Mon, 11 Dec 2023 19:28:27 +0000 Subject: [PATCH 5/5] Fixed compatibility issues with Windows filepaths --- .../darwin/importer/formats/import_csv_tags_video_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/darwin/importer/formats/import_csv_tags_video_test.py b/tests/darwin/importer/formats/import_csv_tags_video_test.py index 573f740e6..9805e2d5d 100644 --- a/tests/darwin/importer/formats/import_csv_tags_video_test.py +++ b/tests/darwin/importer/formats/import_csv_tags_video_test.py @@ -42,14 +42,14 @@ def test_folders_paths_are_parsed_correctly(self, file_path: Path): annotation_files: Optional[List[dt.AnnotationFile]] = parse_path(file_path) assert annotation_files is not None - assert len(annotation_files) == 2 assert annotation_files[0].filename == "video1.mp4" - assert annotation_files[0].remote_path == "/folder1/folder2" + if annotation_files[0].remote_path is not None: + assert Path(annotation_files[0].remote_path) == Path("/folder1/folder2") assert len(annotation_files[0].annotations) == 1 assert annotation_files[1].filename == "video2.mp4" - assert annotation_files[1].remote_path == "/folder" - assert len(annotation_files[1].annotations) == 1 + if annotation_files[1].remote_path is not None: + assert Path(annotation_files[1].remote_path) == Path("/folder") def 
test_keyframes_are_recorded_correctly(self, file_path: Path): with file_path.open("w", newline="") as f: