From 9ab8d3ab0e7b7a2b5300cae7f10ae918665340ab Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Fri, 7 Feb 2025 15:01:32 +0000 Subject: [PATCH 1/3] Replace Iterator with Iterable for better flexibility --- darwin/dataset/remote_dataset.py | 24 ++++++++++++------------ darwin/dataset/remote_dataset_v2.py | 25 +++++++++++++------------ 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/darwin/dataset/remote_dataset.py b/darwin/dataset/remote_dataset.py index e0ded8fc0..8acbf8068 100644 --- a/darwin/dataset/remote_dataset.py +++ b/darwin/dataset/remote_dataset.py @@ -467,68 +467,68 @@ def fetch_remote_files( """ @abstractmethod - def archive(self, items: Iterator[DatasetItem]) -> None: + def archive(self, items: Iterable[DatasetItem]) -> None: """ Archives (soft-deletion) the given ``DatasetItem``\\s belonging to this ``RemoteDataset``. Parameters ---------- - items : Iterator[DatasetItem] + items : Iterable[DatasetItem] The ``DatasetItem``\\s to be archived. """ @abstractmethod - def restore_archived(self, items: Iterator[DatasetItem]) -> None: + def restore_archived(self, items: Iterable[DatasetItem]) -> None: """ Restores the archived ``DatasetItem``\\s that belong to this ``RemoteDataset``. Parameters ---------- - items : Iterator[DatasetItem] + items : Iterable[DatasetItem] The ``DatasetItem``\\s to be restored. """ @abstractmethod - def move_to_new(self, items: Iterator[DatasetItem]) -> None: + def move_to_new(self, items: Iterable[DatasetItem]) -> None: """ Changes the given ``DatasetItem``\\s status to ``new``. Parameters ---------- - items : Iterator[DatasetItem] + items : Iterable[DatasetItem] The ``DatasetItem``\\s whose status will change. """ @abstractmethod - def reset(self, items: Iterator[DatasetItem]) -> None: + def reset(self, items: Iterable[DatasetItem]) -> None: """ Resets the given ``DatasetItem``\\s. Parameters ---------- - items : Iterator[DatasetItem] + items : Iterable[DatasetItem] The ``DatasetItem``\\s to be reset. """ @abstractmethod - def complete(self, items: Iterator[DatasetItem]) -> None: + def complete(self, items: Iterable[DatasetItem]) -> None: """ Completes the given ``DatasetItem``\\s. Parameters ---------- - items : Iterator[DatasetItem] + items : Iterable[DatasetItem] The ``DatasetItem``\\s to be completed. """ @abstractmethod - def delete_items(self, items: Iterator[DatasetItem]) -> None: + def delete_items(self, items: Iterable[DatasetItem]) -> None: """ Deletes the given ``DatasetItem``\\s. Parameters ---------- - items : Iterator[DatasetItem] + items : Iterable[DatasetItem] The ``DatasetItem``\\s to be deleted. """ diff --git a/darwin/dataset/remote_dataset_v2.py b/darwin/dataset/remote_dataset_v2.py index 8956a8a3a..7819a671c 100644 --- a/darwin/dataset/remote_dataset_v2.py +++ b/darwin/dataset/remote_dataset_v2.py @@ -10,6 +10,7 @@ Sequence, Tuple, Union, + Iterable, ) import numpy as np from pydantic import ValidationError @@ -362,13 +363,13 @@ def fetch_remote_files( else: return - def archive(self, items: Iterator[DatasetItem]) -> None: + def archive(self, items: Iterable[DatasetItem]) -> None: """ Archives (soft-deletion) the given ``DatasetItem``\\s belonging to this ``RemoteDataset``. Parameters ---------- - items : Iterator[DatasetItem] + items : Iterable[DatasetItem] The ``DatasetItem``\\s to be archived. """ payload: Dict[str, Any] = { @@ -379,13 +380,13 @@ def archive(self, items: Iterator[DatasetItem]) -> None: } self.client.api_v2.archive_items(payload, team_slug=self.team) - def restore_archived(self, items: Iterator[DatasetItem]) -> None: + def restore_archived(self, items: Iterable[DatasetItem]) -> None: """ Restores the archived ``DatasetItem``\\s that belong to this ``RemoteDataset``. Parameters ---------- - items : Iterator[DatasetItem] + items : Iterable[DatasetItem] The ``DatasetItem``\\s to be restored. """ payload: Dict[str, Any] = { @@ -396,13 +397,13 @@ def restore_archived(self, items: Iterator[DatasetItem]) -> None: } self.client.api_v2.restore_archived_items(payload, team_slug=self.team) - def move_to_new(self, items: Iterator[DatasetItem]) -> None: + def move_to_new(self, items: Iterable[DatasetItem]) -> None: """ Changes the given ``DatasetItem``\\s status to ``new``. Parameters ---------- - items : Iterator[DatasetItem] + items : Iterable[DatasetItem] The ``DatasetItem``\\s whose status will change. """ @@ -417,25 +418,25 @@ def move_to_new(self, items: Iterator[DatasetItem]) -> None: team_slug=self.team, ) - def reset(self, items: Iterator[DatasetItem]) -> None: + def reset(self, items: Iterable[DatasetItem]) -> None: """ Deprecated Resets the given ``DatasetItem``\\s. Parameters ---------- - items : Iterator[DatasetItem] + items : Iterable[DatasetItem] The ``DatasetItem``\\s to be resetted. """ raise ValueError("Reset is deprecated for version 2 datasets") - def complete(self, items: Iterator[DatasetItem]) -> None: + def complete(self, items: Iterable[DatasetItem]) -> None: """ Completes the given ``DatasetItem``\\s. Parameters ---------- - items : Iterator[DatasetItem] + items : Iterable[DatasetItem] The ``DatasetItem``\\s to be completed. """ (workflow_id, stages) = self._fetch_stages("complete") @@ -449,13 +450,13 @@ def complete(self, items: Iterator[DatasetItem]) -> None: team_slug=self.team, ) - def delete_items(self, items: Iterator[DatasetItem]) -> None: + def delete_items(self, items: Iterable[DatasetItem]) -> None: """ Deletes the given ``DatasetItem``\\s. Parameters ---------- - items : Iterator[DatasetItem] + items : Iterable[DatasetItem] The ``DatasetItem``\\s to be deleted. """ self.client.api_v2.delete_items( From 29b184509f5c4024eb283f282b3f7471a1d2dff1 Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Fri, 7 Feb 2025 15:08:46 +0000 Subject: [PATCH 2/3] Removed deprecated dataset.reset method --- darwin/cli_functions.py | 4 +--- darwin/dataset/remote_dataset.py | 11 ----------- darwin/dataset/remote_dataset_v2.py | 12 ------------ tests/darwin/cli_functions_test.py | 19 ------------------- 4 files changed, 1 insertion(+), 45 deletions(-) diff --git a/darwin/cli_functions.py b/darwin/cli_functions.py index c1954c60c..74d77e550 100644 --- a/darwin/cli_functions.py +++ b/darwin/cli_functions.py @@ -1059,7 +1059,7 @@ def set_file_status(dataset_slug: str, status: str, files: List[str]) -> None: files: List[str] Names of the files we want to update. """ - available_statuses = ["archived", "clear", "new", "restore-archived", "complete"] + available_statuses = ["archived", "new", "restore-archived", "complete"] if status not in available_statuses: _error( f"Invalid status '{status}', available statuses: {', '.join(available_statuses)}" @@ -1075,8 +1075,6 @@ def set_file_status(dataset_slug: str, status: str, files: List[str]) -> None: ) if status == "archived": dataset.archive(items) - elif status == "clear": - dataset.reset(items) elif status == "new": dataset.move_to_new(items) elif status == "restore-archived": diff --git a/darwin/dataset/remote_dataset.py b/darwin/dataset/remote_dataset.py index 8acbf8068..9f9752fbc 100644 --- a/darwin/dataset/remote_dataset.py +++ b/darwin/dataset/remote_dataset.py @@ -499,17 +499,6 @@ def move_to_new(self, items: Iterable[DatasetItem]) -> None: The ``DatasetItem``\\s whose status will change. """ - @abstractmethod - def reset(self, items: Iterable[DatasetItem]) -> None: - """ - Resets the given ``DatasetItem``\\s. - - Parameters - ---------- - items : Iterable[DatasetItem] - The ``DatasetItem``\\s to be reset. - """ - @abstractmethod def complete(self, items: Iterable[DatasetItem]) -> None: """ diff --git a/darwin/dataset/remote_dataset_v2.py b/darwin/dataset/remote_dataset_v2.py index 7819a671c..db8d55932 100644 --- a/darwin/dataset/remote_dataset_v2.py +++ b/darwin/dataset/remote_dataset_v2.py @@ -418,18 +418,6 @@ def move_to_new(self, items: Iterable[DatasetItem]) -> None: team_slug=self.team, ) - def reset(self, items: Iterable[DatasetItem]) -> None: - """ - Deprecated - Resets the given ``DatasetItem``\\s. - - Parameters - ---------- - items : Iterable[DatasetItem] - The ``DatasetItem``\\s to be resetted. - """ - raise ValueError("Reset is deprecated for version 2 datasets") - def complete(self, items: Iterable[DatasetItem]) -> None: """ Completes the given ``DatasetItem``\\s. diff --git a/tests/darwin/cli_functions_test.py b/tests/darwin/cli_functions_test.py index 182aa5904..6f783b78e 100644 --- a/tests/darwin/cli_functions_test.py +++ b/tests/darwin/cli_functions_test.py @@ -289,25 +289,6 @@ def test_calls_dataset_archive( ) mock.assert_called_once_with(fetch_remote_files_mock.return_value) - def test_calls_dataset_clear( - self, dataset_identifier: str, remote_dataset: RemoteDataset - ): - with patch.object( - Client, "get_remote_dataset", return_value=remote_dataset - ) as get_remote_dataset_mock: - with patch.object( - RemoteDatasetV2, "fetch_remote_files" - ) as fetch_remote_files_mock: - with patch.object(RemoteDatasetV2, "reset") as mock: - set_file_status(dataset_identifier, "clear", ["one.jpg", "two.jpg"]) - get_remote_dataset_mock.assert_called_once_with( - dataset_identifier=dataset_identifier - ) - fetch_remote_files_mock.assert_called_once_with( - {"item_names": "one.jpg,two.jpg"} - ) - mock.assert_called_once_with(fetch_remote_files_mock.return_value) - def test_calls_dataset_new( self, dataset_identifier: str, remote_dataset: RemoteDataset ): From a72411d6bc772453ef94eb3efa7dde453db6967f Mon Sep 17 00:00:00 2001 From: John Wilkie Date: Mon, 17 Feb 2025 14:59:18 +0000 Subject: [PATCH 3/3] Added info about ruff and black to DEV.md --- docs/DEV.md | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/docs/DEV.md b/docs/DEV.md index 0e25e506f..71d6bce45 100644 --- a/docs/DEV.md +++ b/docs/DEV.md @@ -51,6 +51,40 @@ To run end-to-end tests locally, copy `e2e_tests/.env.example` to `.env` and pop pytest e2e_tests ``` +### Code Formatting and Linting +The project uses two main tools for code quality: + +1. **Black** - The uncompromising code formatter + - Automatically formats Python code to a consistent style + - Run locally before committing: + ``` + black . + ``` + - CI will check formatting with `black --check` + +2. **Ruff** - An extremely fast Python linter + - Enforces code style and catches potential errors + - Run locally: + ``` + ruff check . + ``` + +Both tools are automatically run in CI/CD pipelines for all Python files changed in pull requests. The workflow will: +- Check code formatting with Black +- Run Ruff linting checks +- Fail the build if any issues are found + +To ensure your code passes CI checks, you can run these tools locally before pushing: +```bash +# Format code +black . + +# Run linter +ruff check . +``` + +For VS Code users, it's recommended to enable format-on-save with Black and install the Ruff extension for real-time linting feedback. + ## Useful Aliases Aliases can be helpful for testing and development. Add them to your shell configuration file .bashrc .zshrc etc for ease of use and development ```