Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions darwin/cli_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1059,7 +1059,7 @@ def set_file_status(dataset_slug: str, status: str, files: List[str]) -> None:
files: List[str]
Names of the files we want to update.
"""
available_statuses = ["archived", "clear", "new", "restore-archived", "complete"]
available_statuses = ["archived", "new", "restore-archived", "complete"]
if status not in available_statuses:
_error(
f"Invalid status '{status}', available statuses: {', '.join(available_statuses)}"
Expand All @@ -1075,8 +1075,6 @@ def set_file_status(dataset_slug: str, status: str, files: List[str]) -> None:
)
if status == "archived":
dataset.archive(items)
elif status == "clear":
dataset.reset(items)
elif status == "new":
dataset.move_to_new(items)
elif status == "restore-archived":
Expand Down
31 changes: 10 additions & 21 deletions darwin/dataset/remote_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,68 +467,57 @@ def fetch_remote_files(
"""

@abstractmethod
def archive(self, items: Iterator[DatasetItem]) -> None:
def archive(self, items: Iterable[DatasetItem]) -> None:
"""
Archives (soft-deletion) the given ``DatasetItem``\\s belonging to this ``RemoteDataset``.

Parameters
----------
items : Iterator[DatasetItem]
items : Iterable[DatasetItem]
The ``DatasetItem``\\s to be archived.
"""

@abstractmethod
def restore_archived(self, items: Iterator[DatasetItem]) -> None:
def restore_archived(self, items: Iterable[DatasetItem]) -> None:
"""
Restores the archived ``DatasetItem``\\s that belong to this ``RemoteDataset``.

Parameters
----------
items : Iterator[DatasetItem]
items : Iterable[DatasetItem]
The ``DatasetItem``\\s to be restored.
"""

@abstractmethod
def move_to_new(self, items: Iterator[DatasetItem]) -> None:
def move_to_new(self, items: Iterable[DatasetItem]) -> None:
"""
Changes the given ``DatasetItem``\\s status to ``new``.

Parameters
----------
items : Iterator[DatasetItem]
items : Iterable[DatasetItem]
The ``DatasetItem``\\s whose status will change.
"""

@abstractmethod
def reset(self, items: Iterator[DatasetItem]) -> None:
"""
Resets the given ``DatasetItem``\\s.

Parameters
----------
items : Iterator[DatasetItem]
The ``DatasetItem``\\s to be reset.
"""

@abstractmethod
def complete(self, items: Iterator[DatasetItem]) -> None:
def complete(self, items: Iterable[DatasetItem]) -> None:
"""
Completes the given ``DatasetItem``\\s.

Parameters
----------
items : Iterator[DatasetItem]
items : Iterable[DatasetItem]
The ``DatasetItem``\\s to be completed.
"""

@abstractmethod
def delete_items(self, items: Iterator[DatasetItem]) -> None:
def delete_items(self, items: Iterable[DatasetItem]) -> None:
"""
Deletes the given ``DatasetItem``\\s.

Parameters
----------
items : Iterator[DatasetItem]
items : Iterable[DatasetItem]
The ``DatasetItem``\\s to be deleted.
"""

Expand Down
33 changes: 11 additions & 22 deletions darwin/dataset/remote_dataset_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
Sequence,
Tuple,
Union,
Iterable,
)
import numpy as np
from pydantic import ValidationError
Expand Down Expand Up @@ -362,13 +363,13 @@ def fetch_remote_files(
else:
return

def archive(self, items: Iterator[DatasetItem]) -> None:
def archive(self, items: Iterable[DatasetItem]) -> None:
"""
Archives (soft-deletion) the given ``DatasetItem``\\s belonging to this ``RemoteDataset``.

Parameters
----------
items : Iterator[DatasetItem]
items : Iterable[DatasetItem]
The ``DatasetItem``\\s to be archived.
"""
payload: Dict[str, Any] = {
Expand All @@ -379,13 +380,13 @@ def archive(self, items: Iterator[DatasetItem]) -> None:
}
self.client.api_v2.archive_items(payload, team_slug=self.team)

def restore_archived(self, items: Iterator[DatasetItem]) -> None:
def restore_archived(self, items: Iterable[DatasetItem]) -> None:
"""
Restores the archived ``DatasetItem``\\s that belong to this ``RemoteDataset``.

Parameters
----------
items : Iterator[DatasetItem]
items : Iterable[DatasetItem]
The ``DatasetItem``\\s to be restored.
"""
payload: Dict[str, Any] = {
Expand All @@ -396,13 +397,13 @@ def restore_archived(self, items: Iterator[DatasetItem]) -> None:
}
self.client.api_v2.restore_archived_items(payload, team_slug=self.team)

def move_to_new(self, items: Iterator[DatasetItem]) -> None:
def move_to_new(self, items: Iterable[DatasetItem]) -> None:
"""
Changes the given ``DatasetItem``\\s status to ``new``.

Parameters
----------
items : Iterator[DatasetItem]
items : Iterable[DatasetItem]
The ``DatasetItem``\\s whose status will change.
"""

Expand All @@ -417,25 +418,13 @@ def move_to_new(self, items: Iterator[DatasetItem]) -> None:
team_slug=self.team,
)

def reset(self, items: Iterator[DatasetItem]) -> None:
"""
Deprecated
Resets the given ``DatasetItem``\\s.

Parameters
----------
items : Iterator[DatasetItem]
The ``DatasetItem``\\s to be resetted.
"""
raise ValueError("Reset is deprecated for version 2 datasets")

def complete(self, items: Iterator[DatasetItem]) -> None:
def complete(self, items: Iterable[DatasetItem]) -> None:
"""
Completes the given ``DatasetItem``\\s.

Parameters
----------
items : Iterator[DatasetItem]
items : Iterable[DatasetItem]
The ``DatasetItem``\\s to be completed.
"""
(workflow_id, stages) = self._fetch_stages("complete")
Expand All @@ -449,13 +438,13 @@ def complete(self, items: Iterator[DatasetItem]) -> None:
team_slug=self.team,
)

def delete_items(self, items: Iterator[DatasetItem]) -> None:
def delete_items(self, items: Iterable[DatasetItem]) -> None:
"""
Deletes the given ``DatasetItem``\\s.

Parameters
----------
items : Iterator[DatasetItem]
items : Iterable[DatasetItem]
The ``DatasetItem``\\s to be deleted.
"""
self.client.api_v2.delete_items(
Expand Down
34 changes: 34 additions & 0 deletions docs/DEV.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,40 @@ To run end-to-end tests locally, copy `e2e_tests/.env.example` to `.env` and pop
pytest e2e_tests
```

### Code Formatting and Linting
The project uses two main tools for code quality:

1. **Black** - The uncompromising code formatter
- Automatically formats Python code to a consistent style
- Run locally before committing:
```
black .
```
- CI will check formatting with `black --check`

2. **Ruff** - An extremely fast Python linter
- Enforces code style and catches potential errors
- Run locally:
```
ruff check .
```

Both tools are automatically run in CI/CD pipelines for all Python files changed in pull requests. The workflow will:
- Check code formatting with Black
- Run Ruff linting checks
- Fail the build if any issues are found

To ensure your code passes CI checks, you can run these tools locally before pushing:
```bash
# Format code
black .

# Run linter
ruff check .
```

For VS Code users, it's recommended to enable format-on-save with Black and install the Ruff extension for real-time linting feedback.

## Useful Aliases
Aliases can be helpful for testing and development. Add them to your shell configuration file (e.g. `.bashrc`, `.zshrc`) for ease of use during development.
```
Expand Down
19 changes: 0 additions & 19 deletions tests/darwin/cli_functions_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,25 +289,6 @@ def test_calls_dataset_archive(
)
mock.assert_called_once_with(fetch_remote_files_mock.return_value)

def test_calls_dataset_clear(
self, dataset_identifier: str, remote_dataset: RemoteDataset
):
with patch.object(
Client, "get_remote_dataset", return_value=remote_dataset
) as get_remote_dataset_mock:
with patch.object(
RemoteDatasetV2, "fetch_remote_files"
) as fetch_remote_files_mock:
with patch.object(RemoteDatasetV2, "reset") as mock:
set_file_status(dataset_identifier, "clear", ["one.jpg", "two.jpg"])
get_remote_dataset_mock.assert_called_once_with(
dataset_identifier=dataset_identifier
)
fetch_remote_files_mock.assert_called_once_with(
{"item_names": "one.jpg,two.jpg"}
)
mock.assert_called_once_with(fetch_remote_files_mock.return_value)

def test_calls_dataset_new(
self, dataset_identifier: str, remote_dataset: RemoteDataset
):
Expand Down
Loading