diff --git a/darwin/cli.py b/darwin/cli.py index b0e9b0a0c..c29549bf1 100644 --- a/darwin/cli.py +++ b/darwin/cli.py @@ -69,7 +69,7 @@ def run(args, parser): elif args.action == "report": f.dataset_report(args.dataset, args.granularity or "day") elif args.action == "export": - f.export_dataset(args.dataset, args.annotation_class, args.name) + f.export_dataset(args.dataset, args.include_url_token, args.annotation_class, args.name) elif args.action == "releases": f.dataset_list_releases(args.dataset) elif args.action == "pull": diff --git a/darwin/cli_functions.py b/darwin/cli_functions.py index 5f3486026..e165afc19 100644 --- a/darwin/cli_functions.py +++ b/darwin/cli_functions.py @@ -199,7 +199,9 @@ def dataset_report(dataset_slug: str, granularity) -> Path: _error(f"Dataset '{dataset_slug}' does not exist.") -def export_dataset(dataset_slug: str, annotation_class_ids: Optional[List] = None, name: Optional[str] = None): +def export_dataset( + dataset_slug: str, include_url_token: bool, annotation_class_ids: Optional[List] = None, name: Optional[str] = None +): """Create a new release for the dataset Parameters @@ -214,7 +216,7 @@ def export_dataset(dataset_slug: str, annotation_class_ids: Optional[List] = Non client = _load_client(offline=False) identifier = DatasetIdentifier.parse(dataset_slug) ds = client.get_remote_dataset(identifier) - ds.export(annotation_class_ids=annotation_class_ids, name=name) + ds.export(annotation_class_ids=annotation_class_ids, name=name, include_url_token=include_url_token) identifier.version = name print(f"Dataset {dataset_slug} successfully exported to {identifier}") diff --git a/darwin/dataset/download_manager.py b/darwin/dataset/download_manager.py index c3af31a7e..7d14f1434 100644 --- a/darwin/dataset/download_manager.py +++ b/darwin/dataset/download_manager.py @@ -2,7 +2,6 @@ import json import time from pathlib import Path -from typing import Optional import requests @@ -10,6 +9,7 @@ def download_all_images_from_annotations( + 
api_key: str, api_url: str, annotations_path: Path, images_path: Path, @@ -21,6 +21,8 @@ def download_all_images_from_annotations( Parameters ---------- + api_key : str + API Key of the current team api_url : str Url of the darwin API (e.g. 'https://darwin.v7labs.com/api/') annotations_path : Path @@ -73,17 +75,23 @@ def download_all_images_from_annotations( # Create the generator with the partial functions count = len(annotations_to_download_path) generator = lambda: ( - functools.partial(download_image_from_annotation, api_url, annotation_path, images_path, annotation_format) + functools.partial( + download_image_from_annotation, api_key, api_url, annotation_path, images_path, annotation_format + ) for annotation_path in annotations_to_download_path ) return generator, count -def download_image_from_annotation(api_url: str, annotation_path: Path, images_path: str, annotation_format: str): +def download_image_from_annotation( + api_key: str, api_url: str, annotation_path: Path, images_path: str, annotation_format: str +): """Helper function: dispatcher of functions to download an image given an annotation Parameters ---------- + api_key : str + API Key of the current team api_url : str Url of the darwin API (e.g. 'https://darwin.v7labs.com/api/') annotation_path : Path @@ -94,20 +102,22 @@ def download_image_from_annotation(api_url: str, annotation_path: Path, images_p Format of the annotations. 
Currently only JSON is supported """ if annotation_format == "json": - download_image_from_json_annotation(api_url, annotation_path, images_path) + download_image_from_json_annotation(api_key, api_url, annotation_path, images_path) elif annotation_format == "xml": print("sorry can't let you do that dave") raise NotImplementedError # download_image_from_xml_annotation(annotation_path, images_path) -def download_image_from_json_annotation(api_url: str, annotation_path: Path, image_path: str): +def download_image_from_json_annotation(api_key: str, api_url: str, annotation_path: Path, image_path: str): """ Helper function: downloads an image given a .json annotation path and renames the json after the image filename Parameters ---------- + api_key : str + API Key of the current team api_url : str Url of the darwin API (e.g. 'https://darwin.v7labs.com/api/') annotation_path : Path @@ -122,10 +132,10 @@ def download_image_from_json_annotation(api_url: str, annotation_path: Path, ima original_filename_suffix = Path(annotation["image"]["original_filename"]).suffix path = Path(image_path) / (annotation_path.stem + original_filename_suffix) - download_image(annotation["image"]["url"], path) + download_image(annotation["image"]["url"], path, api_key) -def download_image(url: str, path: Path, verbose: Optional[bool] = False): +def download_image(url: str, path: Path, api_key: str): """Helper function: downloads one image from url. 
Parameters @@ -134,17 +144,18 @@ def download_image(url: str, path: Path, verbose: Optional[bool] = False): Url of the image to download path : Path Path where to download the image, with filename - verbose : bool - Flag for the logging level + api_key : str + API Key of the current team """ if path.exists(): return - if verbose: - print(f"Dowloading {path.name}") TIMEOUT = 60 start = time.time() while True: - response = requests.get(url, stream=True) + if "token" in url: + response = requests.get(url, stream=True) + else: + response = requests.get(url, headers={"Authorization": f"ApiKey {api_key}"}, stream=True) # Correct status: download image if response.status_code == 200: with open(str(path), "wb") as file: diff --git a/darwin/dataset/remote_dataset.py b/darwin/dataset/remote_dataset.py index e6c42dcc3..99ceac7e9 100644 --- a/darwin/dataset/remote_dataset.py +++ b/darwin/dataset/remote_dataset.py @@ -233,8 +233,12 @@ def pull( # No images will be downloaded return None, 0 + team_config = self.client.config.get_team(self.team) + api_key = team_config.get("api_key") + # Create the generator with the download instructions progress, count = download_all_images_from_annotations( + api_key=api_key, api_url=self.client.url, annotations_path=annotations_dir, images_path=self.local_images_path, @@ -295,7 +299,7 @@ def fetch_remote_classes(self): "annotation_classes" ] - def export(self, name: str, annotation_class_ids: Optional[List[str]] = None): + def export(self, name: str, annotation_class_ids: Optional[List[str]] = None, include_url_token: bool = False): """Create a new release for the dataset Parameters @@ -304,10 +308,16 @@ def export(self, name: str, annotation_class_ids: Optional[List[str]] = None): Name of the release annotation_class_ids: List List of the classes to filter + include_url_token: bool + Should the image url in the export include a token enabling access without team membership """ if annotation_class_ids is None: annotation_class_ids = [] - 
payload = {"annotation_class_ids": annotation_class_ids, "name": name} + payload = { + "annotation_class_ids": annotation_class_ids, + "name": name, + "include_export_token": include_url_token, + } self.client.post( f"/datasets/{self.dataset_id}/exports", payload=payload, diff --git a/darwin/options.py b/darwin/options.py index 63abc635c..a5152b43a 100644 --- a/darwin/options.py +++ b/darwin/options.py @@ -95,6 +95,13 @@ def __init__(self): parser_export.add_argument("dataset", type=str, help="Remote dataset name to export.") parser_export.add_argument("name", type=str, help="Name with with the version gets tagged.") parser_export.add_argument("annotation_class", type=str, nargs="?", help="List of class filters") + parser_export.add_argument( + "--include-url-token", + default=False, + action="store_true", + help="Each annotation file includes a url with an access token." + "Warning, anyone with the url can access the images, even without being a team member", + ) # Releases parser_dataset_version = dataset_action.add_parser("releases", help="Available version of a dataset.") diff --git a/setup.py b/setup.py index 288b842ca..52ff3bc24 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="darwin-py", - version="0.5.1", + version="0.5.2", author="V7", author_email="info@v7labs.com", description="Library and command line interface for darwin.v7labs.com",