Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion darwin/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def run(args, parser):
elif args.action == "report":
f.dataset_report(args.dataset, args.granularity or "day")
elif args.action == "export":
f.export_dataset(args.dataset, args.annotation_class, args.name)
f.export_dataset(args.dataset, args.include_url_token, args.annotation_class, args.name)
elif args.action == "releases":
f.dataset_list_releases(args.dataset)
elif args.action == "pull":
Expand Down
6 changes: 4 additions & 2 deletions darwin/cli_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,9 @@ def dataset_report(dataset_slug: str, granularity) -> Path:
_error(f"Dataset '{dataset_slug}' does not exist.")


def export_dataset(dataset_slug: str, annotation_class_ids: Optional[List] = None, name: Optional[str] = None):
def export_dataset(
dataset_slug: str, include_url_token: bool, annotation_class_ids: Optional[List] = None, name: Optional[str] = None
):
"""Create a new release for the dataset

Parameters
Expand All @@ -214,7 +216,7 @@ def export_dataset(dataset_slug: str, annotation_class_ids: Optional[List] = Non
client = _load_client(offline=False)
identifier = DatasetIdentifier.parse(dataset_slug)
ds = client.get_remote_dataset(identifier)
ds.export(annotation_class_ids=annotation_class_ids, name=name)
ds.export(annotation_class_ids=annotation_class_ids, name=name, include_url_token=include_url_token)
identifier.version = name
print(f"Dataset {dataset_slug} successfully exported to {identifier}")

Expand Down
35 changes: 23 additions & 12 deletions darwin/dataset/download_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
import json
import time
from pathlib import Path
from typing import Optional

import requests

from darwin.utils import is_image_extension_allowed


def download_all_images_from_annotations(
api_key: str,
api_url: str,
annotations_path: Path,
images_path: Path,
Expand All @@ -21,6 +21,8 @@ def download_all_images_from_annotations(

Parameters
----------
api_key : str
API Key of the current team
api_url : str
Url of the darwin API (e.g. 'https://darwin.v7labs.com/api/')
annotations_path : Path
Expand Down Expand Up @@ -73,17 +75,23 @@ def download_all_images_from_annotations(
# Create the generator with the partial functions
count = len(annotations_to_download_path)
generator = lambda: (
functools.partial(download_image_from_annotation, api_url, annotation_path, images_path, annotation_format)
functools.partial(
download_image_from_annotation, api_key, api_url, annotation_path, images_path, annotation_format
)
for annotation_path in annotations_to_download_path
)
return generator, count


def download_image_from_annotation(api_url: str, annotation_path: Path, images_path: str, annotation_format: str):
def download_image_from_annotation(
api_key: str, api_url: str, annotation_path: Path, images_path: str, annotation_format: str
):
"""Helper function: dispatcher of functions to download an image given an annotation

Parameters
----------
api_key : str
API Key of the current team
api_url : str
Url of the darwin API (e.g. 'https://darwin.v7labs.com/api/')
annotation_path : Path
Expand All @@ -94,20 +102,22 @@ def download_image_from_annotation(api_url: str, annotation_path: Path, images_p
Format of the annotations. Currently only JSON is supported
"""
if annotation_format == "json":
download_image_from_json_annotation(api_url, annotation_path, images_path)
download_image_from_json_annotation(api_key, api_url, annotation_path, images_path)
elif annotation_format == "xml":
print("sorry can't let you do that dave")
raise NotImplementedError
# download_image_from_xml_annotation(annotation_path, images_path)


def download_image_from_json_annotation(api_url: str, annotation_path: Path, image_path: str):
def download_image_from_json_annotation(api_key: str, api_url: str, annotation_path: Path, image_path: str):
"""
Helper function: downloads an image given a .json annotation path
and renames the json after the image filename

Parameters
----------
api_key : str
API Key of the current team
api_url : str
Url of the darwin API (e.g. 'https://darwin.v7labs.com/api/')
annotation_path : Path
Expand All @@ -122,10 +132,10 @@ def download_image_from_json_annotation(api_url: str, annotation_path: Path, ima
original_filename_suffix = Path(annotation["image"]["original_filename"]).suffix
path = Path(image_path) / (annotation_path.stem + original_filename_suffix)

download_image(annotation["image"]["url"], path)
download_image(annotation["image"]["url"], path, api_key)


def download_image(url: str, path: Path, verbose: Optional[bool] = False):
def download_image(url: str, path: Path, api_key: str):
"""Helper function: downloads one image from url.

Parameters
Expand All @@ -134,17 +144,18 @@ def download_image(url: str, path: Path, verbose: Optional[bool] = False):
Url of the image to download
path : Path
Path where to download the image, with filename
verbose : bool
Flag for the logging level
api_key : str
API Key of the current team
"""
if path.exists():
return
if verbose:
print(f"Dowloading {path.name}")
TIMEOUT = 60
start = time.time()
while True:
response = requests.get(url, stream=True)
if "token" in url:
response = requests.get(url, stream=True)
else:
response = requests.get(url, headers={"Authorization": f"ApiKey {api_key}"}, stream=True)
# Correct status: download image
if response.status_code == 200:
with open(str(path), "wb") as file:
Expand Down
14 changes: 12 additions & 2 deletions darwin/dataset/remote_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,8 +233,12 @@ def pull(
# No images will be downloaded
return None, 0

team_config = self.client.config.get_team(self.team)
api_key = team_config.get("api_key")

# Create the generator with the download instructions
progress, count = download_all_images_from_annotations(
api_key=api_key,
api_url=self.client.url,
annotations_path=annotations_dir,
images_path=self.local_images_path,
Expand Down Expand Up @@ -295,7 +299,7 @@ def fetch_remote_classes(self):
"annotation_classes"
]

def export(self, name: str, annotation_class_ids: Optional[List[str]] = None):
def export(self, name: str, annotation_class_ids: Optional[List[str]] = None, include_url_token: bool = False):
"""Create a new release for the dataset

Parameters
Expand All @@ -304,10 +308,16 @@ def export(self, name: str, annotation_class_ids: Optional[List[str]] = None):
Name of the release
annotation_class_ids: List
List of the classes to filter
include_url_token: bool
Whether the image url in the export should include a token enabling access without team membership
"""
if annotation_class_ids is None:
annotation_class_ids = []
payload = {"annotation_class_ids": annotation_class_ids, "name": name}
payload = {
"annotation_class_ids": annotation_class_ids,
"name": name,
"include_export_token": include_url_token,
}
self.client.post(
f"/datasets/{self.dataset_id}/exports",
payload=payload,
Expand Down
7 changes: 7 additions & 0 deletions darwin/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,13 @@ def __init__(self):
parser_export.add_argument("dataset", type=str, help="Remote dataset name to export.")
parser_export.add_argument("name", type=str, help="Name with with the version gets tagged.")
parser_export.add_argument("annotation_class", type=str, nargs="?", help="List of class filters")
parser_export.add_argument(
"--include-url-token",
default=False,
action="store_true",
help="Each annotation file includes a url with an access token."
"Warning, anyone with the url can access the images, even without being a team member",
)

# Releases
parser_dataset_version = dataset_action.add_parser("releases", help="Available version of a dataset.")
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="darwin-py",
version="0.5.1",
version="0.5.2",
author="V7",
author_email="info@v7labs.com",
description="Library and command line interface for darwin.v7labs.com",
Expand Down