# Voxel51 <-> V7 Integration External Storage Walkthrough

# Single-Slotted Example

In [2]:
# Imports
from uuid import uuid4
import fiftyone as fo
import fiftyone.zoo as foz
import darwin_fiftyone

## Load and prepare the data

There are two extra things to consider when loading files from external storage: external file paths and the `external_storage` argument.

### External file path
**The filepath of each Sample needs to point to the full external path of the file**. For example:

- AWS S3: `s3://v7-john/directory/structure/file.jpg`
- Azure Blob: `https://v7-john.blob.core.windows.net/v7-john-container/directory/structure/file.jpg`
- GCP Bucket: `gs://v7-john/directory/structure/file.jpg`

This is because we construct [storage keys](https://docs.v7labs.com/docs/registering-items-from-external-storage#the-basics) by splitting each `Sample` file path on `/`, then joining all items in the resulting list from the 4th item onward. For example, `gs://v7-john/directory/structure/file.jpg` yields the storage key `directory/structure/file.jpg`.

So long as each Sample file path is the full external path (no matter the Cloud environment), the storage key will be constructed correctly. The code that does this is [here](https://github.com/v7labs/darwin_fiftyone/blob/c26d001a49b0690db1d977992db889259ea6cda9/darwin_fiftyone/darwin.py#L1580).

The reason for this workaround is that the open-source version of V51 that V7 has access to doesn’t natively support external storage. 

In [None]:
# Update depending on your external storage environment
storage_keys = [
    "gs://john-gcp-bucket/voxel51-tests/000002.jpg",
    "gs://john-gcp-bucket/voxel51-tests/000008.jpg",
    "gs://john-gcp-bucket/voxel51-tests/000020.jpg",
    "gs://john-gcp-bucket/voxel51-tests/000031.jpg",
]

samples = [fo.Sample(filepath=storage_key) for storage_key in storage_keys]
dataset = fo.Dataset()
dataset.add_samples(samples)

# Update both values based on your local & external storage environments.
# NOTE(review): this assumes each stored filepath begins with a local
# directory followed by the storage scheme (e.g. "<local dir>/gs:/bucket/...");
# confirm against the paths printed below.
local_prefix = "/Users/john/Documents/code/development/darwin_fiftyone/gs:"
external_prefix = "gs:/"

for sample in samples:
    sample.filepath = sample.filepath.replace(local_prefix, external_prefix)
    # The samples were already added to `dataset`, so the edit has to be saved
    # explicitly to be written back to the dataset.
    sample.save()

# Check the filepaths of the samples to ensure they are updated correctly
for sample in dataset:
    print(sample.filepath)


In [None]:

# Label schema: a single "new_detections" field of type "detections",
# limited to the classes listed below
label_schema = {
    "new_detections": dict(
        type="detections",
        classes=["apple", "orange"],
    ),
}

### The external_storage argument

When calling the `annotate()` function, the `external_storage` argument must be set to the slug `Name` of the storage configured in Darwin.

In [None]:
# Unique identifier for this annotation run: "key_" followed by a UUID4 whose
# dashes are replaced with underscores
anno_key = "key_" + str(uuid4()).replace("-", "_")

# Darwin backend options; update these to match your own Darwin setup.
# `external_storage` is the storage slug name configured in Darwin.
darwin_options = dict(
    backend="darwin",
    dataset_slug="v51-external-storage-demo",
    external_storage="v7-john-bucket",
    base_url="https://darwin.irl.v7labs.com/api/v2/teams",
)

# Send the data from Voxel51 to Darwin.
dataset.annotate(
    anno_key,
    label_schema=label_schema,
    launch_editor=True,
    **darwin_options,
)

Annotation in Darwin takes place at this stage

In [None]:
# Once annotation in Darwin is finished, load the annotations for this run
# (identified by `anno_key`) back into the dataset
dataset.load_annotations(anno_key)

# Finally, visualise the results back in Voxel51; `session` keeps a handle
# on the launched App
session = fo.launch_app(dataset)

# Multi-Slotted Example

In [None]:
# Imports
from uuid import uuid4
import fiftyone as fo
import fiftyone.zoo as foz
import darwin_fiftyone

## Load and prepare the data

In [None]:
# Update depending on your external storage environment
storage_prefix = "gs://john-gcp-bucket/voxel51-tests"

# Full external paths of the files used for the "left" items
storage_keys_left = [
    f"{storage_prefix}/{image_id}.jpg"
    for image_id in ("000002", "000008", "000020", "000031")
]

# Full external paths of the files used for the "right" items
storage_keys_right = [
    f"{storage_prefix}/{image_id}.jpg"
    for image_id in ("000035", "000058", "000083", "000089")
]

# Start from a new dataset and declare a group field named "group" on it
# before any samples are added
dataset = fo.Dataset()
dataset.add_group_field("group")


### Manually create V7 multi-slot items by creating a V51 [Grouped Dataset](https://docs.voxel51.com/user_guide/groups.html#adding-samples)

In [None]:

# Create a grouped dataset with two slices (=slots): left and right
samples = []
for file_left, file_right in zip(storage_keys_left, storage_keys_right):
    group = fo.Group()  # one group per left/right pair
    # Each sample is assigned to its slice of the shared group
    sample_left = fo.Sample(filepath=file_left, group=group.element("left"))
    sample_right = fo.Sample(filepath=file_right, group=group.element("right"))
    samples.extend([sample_left, sample_right])

dataset.add_samples(samples)

# Update both values based on your local & external storage environments.
# NOTE(review): this assumes each stored filepath begins with a local
# directory followed by the storage scheme (e.g. "<local dir>/gs:/bucket/...");
# confirm by inspecting the sample filepaths.
local_prefix = "/Users/john/Documents/code/development/darwin_fiftyone/gs:"
external_prefix = "gs:/"

for sample in samples:
    sample.filepath = sample.filepath.replace(local_prefix, external_prefix)
    # The samples were already added to `dataset`, so the edit has to be saved
    # explicitly to be written back to the dataset.
    sample.save()

# Label schema: a single "new_detections" field of type "detections",
# limited to the classes listed below
label_schema = {
    "new_detections": dict(
        type="detections",
        classes=["apple", "orange"],
    ),
}

## Register the data in Darwin

In [None]:
# Unique identifier for this annotation run: "key_" followed by a UUID4 whose
# dashes are replaced with underscores
anno_key = "key_" + str(uuid4()).replace("-", "_")

# Darwin backend options; update these to match your own Darwin setup.
darwin_options = dict(
    backend="darwin",
    dataset_slug="v51-external-storage-demo",
    Groups=True,
    external_storage="v7-john-bucket",
    base_url="https://darwin.irl.v7labs.com/api/v2/teams",
)

# Send the data from Voxel51 to Darwin, annotating the view over the
# dataset's image group slices.
groups = dataset.select_group_slices(media_type="image")
groups.annotate(
    anno_key,
    label_schema=label_schema,
    launch_editor=True,
    **darwin_options,
)

Annotation in Darwin takes place at this stage

In [None]:
# Once annotation in Darwin is finished, load the annotations for this run
# (identified by `anno_key`) back into the dataset
dataset.load_annotations(anno_key)

# Finally, visualise the results back in Voxel51; `session` keeps a handle
# on the launched App
session = fo.launch_app(dataset)