## Setup

In [2]:
import textwrap

import fiftyone as fo
import fiftyone.zoo as foz
from fiftyone import ViewField as F

## Print some information about the zoo dataset

This example uses the Open Images V6 dataset, but you can change this to any of the [supported zoo datasets](https://voxel51.com/docs/fiftyone/user_guide/dataset_zoo/datasets.html).

In [3]:
# Optional reset, disabled by default: uncomment to delete the Open Images V6
# zoo dataset (presumably the local download, so the next load fetches it
# again -- confirm against the FiftyOne docs before running).
# foz.delete_zoo_dataset("open-images-v6")

In [4]:
# Look up the zoo entry for Open Images V6 and print its description, tags and splits.
zoo_dataset = foz.get_zoo_dataset("open-images-v6")

# Four spaces are prepended before dedenting -- presumably because the first
# docstring line carries no indentation while the remaining lines do.
description = textwrap.dedent("    " + zoo_dataset.__doc__)
print("***** Dataset description *****", description, sep="\n")

tag_list = ", ".join(zoo_dataset.tags)
print("***** Tags *****")
print(tag_list, end="\n\n")

split_list = ", ".join(zoo_dataset.supported_splits)
print("***** Supported splits *****")
print(split_list, end="\n\n")

***** Dataset description *****
Open Images V6 is a dataset of ~9 million images, roughly 2 million of
which are annotated and available via this zoo dataset.

The dataset contains annotations for classification, detection,
segmentation, and visual relationship tasks for the 600 boxable object
classes.

This dataset supports partial downloads:

-   You can specify subsets of data to download via the``label_types``,
    ``classes``, ``attrs``, and ``max_samples`` parameters
-   You can specify specific images to load via the ``image_ids`` parameter

See :ref:`this page <dataset-zoo-open-images-v6>` for more information
about partial downloads of this dataset.

Full split stats:

-   Train split: 1,743,042 images (513 GB)
-   Test split: 125,436 images (36 GB)
-   Validation split: 41,620 images (12 GB)

Notes:

-   Not all images contain all types of labels
-   All images have been rescaled so that their largest dimension is at
    most 1024 pixels

Example usage::

    #
    # Load 50 

## Load (and download) images from the dataset

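Specify your [required parameters](https://voxel51.com/docs/fiftyone/api/fiftyone.zoo.datasets.html#fiftyone.zoo.datasets.load_zoo_dataset). By default, the dataset will be downloaded if it does not already exist in the specified directory. The cell below loads up to 100 shuffled validation images with detection labels for each class, and stores each class in its own FiftyOne dataset named after that class.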

In [5]:
# Open Images classes to pull; each class is loaded into its own FiftyOne dataset.
classes = ['Cat', 'Dog', 'Bird', 'Chicken', 'Snake', 'Elephant', 'Dinosaur', 'Crocodile']

# Fixed seed so that shuffle=True selects the same images on every fresh run;
# without it the sampled subset (and every output below) changes run to run.
SEED = 51

# Maps the position of each class in `classes` to its loaded dataset.
# Later cells index this dict by position (datasets[0] is the 'Cat' dataset).
datasets = {}
for i, classname in enumerate(classes):
    datasets[i] = foz.load_zoo_dataset(
        "open-images-v6",
        label_types=["detections"],
        classes=[classname],
        split="validation",
        max_samples=100,
        shuffle=True,
        seed=SEED,
        dataset_name=classname,
    )

Downloading split 'validation' to '/root/fiftyone/open-images-v6/validation'
Downloading 'https://storage.googleapis.com/openimages/2018_04/validation/validation-images-with-rotation.csv' to '/root/fiftyone/open-images-v6/validation/metadata/image_ids.csv'
Downloading 'https://storage.googleapis.com/openimages/v5/class-descriptions-boxable.csv' to '/root/fiftyone/open-images-v6/validation/metadata/classes.csv'
Downloading 'https://storage.googleapis.com/openimages/2018_04/bbox_labels_600_hierarchy.json' to '/tmp/tmpdy26w5l0/metadata/hierarchy.json'
Downloading 'https://storage.googleapis.com/openimages/v5/validation-annotations-bbox.csv' to '/root/fiftyone/open-images-v6/validation/labels/detections.csv'
Downloading 100 images
 100% |█████████████████| 100/100 [7.9s elapsed, 0s remaining, 18.6 samples/s]      
Dataset info written to '/root/fiftyone/open-images-v6/info.json'
Loading 'open-images-v6' split 'validation'
 100% |█████████████████| 100/100 [360.4ms elapsed, 0s remaining, 27

## Print some information about the downloaded Open Images V6 dataset

In [6]:
# Ask the zoo where Open Images V6 was found and what info it has recorded for it.
dataset_dir = foz.find_zoo_dataset("open-images-v6")
info = foz.load_zoo_dataset_info("open-images-v6")

# Each call prints a heading followed by the value on the next line.
print("***** Dataset location *****", dataset_dir, sep="\n")
print("\n***** Dataset info *****", info, sep="\n")

***** Dataset location *****
/root/fiftyone/open-images-v6

***** Dataset info *****
{
    "name": "open-images-v6",
    "zoo_dataset": "fiftyone.zoo.datasets.base.OpenImagesV6Dataset",
    "dataset_type": "fiftyone.types.dataset_types.OpenImagesV6Dataset",
    "num_samples": 510,
    "downloaded_splits": {
        "validation": {
            "split": "validation",
            "num_samples": 510
        }
    },
    "classes": [
        "Accordion",
        "Adhesive tape",
        "Aircraft",
        "Airplane",
        "Alarm clock",
        "Alpaca",
        "Ambulance",
        "Animal",
        "Ant",
        "Antelope",
        "Apple",
        "Armadillo",
        "Artichoke",
        "Auto part",
        "Axe",
        "Backpack",
        "Bagel",
        "Baked goods",
        "Balance beam",
        "Ball",
        "Balloon",
        "Banana",
        "Band-aid",
        "Banjo",
        "Barge",
        "Barrel",
        "Baseball bat",
        "Baseball glove",
        "Bat

## View summary info about the dataset

In [7]:
# Evaluating the dataset as the last expression displays its summary
# (name, media type, sample count and field schema) for the first class.
datasets[0]

Name:        Cat
Media type:  image
Num samples: 100
Persistent:  False
Tags:        ['validation']
Sample fields:
    id:             fiftyone.core.fields.ObjectIdField
    filepath:       fiftyone.core.fields.StringField
    tags:           fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata:       fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.Metadata)
    detections:     fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    open_images_id: fiftyone.core.fields.StringField

## Print the first few samples in the dataset

In [8]:
# Show the first few samples of the first per-class dataset, including their detections.
datasets[0].head()

[<Sample: {
     'id': '60e80ea6896a3d57dbbcf8e2',
     'media_type': 'image',
     'filepath': '/root/fiftyone/open-images-v6/validation/data/6114d8024056b77c.jpg',
     'tags': BaseList(['validation']),
     'metadata': None,
     'detections': <Detections: {
         'detections': BaseList([
             <Detection: {
                 'id': '60e80ea6896a3d57dbbcf8e1',
                 'attributes': BaseDict({}),
                 'tags': BaseList([]),
                 'label': 'Cat',
                 'bounding_box': BaseList([0.04845815, 0.02660754, 0.95154185, 0.93791572]),
                 'mask': None,
                 'confidence': None,
                 'index': None,
                 'IsOccluded': False,
                 'IsTruncated': False,
                 'IsGroupOf': False,
                 'IsDepiction': False,
                 'IsInside': False,
             }>,
         ]),
     }>,
     'open_images_id': '6114d8024056b77c',
 }>,
 <Sample: {
     'id': '60e80ea6896a3d57

## Merge the per-class datasets into one

In [9]:
# Fold every other per-class dataset into the first one (datasets[0]).
# This mutates datasets[0] in place; the other datasets are only read.
for idx, other in datasets.items():
    if idx <= 0:
        # Never merge the target dataset into itself.
        continue
    datasets[0].merge_samples(other, overwrite_info=True)

## Check merged dataset

In [10]:
# Same dataset object as before, displayed again after the merge cell has run
# so the sample count can be compared with the earlier summary.
datasets[0]

Name:        Cat
Media type:  image
Num samples: 509
Persistent:  False
Tags:        ['validation']
Sample fields:
    id:             fiftyone.core.fields.ObjectIdField
    filepath:       fiftyone.core.fields.StringField
    tags:           fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata:       fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.Metadata)
    detections:     fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    open_images_id: fiftyone.core.fields.StringField

In [11]:
# Spot-check the first few samples of the merged dataset.
datasets[0].head()

[<Sample: {
     'id': '60e80ea6896a3d57dbbcf8e2',
     'media_type': 'image',
     'filepath': '/root/fiftyone/open-images-v6/validation/data/6114d8024056b77c.jpg',
     'tags': BaseList(['validation']),
     'metadata': None,
     'detections': <Detections: {
         'detections': BaseList([
             <Detection: {
                 'id': '60e80ea6896a3d57dbbcf8e1',
                 'attributes': BaseDict({}),
                 'tags': BaseList([]),
                 'label': 'Cat',
                 'bounding_box': BaseList([0.04845815, 0.02660754, 0.95154185, 0.93791572]),
                 'mask': None,
                 'confidence': None,
                 'index': None,
                 'IsOccluded': False,
                 'IsTruncated': False,
                 'IsGroupOf': False,
                 'IsDepiction': False,
                 'IsInside': False,
             }>,
         ]),
     }>,
     'open_images_id': '6114d8024056b77c',
 }>,
 <Sample: {
     'id': '60e80ea6896a3d57

## Post-processing

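In this example, to ensure the exported dataset (in COCO format) carries the `iscrowd` attribute, we copy the boolean `IsGroupOf` attribute from the Open Images V6 labels into a new integer `iscrowd` attribute (0 or 1) on every detection. The original `IsGroupOf` attribute is left in place.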

In [12]:
# Build the per-detection value: the IsGroupOf attribute converted to an integer.
iscrowd_expr = F("IsGroupOf").to_int()

# Populate `iscrowd` on every detection through a view, then save the view
# so the new attribute is written back to the underlying dataset.
view = datasets[0].set_field("detections.detections.iscrowd", iscrowd_expr)
view.save()

In [6]:
# Confirm that the detections now expose the new `iscrowd` attribute
# alongside the original Open Images attributes.
datasets[0].head()

[<Sample: {
     'id': '60e80ea6896a3d57dbbcf8e2',
     'media_type': 'image',
     'filepath': '/root/fiftyone/open-images-v6/validation/data/6114d8024056b77c.jpg',
     'tags': BaseList(['validation']),
     'metadata': None,
     'detections': <Detections: {
         'detections': BaseList([
             <Detection: {
                 'id': '60e80ea6896a3d57dbbcf8e1',
                 'attributes': BaseDict({}),
                 'tags': BaseList([]),
                 'label': 'Cat',
                 'bounding_box': BaseList([0.04845815, 0.02660754, 0.95154185, 0.93791572]),
                 'mask': None,
                 'confidence': None,
                 'index': None,
                 'IsOccluded': False,
                 'IsTruncated': False,
                 'IsGroupOf': False,
                 'IsDepiction': False,
                 'IsInside': False,
                 'iscrowd': 0,
             }>,
         ]),
     }>,
     'open_images_id': '6114d8024056b77c',
 }>,
 <Sample:

## Export merged dataset

In [13]:
# Export the merged dataset's `detections` field as a COCO detection dataset.
# NOTE(review): the target is an absolute, machine-specific path -- adjust it
# for your environment before running.
coco_export_dir = "/media/data/fiftyone/openimages_dataset/"
datasets[0].export(
    export_dir=coco_export_dir,
    dataset_type=fo.types.COCODetectionDataset,
    label_field="detections",
)

 100% |█████████████████| 509/509 [7.7s elapsed, 0s remaining, 58.6 samples/s]       
