## Setup

In [11]:
import shutil
from pathlib import Path

import fiftyone as fo

## Import datasets

In [2]:
# Dataset 1 (c2_release): COCO annotation file and the image directory it describes.
labels_path = "/media/data/datasets/TIL2021_CV_dataset/images/c2_release/labels.json"
data_path = "/media/data/datasets/TIL2021_CV_dataset/images/c2_release/images"

# Import the images and their COCO detection labels as a FiftyOne dataset.
dataset_1 = fo.Dataset.from_dir(
    data_path=data_path,
    labels_path=labels_path,
    dataset_type=fo.types.COCODetectionDataset,
)

 100% |███████████████| 2136/2136 [3.9s elapsed, 0s remaining, 548.5 samples/s]      


In [3]:
# Dataset 2 (c3_release): COCO annotation file and the image directory it describes.
labels_path = "/media/data/datasets/TIL2021_CV_dataset/images/c3_release/labels.json"
data_path = "/media/data/datasets/TIL2021_CV_dataset/images/c3_release/images"

# Import the images and their COCO detection labels as a FiftyOne dataset.
dataset_2 = fo.Dataset.from_dir(
    data_path=data_path,
    labels_path=labels_path,
    dataset_type=fo.types.COCODetectionDataset,
)

 100% |███████████████| 1068/1068 [1.8s elapsed, 0s remaining, 609.7 samples/s]         


## Check datasets

In [4]:
# Show the summary of dataset 1 (name, sample count, field schema) right after import.
dataset_1

Name:        2021.07.06.06.15.20
Media type:  image
Num samples: 2136
Persistent:  False
Tags:        []
Sample fields:
    id:           fiftyone.core.fields.ObjectIdField
    filepath:     fiftyone.core.fields.StringField
    tags:         fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata:     fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.Metadata)
    ground_truth: fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)

In [13]:
# Dataset-level info dict of dataset 1; the output lists its COCO categories (ids 1-7).
dataset_1.info

{'contributor': '',
 'date_created': '',
 'description': '',
 'url': '',
 'version': '',
 'year': '',
 'licenses': [{'name': '', 'id': 0, 'url': ''}],
 'categories': [{'id': 1, 'name': 'Cat', 'supercategory': ''},
  {'id': 2, 'name': 'Dog', 'supercategory': ''},
  {'id': 3, 'name': 'Bird', 'supercategory': ''},
  {'id': 4, 'name': 'Chicken', 'supercategory': ''},
  {'id': 5, 'name': 'Snake', 'supercategory': ''},
  {'id': 6, 'name': 'Elephant', 'supercategory': ''},
  {'id': 7, 'name': 'Dinosaur', 'supercategory': ''}]}

In [23]:
# Class list of dataset 1; note the placeholder '0' entry at index 0 in the output.
dataset_1.default_classes

['0', 'Cat', 'Dog', 'Bird', 'Chicken', 'Snake', 'Elephant', 'Dinosaur']

In [5]:
# Show the summary of dataset 2 (name, sample count, field schema) right after import.
dataset_2

Name:        2021.07.06.06.15.24
Media type:  image
Num samples: 1068
Persistent:  False
Tags:        []
Sample fields:
    id:           fiftyone.core.fields.ObjectIdField
    filepath:     fiftyone.core.fields.StringField
    tags:         fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata:     fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.Metadata)
    ground_truth: fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)

In [14]:
# Dataset-level info dict of dataset 2; the output lists 8 COCO categories, including 'Crocodile'.
dataset_2.info

{'contributor': '',
 'date_created': '',
 'description': '',
 'url': '',
 'version': '',
 'year': '',
 'licenses': [{'name': '', 'id': 0, 'url': ''}],
 'categories': [{'id': 1, 'name': 'Cat', 'supercategory': ''},
  {'id': 2, 'name': 'Dog', 'supercategory': ''},
  {'id': 3, 'name': 'Bird', 'supercategory': ''},
  {'id': 4, 'name': 'Chicken', 'supercategory': ''},
  {'id': 5, 'name': 'Snake', 'supercategory': ''},
  {'id': 6, 'name': 'Elephant', 'supercategory': ''},
  {'id': 7, 'name': 'Dinosaur', 'supercategory': ''},
  {'id': 8, 'name': 'Crocodile', 'supercategory': ''}]}

In [20]:
# Class list of dataset 2; note the placeholder '0' entry at index 0 in the output.
dataset_2.default_classes

['0',
 'Cat',
 'Dog',
 'Bird',
 'Chicken',
 'Snake',
 'Elephant',
 'Dinosaur',
 'Crocodile']

## Merge datasets

In [24]:
# Fold dataset_2 into dataset_1 in place. overwrite_info=True lets dataset_2's
# info dict replace dataset_1's, so the merged dataset carries the larger
# category list (dataset_2 additionally defines 'Crocodile').
dataset_1.merge_samples(
    dataset_2,
    overwrite_info=True,
)

### Sanity check

In [25]:
# Sanity check: dataset_1 should now hold the samples of both datasets (2136 + 1068 = 3204).
dataset_1

Name:        2021.07.06.06.15.20
Media type:  image
Num samples: 3204
Persistent:  False
Tags:        []
Sample fields:
    id:           fiftyone.core.fields.ObjectIdField
    filepath:     fiftyone.core.fields.StringField
    tags:         fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata:     fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.Metadata)
    ground_truth: fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)

In [26]:
# Sanity check: after the merge the info dict should list all 8 categories, including 'Crocodile'.
dataset_1.info

{'contributor': '',
 'date_created': '',
 'description': '',
 'url': '',
 'version': '',
 'year': '',
 'licenses': [{'name': '', 'id': 0, 'url': ''}],
 'categories': [{'id': 1, 'name': 'Cat', 'supercategory': ''},
  {'id': 2, 'name': 'Dog', 'supercategory': ''},
  {'id': 3, 'name': 'Bird', 'supercategory': ''},
  {'id': 4, 'name': 'Chicken', 'supercategory': ''},
  {'id': 5, 'name': 'Snake', 'supercategory': ''},
  {'id': 6, 'name': 'Elephant', 'supercategory': ''},
  {'id': 7, 'name': 'Dinosaur', 'supercategory': ''},
  {'id': 8, 'name': 'Crocodile', 'supercategory': ''}]}

In [27]:
# Sanity check: the class list should include 'Crocodile' after the merge.
dataset_1.default_classes

['0',
 'Cat',
 'Dog',
 'Bird',
 'Chicken',
 'Snake',
 'Elephant',
 'Dinosaur',
 'Crocodile']

## Export merged dataset

In [28]:
# Export the merged dataset to `export_dir` in COCO detection format.
#
# A directory that does not exist yet is exported to directly. An existing
# directory is only deleted and re-exported after the user explicitly confirms,
# because `shutil.rmtree` is irreversible.
export_dir = Path("c3_combined")

proceed_with_export = True
if export_dir.is_dir():
    answer = input(f'{export_dir.name} will be overwritten. Are you sure? y/n: ')
    # Tolerate surrounding whitespace and an upper-case "Y"; anything else means "no".
    proceed_with_export = answer.strip().lower() == 'y'
    if proceed_with_export:
        # Remove the previous export so no stale files are left behind.
        shutil.rmtree(export_dir)

if proceed_with_export:
    dataset_1.export(
        dataset_type=fo.types.COCODetectionDataset,
        export_dir=str(export_dir),
        label_field="ground_truth",
    )
else:
    # Make the no-op visible instead of finishing the cell silently.
    print(f'Export skipped: {export_dir.name} was left untouched.')

c3_combined will be overwritten. Are you sure? y/n: y
 100% |███████████████| 3204/3204 [4.7s elapsed, 0s remaining, 737.7 samples/s]      
