# Getting started 
## Importing a labeled dataset
---

In [1]:
!pip3 install labelbox[data]
import labelbox
from labelbox.schema.ontology import OntologyBuilder, Tool, Classification,Option
from labelbox.schema.annotation_import import MALPredictionImport
from labelbox.data.serialization import NDJsonConverter
from labelbox.schema.annotation_import import LabelImport
from labelbox.schema.queue_mode import QueueMode
from labelbox.schema.media_type import MediaType
from labelbox import LabelingFrontend
from labelbox.data.annotation_types import (
    Label,
    Point,
    ImageData,
    Rectangle,
    ObjectAnnotation,
)
from labelbox.schema.data_row_metadata import (
    DataRowMetadata,
    DataRowMetadataField,
    DeleteDataRowMetadata,
    DataRowMetadataKind
)

import requests
import json
import os
import time
from tqdm.notebook import tqdm
import datetime
import random

zsh:1: no matches found: labelbox[data]


In [2]:
## Generic data download function
def download_files(filemap):
    path, uri = filemap    
    ## Download data
    if not os.path.exists(path):
        r = requests.get(uri, stream=True)
        if r.status_code == 200:
            with open(path, 'wb') as f:
                for chunk in r:
                    f.write(chunk)
    return path

# Setup Labelbox client

In [3]:
## Generate API key: https://app.labelbox.com/account/api-keys
import os
client = labelbox.Client(api_key=os.environ['LABELBOX_TEST_API_KEY_LOCAL'], endpoint="http://localhost:8080/graphql")

DATA_ROWS = "https://storage.googleapis.com/labelbox-datasets/VHR_geospatial/geospatial_datarows.json"
ANNOTATIONS = "https://storage.googleapis.com/labelbox-datasets/VHR_geospatial/geospatial_annotations.json"

# Download a public dataset

In [4]:
download_files(("data_rows.json", DATA_ROWS))
download_files(("annotations.json", ANNOTATIONS))

'annotations.json'

In [5]:
with open('data_rows.json', 'r') as fp:
    data_rows = json.load(fp)

with open('annotations.json', 'r') as fp:
    annotations = json.load(fp)

# Create a dataset

In [6]:
dataset = client.create_dataset(name="Geospatial vessel detection")

# Import Data Rows with Metadata

In [9]:
task = dataset.create_data_rows(data_rows)
task.wait_till_done()

Examine a Data Row

In [10]:
datarow = next(dataset.data_rows())
print(datarow)

<DataRow {
    "created_at": "2023-02-15 00:36:08+00:00",
    "external_id": "positive_image_set/497.jpg",
    "global_key": null,
    "media_attributes": {},
    "metadata": [],
    "metadata_fields": [],
    "row_data": "https://storage.googleapis.com/labelbox-datasets/VHR_geospatial/positive_image_set/497.jpg",
    "uid": "cle4xzy7s02ijtjzmcj278blv",
    "updated_at": "2023-02-15 00:36:08+00:00"
}>


# Setup a labeling project

In [11]:
ontology = OntologyBuilder()

for tool in annotations['categories']:
  print(tool['name'])
  ontology.add_tool(Tool(tool = Tool.Type.BBOX, name = tool['name']))

ontology = client.create_ontology("Vessel detection ontology", ontology.asdict())
project = client.create_project(name="Vessel detection", media_type=MediaType.Image)
project.setup_editor(ontology)
ontology_from_project = OntologyBuilder.from_project(project)

Default createProject behavior will soon be adjusted to prefer batch projects. Pass in `queue_mode` parameter explicitly to opt-out for the time being.


airplane
ship
storage_tank
baseball_diamond
tennis_court
basketball_court
ground_track_field
harbor
bridge
vehicle


Prepare and queue batch of Data Rows to the project

In [12]:
data_rows = [dr.uid for dr in list(dataset.export_data_rows())]

# Randomly select 200 Data Rows
sampled_data_rows = random.sample(data_rows, 200)

batch = project.create_batch(
  "Initial batch", # name of the batch
  sampled_data_rows, # list of Data Rows
  1 # priority between 1-5
)

ResourceConflict: Batch with name 'Initial batch' already exists in this project("Batch with name 'Initial batch' already exists in this project", None)

# Process ground truth annotations for import

In [13]:
queued_data_rows = project.export_queued_data_rows()
ground_truth_list = list()

for datarow in queued_data_rows:
  annotations_list = []
  folder = datarow['externalId'].split("/")[0]
  id = datarow['externalId'].split("/")[1]
  if folder == "positive_image_set":
    for image in annotations['images']:
      if (image['file_name']==id):
        for annotation in annotations['annotations']:
          if annotation['image_id'] == image['id']:
            bbox = annotation['bbox']
            id = annotation['category_id'] - 1
            class_name = ontology_from_project.tools[id].name
            annotations_list.append(ObjectAnnotation(
                name = class_name,
                value = Rectangle(start = Point(x = bbox[0], y = bbox[1]), end = Point(x = bbox[2]+bbox[0], y = bbox[3]+bbox[1])),
            ))
  image = ImageData(uid = datarow['id'])
  ground_truth_list.append(Label(data = image, annotations = annotations_list))

# Import ground truth annotation

In [14]:
ground_truth_ndjson = list(NDJsonConverter.serialize(ground_truth_list))
print(ground_truth_ndjson)
start_time = time.time()
## Upload annotations
upload_task = LabelImport.create_from_objects(client, project.uid, "geospatial-import-job-1", ground_truth_ndjson)
print(upload_task)

#Wait for upload to finish (Will take up to five minutes)
upload_task.wait_until_done()
print(upload_task.errors)
print("--- Finished in %s mins ---" % ((time.time() - start_time)/60))

<LabelImport {
    "error_file_url": null,
    "input_file_url": "https://storage.googleapis.com/lb-dev0-na-us-import/uploaded_predictions/cldos55nl000ckuu377xc8b65/d89112c6-877e-db9d-a85d-030122987dad-cle4y04f102j9tjzm8fo824in__geospatial-import-job-1.ndjson?GoogleAccessId=sa-lb-api-0da28c-7505d6%40lb-dev0-na-us.iam.gserviceaccount.com&Expires=1677026550&Signature=s8nChdOGNCYrY4yUVvabuwYcEXVCQKEyDUyvSnIhS4c8fiv4KGuxU%2FOaSLHNtaKZu10Mjy6PQtv23Tq4rO%2F2l23%2BB%2BP5awhRTDNdI5zKSk1xlb%2BY74pqp0MtBNicGWvlY1LSbeoy6nry20yaDMYOdO777Q71oAKemk1BL%2BA1iWzs%2BZcuObD3FEs1zDQDxk1Y%2BZEQvBdEpNZgjFJ1AmEthRx2BopUo%2Fx6lwonNDWWVu8MvSohwMVsxFEVX2WnDlcoZhL%2FAJROxYq7SBtEy2jAUW%2Bhg5eLDIq1AYNq8HH6ra0mfMTBXEqTcS0SgnNAmRup%2FBhBaysnWq%2B7W4lIoAewRw%3D%3D",
    "name": "geospatial-import-job-1",
    "progress": null,
    "state": "AnnotationImportState.RUNNING",
    "status_file_url": null,
    "uid": "a0996f74-9780-0a1c-726d-054a1d6efd75"
}>


Exception ignored in: <function tqdm.__del__ at 0x111eb6e60>
Traceback (most recent call last):
  File "/Users/valbro13/.pyenv/versions/3.10.9/lib/python3.10/site-packages/tqdm/std.py", line 1162, in __del__
    self.close()
  File "/Users/valbro13/.pyenv/versions/3.10.9/lib/python3.10/site-packages/tqdm/notebook.py", line 288, in close
    self.disp(bar_style='danger', check_delay=False)
AttributeError: 'tqdm_notebook' object has no attribute 'disp'
Exception ignored in: <function tqdm.__del__ at 0x111eb6e60>
Traceback (most recent call last):
  File "/Users/valbro13/.pyenv/versions/3.10.9/lib/python3.10/site-packages/tqdm/std.py", line 1162, in __del__
    self.close()
  File "/Users/valbro13/.pyenv/versions/3.10.9/lib/python3.10/site-packages/tqdm/notebook.py", line 288, in close
    self.disp(bar_style='danger', check_delay=False)
AttributeError: 'tqdm_notebook' object has no attribute 'disp'


ValueError: Import failed.

In [None]:
# queued_data_rows = [dr['id'] for dr in list(project.export_queued_data_rows())]
# data_rows = [dr.uid for dr in list(dataset.export_data_rows())]
# data_rows_not_queued = list(set(data_rows)- set(queued_data_rows))

# # Randomly select 200 Data Rows
# sampled_data_rows = random.sample(data_rows_not_queued, 200)

# batch = project.create_batch(
#   "Second batch", # name of the batch
#   sampled_data_rows, # list of Data Rows
#   5 # priority between 1-5
# )
