
# Using The darwin-py SDK

## Modules

In [1]:
# Required darwin imports for SDK use in the notebook
import darwin
import darwin.importer as importer
from darwin.importer import get_importer
from darwin.client import Client
from darwin.item import DatasetItem
from darwin.datatypes import AnnotationClass
from darwin.dataset import RemoteDataset
from darwin.exceptions import NotFound


# Required python imports for notebook usage
import re
import glob
import os
import json
from IPython.display import HTML
from IPython.display import IFrame
import time
from pathlib import Path

## Initialise Darwin

### Generate API Key

<div class="alert alert-block alert-info"> <b>Tip:</b> You can generate your API key <a style="font-size: 120%; text-decoration: underline; cursor: pointer;" href="https://darwin.v7labs.com/?settings=api-keys"> here </a> 
    
>  Run the cell below for a video guide on how to generate an API key </div>

In [None]:
# Embed the Loom video guide on generating an API key; the player renders as this cell's output
IFrame("https://www.loom.com/embed/3d414eb9c0444b0f9309d49b8e393414", width=950, height=400)

<div class="alert alert-block alert-warning">
<b>🚧 Permissions:</b> The API key permissions selected when creating the key will determine which commands in the SDK are allowed. If the key has insufficient permissions for a certain action an error will be shown: <i style="color: #E75C58">Insufficient permissions </i> or <i style="color: #E75C58"> Invalid API key </i>.
</div>

### Authenticate

In [6]:
# Prefer the DARWIN_API_KEY environment variable when it is set, so the secret does
# not have to be pasted into (and saved with) the notebook. Otherwise fall back to
# the placeholder, which must be replaced with a real key before running.
API_KEY = os.environ.get("DARWIN_API_KEY", "<YOUR-API-KEY>")

# Authenticated client used by the rest of the notebook
client = Client.from_api_key(API_KEY)

### Authenticate From A Config 

<div class="alert alert-block alert-info"> <b>Tip:</b> You have 2 options here: either use your local config.yaml found in your darwin configuration <i style="color: #abaaa6"> ~/.darwin/config.yaml </i>, or create a yaml config file and pass its file path through to the client object. </div>

#### From local darwin config

In [None]:
# Build the client from the local darwin configuration instead of an explicit API key
client = Client.local()

#### From specific config.yaml file

In [None]:
# Placeholder path: point this at your own yaml config file.
# The path is passed as a pathlib.Path with "~" expanded explicitly, since a plain
# "~/..." string is not resolved to the home directory on its own.
client = Client.from_config(Path("~/your_yaml_config_path.yaml").expanduser())

## Interact With Datasets

### Create / Load Datasets

In [7]:
# Placeholder: replace with the name of the dataset to create
dataset_name = "<YOUR-DATASET-NAME>"
# Create the dataset and keep the returned handle for the cells below
dataset = client.create_dataset(dataset_name)

<div class="alert alert-block alert-warning">
<b>🚧 Error Handling:</b> To avoid a <i style="color: #E75C58"> Name Taken </i> error <u>(or simply load an already created dataset - see except)</u>, users can add a conditional block like below.
</div>

In [20]:
# Placeholder: replace with the name of the dataset to create or load
dataset_name = "<DATASET-NAME>"
# Slug form of the name (lower-case, trimmed, spaces -> hyphens), used for the lookup below
slug_dataset_name = dataset_name.lower().strip().replace(' ', '-')

try:
    dataset = client.create_dataset(dataset_name)
    print('New dataset created.')
except darwin.exceptions.NameTaken:
    # The name is already in use: load the existing dataset into the same `dataset`
    # variable, so the following cells work whichever branch ran.
    dataset = client.get_remote_dataset(slug_dataset_name)
    print('Dataset name already created, connected to existing.')

Dataset name already created, connected to existing.


### View Datasets

#### All local folders

In [None]:
# list_local_datasets() yields the local dataset folders as pathlib.Path objects, so
# only path information is available here (remote fields such as id, slug, item count
# and progress exist on the remote datasets listed in the next section).
[{'name': local_path.name, 'path': str(local_path)} for local_path in client.list_local_datasets()]

#### All remote datasets on darwin

In [4]:
# Summarise every remote dataset returned by the client as one small dict per dataset
[
    {
        'name': remote_ds.name,
        'id': remote_ds.dataset_id,
        'slug_name': remote_ds.slug,
        'no_files': remote_ds.item_count,
        'progress': remote_ds.progress,
    }
    for remote_ds in client.list_remote_datasets()
]

[{'name': 'Ailly Demo',
  'id': 618445,
  'slug_name': 'ailly-demo',
  'no_files': 116,
  'progress': 0.0},
 {'name': 'bb-aa-conversion',
  'id': 616167,
  'slug_name': 'bb-aa-conversion',
  'no_files': 1,
  'progress': 0.0},
 {'name': 'Databricks Demo',
  'id': 628282,
  'slug_name': 'databricks-demo',
  'no_files': 0,
  'progress': 0.0},
 {'name': 'Demo Dataset',
  'id': 613825,
  'slug_name': 'demo-dataset',
  'no_files': 87,
  'progress': 0.0},
 {'name': 'football-players',
  'id': 614128,
  'slug_name': 'football-players',
  'no_files': 130,
  'progress': 0.023076923076923078},
 {'name': 'intel-vid-testing',
  'id': 618300,
  'slug_name': 'intel-vid-testing',
  'no_files': 800,
  'progress': 0.0},
 {'name': 'Northvolt-Demo',
  'id': 622090,
  'slug_name': 'northvolt-demo',
  'no_files': 5,
  'progress': 0.2},
 {'name': 'ProductShelf',
  'id': 620713,
  'slug_name': 'productshelf',
  'no_files': 2,
  'progress': 0.0},
 {'name': 'slot-videos',
  'id': 622401,
  'slug_name': 'slot-vi

### Delete Datasets

<div class="alert alert-block alert-danger"><b>WARNING:</b> This command will delete any created dataset, so be careful when using.</div>

In [None]:
# 'dataset_id' and 'slug_team_name' are placeholders: substitute your own values before running.
# NOTE(review): the dataset id is presumably expected as the numeric id, not a string — confirm against the SDK.
client.archive_remote_dataset('dataset_id', 'slug_team_name')

<div class="alert alert-block alert-info"> <b>Tip:</b> Alternatively, you can fetch your dataset id by navigating to your dataset in darwin, then just copy the id from your url - e.g.<br> &nbsp; &nbsp; &nbsp; &nbsp; <i style="color: #abaaa6"> https://darwin.v7labs.com/datasets/<span style="color: #fc8803">123456</span>/dataset-management </i>
</div>

### Upload Files To Dataset

In [None]:
# Placeholder path: list the local files you want to upload
list_of_files = ['./your_file_path/file.png']

# Push the listed files to the dataset
dataset.push(list_of_files)

#### Upload folder of files 

In [5]:
# File extensions that are collected for upload (matched case-insensitively)
compat_ftypes = (".avi", ".bmp", ".dcm", ".gz", ".hevc", ".jpeg", ".jpg", ".mkv", ".mov", ".mp4", ".ndpi", 
                 ".nii.gz", ".nii", ".pdf", ".png", ".rvg", ".svs", ".tif", ".tiff", ".webp")

# Placeholder: replace with the folder you want to upload (relative or absolute path)
your_folder_path = "./your_folder_path"

# Walk the folder recursively and collect every supported file
upload_files = []
for root, dirs, files in os.walk(your_folder_path):
    for ix, file in enumerate(files):
        # Skip names that start with "." (e.g. ".png"): they have an empty stem
        if file.lower().endswith(compat_ftypes) and not file.startswith("."):
            print(f'{ix} - {file.split(".")[0]}')
            upload_files.append(os.path.join(root, file))

# Push everything collected above; the returned upload handler is shown as the cell output
dataset.push(upload_files)

0 - frame_6
1 - frame_7
2 - frame_5
3 - frame_4
4 - frame_0
5 - frame_1
6 - frame_3
7 - frame_2


<darwin.dataset.upload_manager.UploadHandlerV2 at 0x11c13fac0>

### Download Dataset

#### Create an export

In [8]:
# Slugify the release name: lower-case, trimmed, spaces replaced by hyphens
release_name = "Your Release Name".lower().strip().replace(' ', '-') #this should be in slug format
# Request an export of the dataset under that release name
dataset.export(release_name)

#### Download export

In [1]:
# Poll until the release exists, then download it as a zip next to the notebook.
# The wait is bounded so that a wrong release name cannot hang the kernel forever.
MAX_ATTEMPTS = 30   # 30 attempts x 10 s => gives up after roughly 5 minutes
POLL_SECONDS = 10

for attempt in range(1, MAX_ATTEMPTS + 1):
    try:
        print("Trying to get release")
        release = dataset.get_release(release_name)
    except NotFound:
        # Not available yet: wait and retry
        print("Release not ready yet!")
        print("Waiting for Release to be created...")
        time.sleep(POLL_SECONDS)
        continue

    print("Got Release, downloading it!")
    release.download_zip(Path(f"./{release_name}.zip"))
    break
else:
    # Runs only when the loop finished without `break`, i.e. the release never appeared
    raise TimeoutError(
        f"Release '{release_name}' was not available after {MAX_ATTEMPTS * POLL_SECONDS} seconds"
    )

## Annotations

### View Existing Classes

In [8]:
# Fetch the annotation classes from the remote and display them as the cell output
class_list = dataset.fetch_remote_classes()
class_list

[{'annotation_class_image_url': None,
  'annotation_types': ['polygon',
   'attributes',
   'auto_annotate',
   'measures',
   'inference'],
  'dataset_id': None,
  'datasets': [{'id': 613825}],
  'deletion_blocked': False,
  'description': None,
  'id': 221225,
  'images': [],
  'inserted_at': '2023-03-08T13:11:19',
  'metadata': {'_color': 'rgba(143,255,0,1.0)',
   'attributes': {},
   'auto_annotate': {},
   'inference': {},
   'measures': {},
   'polygon': {}},
  'name': 'correction-football',
  'team_id': 3980,
  'updated_at': '2023-03-08T14:54:07',
  'workflows': [],
  'available': True},
 {'annotation_class_image_url': None,
  'annotation_types': ['polygon',
   'attributes',
   'text',
   'auto_annotate',
   'measures',
   'inference'],
  'dataset_id': None,
  'datasets': [{'id': 613825}],
  'deletion_blocked': False,
  'description': None,
  'id': 219936,
  'images': [],
  'inserted_at': '2023-03-03T14:55:14',
  'metadata': {'_color': 'rgba(219,0,255,1.0)',
   'attributes': {},

In [21]:
# Add every class fetched above to another dataset, using the first entry of each
# class's 'annotation_types' list as its type.
# NOTE: `new_dataset` is not assigned in the cells shown here. Set it to the target
# dataset first, e.g. new_dataset = client.get_remote_dataset("<TARGET-DATASET-SLUG>")
for _class in class_list:
    print(_class['name'], _class['annotation_types'][0])
    new_dataset.add_annotation_class(AnnotationClass(_class['name'], _class['annotation_types'][0]))

correction-football polygon
football polygon
goal scored tag
player_example bounding_box


### Create New Annotation Class

<u><b> Class Type Options: </b></u>
- polygon <i>(id = 3)</i>
- tag <i>(id = 1)</i>
- line <i>(id = 11)</i>
- ellipse <i>(id = 60)</i>
- bounding_box <i>(id = 2)</i>
- skeleton <i>(id = 12)</i>
- keypoint <i>(id = 7)</i>
- cuboid <i>(id = 8)</i>

In [None]:
# This can be done through the dataset object.
# 'class_name' and 'class_type' are placeholders: use your own class name and one of
# the class type options listed above (e.g. 'polygon').

dataset.create_annotation_class('class_name', 'class_type')

In [None]:
# Alternatively, it can be done using the client object (this option gives functionality to create 
# multiple classes at once)
# NOTE: dataset_id and annotation_class_id are placeholders that are not assigned in the
# cells shown here; set them before running (the id of each class type is listed above).

client.create_annotation_class(dataset_id, [annotation_class_id], 'name of class')

## e.g. to create a polygon, do:
## client.create_annotation_class(123456, [3], 'polygon_class')

### Add Existing Class To Dataset

<div class="alert alert-block alert-success">
<b>Note:</b> If the class already exists for this dataset, no duplicate will be added and no error will be raised.
</div>

In [18]:
# Describe the class to attach by its name and annotation type
existing_class = AnnotationClass('player', 'bounding_box')

# Add it to the dataset; the returned class record is shown as the cell output
dataset.add_annotation_class(existing_class)

{'annotation_class_image_url': None,
 'annotation_types': ['bounding_box', 'measures', 'inference'],
 'dataset_id': None,
 'datasets': [{'id': 614128}, {'id': 613825}],
 'deletion_blocked': False,
 'description': None,
 'id': 212698,
 'images': [],
 'inserted_at': '2023-02-09T17:42:57',
 'metadata': {'_color': 'rgba(255,199,0,1.0)',
  'bounding_box': {},
  'inference': {},
  'measures': {}},
 'name': 'player',
 'team_id': 3980,
 'updated_at': '2023-02-09T17:42:57',
 'workflows': []}

## Comments

<div class="alert alert-block alert-info"> <b>Tip:</b> The API is more commonly used and an easier option for comment posting, but we have outlined how to do this from the SDK below also. </div>

### Post a Comment

#### Create a slot

In your json export from the platform, there will be a <b>'slots'</b> key for each file exported from your dataset.
This is required to create the dataset item, which is the primary argument required for posting a comment to a specific file. 

Either copy & paste from your export <i style='color: #abaaa6'>(see example below)</i>:

In [None]:
# Example 'slots' value in the shape found in an export; the team slug, file name,
# urls and sizes below are sample values to be replaced with those from your own export.
slots = [
      {
        "type": "image",
        "slot_name": "0",
        "width": 100,
        "height": 200,
        "thumbnail_url": "https://darwin.v7labs.com/api/v2/teams/slug-team-name/files/fa416a35-7964-4c34-8c9e-47075f8f7292/thumbnail",
        "source_files": [
          {
            "file_name": "file_name.jpeg",
            "url": "https://darwin.v7labs.com/api/v2/teams/slug-team-name/uploads/26830af8-3c7d-4f07-ad31-c5e6d05a30c7"
          }
        ]
      }
    ]

<u>Or</u>, you can programmatically get the <b>slot</b> from your export using the below <i>function</i>: 
<a id='export-function'></a>

In [None]:
# The below function finds the json export of a given dataset item inside your export folder.
## Alternatively you can just pass the exact export file path in question

def get_darwin_json_export(dataset_item_name, export_location_path, exact_export_file_path=None):
    """Load the darwin json export belonging to one dataset item.

    Parameters
    ----------
    dataset_item_name : str
        Item name to look for; compared with ``export['item']['name']`` of each file.
    export_location_path : str
        Folder whose json files are searched (ignored when an exact path is given).
    exact_export_file_path : str, optional
        When given, this file is loaded and returned directly, without any name check.

    Returns
    -------
    dict or str
        The parsed export of the first matching file (files are visited in sorted
        order), or the message string
        ``'NO json export exists for this file, check your dataset_item_name'``
        when no file in the folder matches.
    """
    not_found_message = 'NO json export exists for this file, check your dataset_item_name'

    if exact_export_file_path:
        # `with` closes the file even if json parsing fails
        with open(exact_export_file_path) as f:
            return json.load(f)

    for file in sorted(glob.glob(export_location_path + r'/*json')):
        with open(file) as f:
            temp_export = json.load(f)
        try:
            item_name = temp_export['item']['name']
        except (KeyError, TypeError):
            # Not shaped like an item export: skip it rather than abort the search
            continue
        if item_name == dataset_item_name:
            # Stop at the first match so later, non-matching files cannot overwrite it
            return temp_export

    # Reached for an empty folder as well as for a folder without a matching export
    return not_found_message

In [None]:
# 'file_name' and 'folder_path' are placeholders: pass the item name and the folder holding your exported json files
example_export = get_darwin_json_export(r'file_name',r'folder_path')
# NOTE: if no export matched, example_export is a message string and the lookup below will fail
slot = example_export['item']['slots']
slot

#### Create a dataset item

In [None]:
# Look up the id of the remote dataset whose slug matches the one recorded in the export
# (the first match is taken; an IndexError is raised when nothing matches)
datasetId = [
    remote_ds.dataset_id
    for remote_ds in client.list_remote_datasets()
    if remote_ds.slug == example_export['item']['source_info']['dataset']['slug']
][0]
datasetId

In [None]:
# Build the DatasetItem for the exported file from the fields of its json export
dataset_item = DatasetItem(id=example_export['item']['source_info']['item_id'], 
                           filename=example_export['item']['name'], 
                           status=None, archived=False, filesize=None, dataset_id=datasetId, 
                           dataset_slug=example_export['item']['source_info']['dataset']['slug'], 
                           seq=0, path=example_export['item']['name'], slots=example_export['item']['slots'], 
                           current_workflow_id=None, 
                           # "<team-slug>/<dataset-slug>", built from two adjacent f-strings: a
                           # backslash continuation inside a single string would embed the next
                           # line's indentation in the value.
                           current_workflow=(
                               f"{example_export['item']['source_info']['team']['slug']}/"
                               f"{example_export['item']['source_info']['dataset']['slug']}"
                           )
                           )

#### Post the comment

In [None]:
# Post a comment box on the dataset item
dataset.post_comment(
    item=dataset_item,    # the DatasetItem to comment on
    text='test comment',  # the text of the comment
    x=15,                 # x-coordinate where the comment will appear
    y=15,                 # y-coordinate where the comment will appear
    w=10,                 # width of the comment box
    h=10,                 # height of the comment box
)

In [None]:
# View the annotations in the export to check where the relevant annotation box is located,
# then set the x & y parameters of the comment accordingly

example_export['annotations']