## Notebook to extract image metadata for IDR demo

In [4]:
from IPython.display import display, HTML
import requests

# initial data
IDR_BASE_URL = "https://idr.openmicroscopy.org"

INDEX_PAGE = "%s/webclient/?experimenter=-1" % IDR_BASE_URL

print("Homepage of IDR webclient: " + INDEX_PAGE)
# Create the HTTP session as a plain object instead of inside a `with` block:
# later cells keep calling `session.get(...)`, and leaving a `with` block
# closes the session as soon as this cell finishes.
session = requests.Session()
# Load the index page once through the session
# (presumably so the session picks up the webclient cookies -- TODO confirm).
response = session.get(INDEX_PAGE)
# Raises requests.HTTPError on a 4xx/5xx answer and is a no-op otherwise,
# so no explicit status-code comparison is needed.
response.raise_for_status()

Homepage of IDR webclient: https://idr.openmicroscopy.org/webclient/?experimenter=-1


### Prepare object

In [30]:
class Dataset(object):
    """Plain record for one dataset and the images collected for it.

    Attributes are filled in by the caller after construction:
    ``id`` and ``name`` start as ``None``, ``count`` starts at 0 and
    ``image_list`` starts as a fresh empty list per instance.
    """

    def __init__(self):
        """Create an empty record; every field gets its placeholder value."""
        self.id = self.name = None
        self.count = 0
        self.image_list = []

    def __repr__(self):
        """Return a tab-separated one-line summary, including the number of stored images."""
        stored_images = len(self.image_list)
        fields = (self.id, self.name, self.count, stored_images)
        return "Dataset_ID:%s\tName:%s\tCount:%s\tImage_count:%d" % fields
class Image(object):
    """Plain record for one image.

    All four attributes (``imageId``, ``name``, ``fileId``, ``annoId``)
    start as ``None`` and are filled in by the caller.
    """

    def __init__(self):
        """Create an empty record with every field set to ``None``."""
        self.imageId = self.name = self.fileId = self.annoId = None

    def __repr__(self):
        """Return a tab-separated one-line summary of the four fields."""
        fields = (self.imageId, self.name, self.fileId, self.annoId)
        return "Image_ID:%s\tname:%s\tFile_ID:%s\tAnno_ID:%s" % fields

In [31]:
# Project whose datasets are queried by the cells below.
PROJECT_ID = 2051
# Notebook-level accumulators: two separate, initially empty lists.
dataset_list, image_list = [], []

### Get all datasets associated with a project

In [32]:
# URL template for listing datasets; filled with the project id below.
MAP_URL = "{base}/webclient/api/datasets/?{type}={project_id}"

qs = {'base': IDR_BASE_URL, 'type': 'id', 'project_id': PROJECT_ID}
url = MAP_URL.format(**qs)
print("Dataset from a project reponse: " + url)

datasets_response = session.get(url)
# Fail with a clear HTTP error instead of trying to parse an error page as JSON.
datasets_response.raise_for_status()
datasets_json = datasets_response.json()['datasets']

# Empty the list in place before refilling it, so that re-running this cell
# does not append a second copy of every dataset.
del dataset_list[:]
for a in datasets_json:
    dataset = Dataset()
    dataset.id = a['id']
    dataset.name = a['name']
    dataset.count = a['childCount']
    dataset_list.append(dataset)

Dataset from a project reponse: https://idr.openmicroscopy.org/webclient/api/datasets/?id=2051


### Get images for each dataset

In [33]:
# URL template for listing images; filled with each dataset id below.
MAP_URL = "{base}/webclient/api/images/?{type}={dataset_id}"

for dataset in dataset_list:
    qs = {'base': IDR_BASE_URL, 'type': 'id', 'dataset_id': dataset.id}
    url = MAP_URL.format(**qs)
    images_response = session.get(url)
    # Fail with a clear HTTP error instead of trying to parse an error page as JSON.
    images_response.raise_for_status()

    # Collect into a fresh list and assign it at the end, so that re-running
    # this cell replaces the dataset's images instead of appending duplicates.
    processed_images = []
    for a in images_response.json()['images']:
        # Only images whose name contains 'processed' are kept.
        if 'processed' in a['name']:
            image = Image()
            image.imageId = a['id']
            image.name = a['name']
            image.fileId = a['filesetId']
            processed_images.append(image)
    dataset.image_list = processed_images

### Add annotation for each image

In [34]:
# URL template for listing file annotations; filled with each image id below.
MAP_URL = "{base}/webclient/api/annotations/?type=file&{type}={image_id}"

for dataset in dataset_list:
    for image in dataset.image_list:
        qs = {'base': IDR_BASE_URL, 'type': 'image', 'image_id': image.imageId}
        url = MAP_URL.format(**qs)
        annotations_response = session.get(url)
        # Fail with a clear HTTP error instead of trying to parse an error page as JSON.
        annotations_response.raise_for_status()
        for a in annotations_response.json()['annotations']:
            if a['file']['mimetype'] == 'OMERO.tables':
                # If several annotations match, the last one wins.
                # NOTE(review): this stores the attached file's id (a['file']['id']),
                # not the annotation's own id, despite the attribute name -- confirm intent.
                image.annoId = a['file']['id']

In [36]:
# Print the whole list; each element is rendered through Dataset.__repr__.
print(dataset_list)

[Dataset_ID:15101	Name:Fibroblasts_01	Count:4	Image_count:2, Dataset_ID:15159	Name:Fibroblasts_02	Count:6	Image_count:4, Dataset_ID:15160	Name:Fibroblasts_03	Count:11	Image_count:9, Dataset_ID:15161	Name:Fibroblasts_04	Count:2	Image_count:0, Dataset_ID:15162	Name:Fibroblasts_05	Count:2	Image_count:0, Dataset_ID:15163	Name:Fibroblasts_06	Count:13	Image_count:11, Dataset_ID:15164	Name:Fibroblasts_07	Count:16	Image_count:14, Dataset_ID:15165	Name:Fibroblasts_08	Count:7	Image_count:5, Dataset_ID:15166	Name:Fibroblasts_09	Count:3	Image_count:1, Dataset_ID:15167	Name:Fibroblasts_10	Count:6	Image_count:4, Dataset_ID:15168	Name:Fibroblasts_11	Count:2	Image_count:0, Dataset_ID:15169	Name:Fibroblasts_12	Count:7	Image_count:5, Dataset_ID:15170	Name:Fibroblasts_13	Count:8	Image_count:6, Dataset_ID:15171	Name:Fibroblasts_14	Count:6	Image_count:4, Dataset_ID:15172	Name:Fibroblasts_15	Count:3	Image_count:1, Dataset_ID:15173	Name:Fibroblasts_16	Count:9	Image_count:7, Dataset_ID:15174	Name:Fibroblasts_

### Export to JSON object

Save `dataset_list` to a pickle file.

In [40]:
import pickle

# Serialise the collected datasets to a file named after the project id.
pickle_path = "IDR_project_%s.pickle" % (PROJECT_ID)
with open(pickle_path, 'wb') as pickle_file:
    pickle.dump(dataset_list, pickle_file)

In [41]:
# Print the summary line of the first dataset as a quick sanity check.
print(dataset_list[0])

Dataset_ID:15101	Name:Fibroblasts_01	Count:4	Image_count:2


In [44]:
import json

# Convert the Dataset/Image objects into plain dicts and lists so they can be
# written as JSON: one entry per dataset, with its images under 'children'.
result = []
for ds in dataset_list:
    children = [
        {'Image_ID': img.imageId, 'Image_name': img.name, 'File_ID': img.fileId, 'Anno_ID': img.annoId}
        for img in ds.image_list
    ]
    result.append({
        'Dataset_ID': ds.id,
        'Dataset_name': ds.name,
        # 'count' is the number of images kept for this dataset.
        'count': len(ds.image_list),
        'children': children,
    })

json_path = "IDR_project_%s.json" % (PROJECT_ID)
with open(json_path, 'w') as fout:
    json.dump(result, fout)