# Using the HuBMAP Cells API and the HRA API for Spatial Prediction

Use the Cells API to find all HuBMAP datasets whose cells were annotated with Cell Ontology terms via Azimuth, then use the cell population of each dataset to find relevant datasets, RUI locations, and anatomical structures using the Human Reference Atlas HRApop features in the HRA API. Visualize the results via HRA Jupyter Widgets, including a custom EUI.

# Install libraries

In [None]:
!pip install --upgrade hra_api_client hubmap_api_py_client hra_jupyter_widgets

# Part 1: Get Cell Populations from HuBMAP Datasets using the Cells API

Imports / functions

In [2]:
from hubmap_api_py_client import Client
from collections import Counter

Set up the HuBMAP Cells API Client

In [3]:

# Every Cells API query in this notebook goes through this single client,
# which is bound to the endpoint URL below.
endpoint_url = "https://cells.api.hubmapconsortium.org/api/"
client = Client(endpoint_url)

Find cell types that have annotated datasets

In [None]:
# Ask the Cells API for every cell type it knows about; stop here if the
# query comes back empty, because every later cell depends on it.
all_celltypes = client.select_celltypes()
assert len(all_celltypes) > 0

# Keep only the "grouping_name" field of each record; these names are what
# the dataset query below is filtered on.
celltypes = [record["grouping_name"] for record in all_celltypes.get_list()]
print('cell types:', len(celltypes))

Find all datasets that have been annotated with cell types

In [None]:
# Select the datasets that contain at least one of the known cell types,
# then materialize the result set as a list.
dataset_results = client.select_datasets(where='celltype', has=celltypes)
datasets = dataset_results.get_list()
assert len(datasets) > 0

# Only the dataset UUIDs are needed by the rest of the notebook.
uuids = [dataset['uuid'] for dataset in datasets]
print('annotated datasets with cell types:', len(datasets))

Get cells for each annotated dataset

In [7]:
# Per-dataset results, all keyed by dataset UUID:
#   dataset_cells    -> Counter mapping cell type to number of cells
#   dataset_organ    -> lower-cased organ name reported on the dataset's cells
#   dataset_modality -> modality reported on the dataset's cells
dataset_cells = {}
dataset_organ = {}
dataset_modality = {}

for uuid in uuids:
    cells_in_dataset = client.select_cells(where='dataset', has=[uuid])
    all_cells = cells_in_dataset.get_list().results_set.get_list()

    # Tally the cells per cell type and remember the last cell seen, so the
    # organ/modality lookup happens once per dataset instead of once per cell.
    population = Counter()
    last_cell = None
    for cell in all_cells:
        population[cell['cell_type']] += 1
        last_cell = cell

    # A dataset without cells gets an empty population and no organ/modality
    # entry, exactly as before.
    if last_cell is not None:
        dataset_organ[uuid] = last_cell['organ'].lower()
        dataset_modality[uuid] = last_cell['modality']

    dataset_cells[uuid] = population

Show raw data results for one dataset

In [None]:
# Inspect the first dataset only: its five most common cell types, its organ,
# and its modality.
first_uuid = uuids[0]
print(first_uuid, 'top cell types:', dataset_cells[first_uuid].most_common(5))
print(first_uuid, 'organ:', dataset_organ[first_uuid])
print(first_uuid, 'modality:', dataset_modality[first_uuid])

# Part 2: Predict Spatial Locations Using the HRA API

Imports / functions

In [9]:
import hra_api_client
import time
import json
from hra_api_client.api import v1_api, hra_pop_api

Set up the HRA API Client

In [10]:
# Build one shared ApiClient for the HRA API and wrap it in the two API
# facades used below: the general v1 API and the HRApop API.
hra_api_endpoint_url = "https://apps.humanatlas.io/api"
configuration = hra_api_client.Configuration(hra_api_endpoint_url)
api_client = hra_api_client.ApiClient(configuration)
hra_api = v1_api.V1Api(api_client)

# The variable below deliberately keeps the name `hra_pop_api` (later cells use
# it), which rebinds the imported module of the same name. Resolve the class
# through the package path instead of the bare module name so this cell can be
# re-run without raising AttributeError on the already-rebound name.
hra_pop_api = hra_api_client.api.hra_pop_api.HraPopApi(api_client)

Get supported organ lookup

In [None]:
# Map each supported organ's lower-cased label to its id so organs can be
# looked up by name.
organ_lookup = {
    organ.label.lower(): organ.id
    for organ in hra_pop_api.supported_organs()
}
organ_lookup

Get HRApop cell summary reports for each dataset

In [None]:
# HRApop cell summary report for each dataset, keyed by dataset UUID.
dataset_summary = {}

from ipywidgets import IntProgress
progress = IntProgress(min=0, max=len(uuids))
display(progress)

for uuid in uuids:
    progress.value += 1
    total_count = sum(dataset_cells[uuid].values())

    # Build a two-column CSV: one row per cell type, where the "percentage"
    # column holds that cell type's share of the dataset (count / total).
    csv_rows = ["cell_id,percentage"]
    for cell_id, count in dataset_cells[uuid].items():
        csv_rows.append(f"{cell_id},{count / total_count}")
    csv = "\n".join(csv_rows)

    # The *_without_preload_content variant returns the raw HTTP response, so
    # the JSON body is decoded here.
    response = hra_pop_api.cell_summary_report_without_preload_content({ "csvString": csv })
    dataset_summary[uuid] = json.loads(response.data)

Show raw data results for one dataset

In [None]:
# Summarize the report of the first dataset, then display its RUI locations
# as the cell output.
first_summary = dataset_summary[uuids[0]]
print(uuids[0], 'has similar sources:', len(first_summary['sources']))
print(uuids[0], 'has similar RUI locations:', len(first_summary['rui_locations']))

first_summary['rui_locations']

In [None]:
# Keep only the sources of the first dataset that are anatomical structures.
# A comprehension is used instead of the builtin filter() because a later cell
# rebinds the name `filter` to a dict, which would make this cell raise
# TypeError when it is re-run afterwards.
similar_as = [
    s for s in dataset_summary[uuids[0]]['sources']
    if s['cell_source_type'] == "http://purl.org/ccf/AnatomicalStructure"
]

# Unique, sorted, lower-cased labels of the structures whose similarity
# exceeds 0.66.
as_labels = sorted({ s['cell_source_label'].lower() for s in similar_as if s['similarity'] > 0.66 })
print(uuids[0], 'anatomical structures with similar cell populations:\n', '\n '.join(as_labels))

Create an HRA API session to explore the results

In [None]:
# Gather all RUI locations from the results. Each data source is the JSON text
# of one dataset's RUI locations, with every "ccf:" occurrence rewritten to
# "http://purl.org/ccf/".
data_sources = [
    json.dumps(summary['rui_locations']).replace("ccf:", "http://purl.org/ccf/")
    for summary in dataset_summary.values()
]

# Translate each dataset's organ name into its id in the organ lookup. Datasets
# with no recorded organ, or with an organ missing from the lookup, are skipped
# (with a notice) instead of aborting the notebook with a KeyError.
dataset_organs = {dataset_organ.get(uuid) for uuid in uuids}
unsupported_organs = sorted(
    organ for organ in dataset_organs
    if organ is not None and organ not in organ_lookup
)
if unsupported_organs:
    print('organs not in the HRApop organ lookup (skipped):', unsupported_organs)

# Sorted so the filter is the same from run to run.
filter_organs = sorted({organ_lookup[organ] for organ in dataset_organs if organ in organ_lookup})
# NOTE: this name shadows the builtin filter(); it is kept because the EUI cell
# at the end of the notebook reads it.
filter =  { "ontologyTerms": filter_organs }

# Get a session token for this configuration
api_response = hra_api.session_token({ "dataSources": data_sources })
token=api_response.token
print(token)

After we get the session token, we wait to make sure the session is ready.

In [None]:
# Poll the session's database status every two seconds until it is ready.
db_ready = False
while not db_ready:
    api_response = hra_api.db_status(token)
    print(api_response)
    if api_response.status == 'Ready':
        db_ready = True
    elif api_response.status == 'Error':
        # A failed load will never become ready; stop instead of polling forever.
        raise RuntimeError(f'HRA API session database failed to load: {api_response}')
    else:
        print('Database not ready yet! Retrying...')
        time.sleep(2)

Show basic statistics about the gathered data

In [None]:
# Request aggregate results for the session, restricted to the organs found
# in the datasets, and display them as the cell output.
aggregate_stats = hra_api.aggregate_results(token=token, ontology_terms=filter_organs)
aggregate_stats

# Part 3: Visualize the results

Imports / functions

In [22]:
from hra_jupyter_widgets import ( BodyUi, Eui )

In [19]:
def keep_node(node, selected_organs):
    """Decide whether a scene node should stay in the filtered scene.

    A node is kept when its ``representation_of`` attribute is empty/falsy,
    or when that value is one of ``selected_organs``.
    """
    organ_iri = node.representation_of
    if not organ_iri:
        return True
    return organ_iri in selected_organs

def filter_scene(scene, selected_organs):
    """Return the nodes of ``scene`` that pass ``keep_node`` as plain JSON data.

    Each retained node is serialized with its ``to_json()`` method and parsed
    back with ``json.loads``, so the result is a list of parsed JSON values
    rather than API model objects.
    """
    kept_nodes = []
    for node in scene:
        if keep_node(node, selected_organs):
            kept_nodes.append(json.loads(node.to_json()))
    return kept_nodes

Display tissue blocks in a 3D scene

In [None]:
# Organs to show: skin and heart via the lookup, plus two organs given directly
# by UBERON IRI (presumably the left and right kidney; verify against UBERON).
selected_organs = [
    organ_lookup['skin'],
    organ_lookup['heart'],
    "http://purl.obolibrary.org/obo/UBERON_0004538",
    "http://purl.obolibrary.org/obo/UBERON_0004539",
]

# Fetch the session's scene for the dataset organs, then drop the nodes that
# belong to organs outside the selection.
full_scene = hra_api.scene(token=token, ontology_terms=filter_organs)
scene = filter_scene(full_scene, selected_organs)

body_ui = BodyUi(scene=scene, height="500px")
display(body_ui)

Interact with the results in the EUI

In [None]:
# Open the EUI against the same HRA API endpoint, data sources, organ filter,
# and organ selection used in the cells above.
eui = Eui(
    remote_api_endpoint=hra_api_endpoint_url,
    data_sources=data_sources,
    filter=filter,
    selected_organs=selected_organs,
)
display(eui)