# HRA Workflows Usage

This notebook shows how to use the HRA Workflows algorithms to run cell type annotation tools over sc/snRNA-seq data in h5ad format.

## Install pre-requisites

For this notebook, we require `singularity` to be installed on the machine (see the [user guide](https://docs.sylabs.io/guides/latest/user-guide/quick_start.html)) and a few Python packages.

In [None]:
%pip install cwltool cwlref-runner requests

## Imports

In [None]:
from cwltool.main import main as cwl_main
import tempfile
import json
import os.path
import requests

## Functions

We create some helper functions to simplify running cwl workflows from python.

In [None]:
# Container runtime toggle read by cwlrunner: when True, `--singularity` is
# added to the cwltool arguments; when False, no runtime flag is added.
USE_SINGULARITY = True # Set to False to use Docker

def cwlrunner(workflow, job = None, output_dir = "."):
    """
    Runs a CWL (Common Workflow Language) workflow through cwltool.

    The container runtime follows the module-level USE_SINGULARITY flag: when
    it is true, `--singularity` is passed to cwltool, otherwise no runtime
    flag is passed. The flag is applied whether or not a job is given.

    Parameters:
    workflow (str): The path or URL to the CWL workflow file.
    job (dict, optional): A dictionary representing the job inputs, if any.
        A falsy value (None or an empty dict) runs the workflow without a job file.
    output_dir (str, optional): The path to a directory the outputs will go into. Default is current directory.

    Example:
    >>> cwlrunner('path/to/workflow.cwl', {'input1': 'value1', 'input2': 'value2'})
    """
    # Build the shared argument list once so both branches use the same
    # container runtime and output directory.
    args = ['--singularity'] if USE_SINGULARITY else []
    args += ['--outdir', output_dir, workflow]

    if not job:
        cwl_main(args)
        return

    # The job inputs are handed to cwltool as a JSON file. The file is created
    # in the current directory (presumably so relative paths inside the job
    # resolve against it -- confirm) and is removed when the `with` block exits.
    with tempfile.NamedTemporaryFile(mode="w+t", dir=".", suffix=".json") as job_file:
        json.dump(job, job_file)
        # Flush so cwltool sees the full content when it opens the file by name.
        job_file.flush()
        cwl_main(args + [job_file.name])

def download_hra_workflows_models(output_dir = "."):
    """
    Runs the HRA Workflows `download-models.cwl` workflow (no job inputs).

    Parameters:
    output_dir (str, optional): Directory the workflow outputs are written to. Default is current directory.
    """
    models_workflow = "https://raw.githubusercontent.com/hubmapconsortium/hra-workflows/main/download-models.cwl"
    cwlrunner(models_workflow, output_dir=output_dir)

def run_hra_workflows(job, output_dir = "."):
    """
    Runs the HRA Workflows `pipeline.cwl` workflow with the given job inputs.

    Parameters:
    job (dict): The job inputs passed to the pipeline workflow.
    output_dir (str, optional): Directory the workflow outputs are written to. Default is current directory.
    """
    pipeline_workflow = "https://raw.githubusercontent.com/hubmapconsortium/hra-workflows/main/pipeline.cwl"
    cwlrunner(pipeline_workflow, job=job, output_dir=output_dir)

In [None]:
# Function to run all cell type annotation tools from HRA Workflows on a given h5ad file

def run_all_hra_workflows(h5ad_path, dataset_id, organ_id, output_dir, queryLayersKey="raw", data_dir="data/hwr"):
    """
    Builds a pipeline job that runs azimuth, celltypist and popv over one
    h5ad file, then submits it with run_hra_workflows.

    Parameters:
    h5ad_path (str): Path to the input h5ad file; made absolute before use.
    dataset_id (str): Identifier recorded as `cellSource` in each algorithm's summarize step.
    organ_id (str): Value passed as the job's `organ` input.
    output_dir (str): Directory the pipeline outputs are written to; made absolute before use.
    queryLayersKey (str, optional): Passed as `queryLayersKey` to every algorithm. Default is "raw".
    data_dir (str, optional): Root directory holding the `models/` and
        `crosswalks/` sub-directories. Default is "data/hwr".
    """
    def model_dir(*parts):
        # CWL Directory input pointing at <data_dir>/models/<parts...>.
        return {
            "class": "Directory",
            "path": os.path.abspath(os.path.join(data_dir, "models", *parts)),
        }

    def algorithm(name, options):
        # One entry of the job's `algorithms` list. The crosswalk table,
        # summarize method and output directory are all derived from the
        # algorithm name; only `options` differs between tools.
        return {
            name: options,
            "crosswalk": {
                "table": {
                    "class": "File",
                    "path": os.path.abspath(os.path.join(data_dir, "crosswalks", f"{name}.csv")),
                },
                "tableLabelColumn": "Annotation_Label",
                "tableClidColumn": "CL_ID",
                "tableMatchColumn": "CL_Match",
            },
            "summarize": {"annotationMethod": name, "cellSource": dataset_id},
            "directory": name,
        }

    job = {
        "organ": organ_id,
        "matrix": {"class": "File", "path": os.path.abspath(h5ad_path)},
        "algorithms": [
            algorithm("azimuth", {
                "referenceDataDir": model_dir("azimuth"),
                "queryLayersKey": queryLayersKey,
            }),
            algorithm("celltypist", {"queryLayersKey": queryLayersKey}),
            algorithm("popv", {
                "referenceDataDir": model_dir("popv", "reference-data"),
                "modelsDir": model_dir("popv", "models"),
                "queryLayersKey": queryLayersKey,
            }),
        ],
    }

    run_hra_workflows(job, os.path.abspath(output_dir))

In [None]:
# Helper function to get the organ ontology id for a given hubmap dataset

# Lookup table from each organ's `rui_code` to its `organ_uberon` value, built
# once from the HuBMAP ontology API when this cell runs.
hbm_organ_lookup_url = "https://ontology.api.hubmapconsortium.org/organs?application_context=HUBMAP"
_hbm_organ_response = requests.get(hbm_organ_lookup_url)
# Fail here on an HTTP error instead of trying to parse an error payload as
# the organ list.
_hbm_organ_response.raise_for_status()
hbm_organ_lookup = {
    organ["rui_code"]: organ["organ_uberon"]
    for organ in _hbm_organ_response.json()
}

def get_hubmap_organ(uuid):
    """
    Looks up the organ ontology id for a HuBMAP entity.

    Fetches the entity's ancestor organs from the HuBMAP entity API and
    returns the hbm_organ_lookup value for the first ancestor whose
    `sample_category` is "organ" and whose `organ` code is in the lookup.

    Parameters:
    uuid (str): The HuBMAP entity id inserted into the request URL.

    Returns:
    The matching value from hbm_organ_lookup, or None when no ancestor matches.

    Raises:
    requests.HTTPError: If the entity API responds with an error status.
    """
    response = requests.get(f"https://entity.api.hubmapconsortium.org/entities/{uuid}/ancestor-organs")
    # Surface HTTP errors directly; otherwise an error payload would be
    # iterated as if it were the ancestor list.
    response.raise_for_status()
    for parent in response.json():
        # `.get` on both fields so an ancestor without an `organ` code is skipped, not a KeyError.
        if parent.get("sample_category") == "organ" and parent.get("organ") in hbm_organ_lookup:
            return hbm_organ_lookup[parent["organ"]]
    return None

In [None]:
# Download HRA Workflows Runner data

# Crosswalk tables, keyed by the file stem they are saved under in data/hwr/crosswalks.
hwr_crosswalk_urls = {
    "azimuth": "https://cdn.humanatlas.io/digital-objects/ctann/azimuth/latest/assets/azimuth-crosswalk.csv",
    "celltypist": "https://cdn.humanatlas.io/digital-objects/ctann/celltypist/latest/assets/celltypist-crosswalk.csv",
    "popv": "https://cdn.humanatlas.io/digital-objects/ctann/popv/latest/assets/popv-crosswalk.csv",
}

os.makedirs("data/hwr/crosswalks", exist_ok=True)
for hwr_crosswalk_name, hwr_crosswalk_url in hwr_crosswalk_urls.items():
    hwr_crosswalk_path = f"data/hwr/crosswalks/{hwr_crosswalk_name}.csv"
    # Check each file rather than the directory, so a run that was
    # interrupted part-way fetches whatever is still missing.
    if os.path.exists(hwr_crosswalk_path):
        continue
    hwr_crosswalk_response = requests.get(hwr_crosswalk_url)
    # Do not save an HTTP error page as if it were the crosswalk CSV.
    hwr_crosswalk_response.raise_for_status()
    with open(hwr_crosswalk_path, "wb") as hwr_crosswalk_file:
        hwr_crosswalk_file.write(hwr_crosswalk_response.content)

# The models are fetched by a CWL workflow; skip it when the directory is already present.
if not os.path.exists('data/hwr/models'):
    download_hra_workflows_models('data/hwr')