# Check 2D FTU cell labels against Cell Ontology

In [1]:
# Install the `requests` HTTP client into the kernel's environment; the cells
# below use it to download the crosswalk and to query the SPARQL endpoint.
%pip install requests

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Notebook configuration.

# URL the crosswalk CSV is downloaded from.
CROSSWALK = "https://cdn.humanatlas.io/digital-objects/2d-ftu/asct-b-2d-models-crosswalk/latest/assets/asct-b-2d-models-crosswalk.csv"

# Text the crosswalk's header line starts with; used to locate that line.
FIRST_COL_NAME = "organ_label"

# Default endpoint that SPARQL queries are sent to.
SPARQL_ENDPOINT = "https://lod.humanatlas.io/sparql"

In [3]:
# Reusable functions

import csv
import os
from io import StringIO

import requests

def sparql_select(query, endpoint=SPARQL_ENDPOINT, timeout=120):
    """Run a SPARQL SELECT query and return its result rows.

    The query is sent as a form-encoded POST asking for CSV results, and the
    response body is parsed with ``csv.DictReader``.

    Args:
        query: SPARQL SELECT query text.
        endpoint: URL of the SPARQL endpoint; defaults to ``SPARQL_ENDPOINT``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of dicts, one per result row, keyed by the CSV header names.

    Raises:
        requests.HTTPError: If the endpoint responds with a 4xx/5xx status.
        requests.Timeout: If the endpoint does not respond within ``timeout``.
    """
    response = requests.post(
        endpoint,
        {"query": query},
        headers={"Accept": "text/csv"},
        timeout=timeout,
    )
    # Fail loudly on HTTP errors; otherwise the error body would be parsed as
    # CSV and silently returned as bogus rows.
    response.raise_for_status()
    # requests falls back to ISO-8859-1 for text/* responses that declare no
    # charset, which would garble non-ASCII labels. SPARQL CSV results are
    # expected to be UTF-8, so use that when the server does not say otherwise.
    if "charset" not in response.headers.get("Content-Type", "").lower():
        response.encoding = "utf-8"
    with StringIO(response.text) as csv_text:
        return list(csv.DictReader(csv_text))

In [4]:
# Download the crosswalk CSV. Fail fast on an HTTP error so an error page is
# never parsed as if it were the crosswalk.
# NOTE(review): `.text` uses the encoding requests infers from the response
# headers — confirm non-ASCII labels decode correctly.
crosswalk_response = requests.get(CROSSWALK)
crosswalk_response.raise_for_status()
crosswalk = crosswalk_response.text
crosswalk_lines = crosswalk.splitlines()

# The column header is the first line that starts with FIRST_COL_NAME; any
# lines before it are skipped.
header_index = next(
    (i for i, line in enumerate(crosswalk_lines) if line.startswith(FIRST_COL_NAME)),
    None,
)
if header_index is None:
    # A bare StopIteration from next() would give no hint about what went wrong.
    raise ValueError(
        f"No header line starting with {FIRST_COL_NAME!r} found in {CROSSWALK}"
    )

csv_reader = csv.DictReader(crosswalk_lines[header_index:])
parsed_data = list(csv_reader)
parsed_data[0]

{'organ_label': 'Kidney',
 'organ_id': 'UBERON:0002113',
 'anatomical_structure_of': '#FTUCorticalCollectingDuct',
 'source_spatial_entity': '#2DRefObjects',
 'node_group': 'Cortical_Collecting_Duct_Principal_Cell',
 'node_name': 'Cortical_Collecting_Duct_Principal_Cell_1',
 'label': 'kidney cortex collecting duct principal cell',
 'OntologyID': 'CL:1000714',
 'representation_of': 'http://purl.obolibrary.org/obo/CL_1000714',
 'svg file of single 2DFTU': '2d-ftu-kidney-cortical-collecting-duct',
 'exist_asctb': '1',
 'type': 'CT',
 'REF/1': 'Krstic, R.V. 1991. Human Microscopic Anatomy: An Atlas for Students of Medicine and Biology. Springer. https://books.google.com/books?id=hB6nqQCPKBoC.',
 'REF/1/ID': 'ISBN:9783540536666',
 'REF/1/NOTES': '',
 'REF/2': 'Ross, M.H., and W. Pawlina. 2006. Histology. Lippincott Williams & Wilkins. https://books.google.com/books?id=FoSiGTXn6BUC.',
 'REF/2/ID': 'ISBN:9780781767903',
 'REF/2/NOTES': '',
 'REF/3': 'Hu, Rui, Alicia A. McDonough, and Anita T.

In [5]:
def _sparql_literal(text):
    """Return ``text`` as a double-quoted SPARQL string literal, escaping
    backslashes, double quotes and line breaks so it cannot break the query."""
    escaped = (
        str(text)
        .replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )
    return f'"{escaped}"'


# header_index is the 0-based position of the header line, so the first data
# record sits on 1-based line header_index + 2 of the downloaded CSV.
# NOTE(review): assumes every record occupies exactly one line — confirm.
first_data_row = header_index + 2

# One VALUES tuple per crosswalk record: (CSV row number, class IRI, label).
values = " ".join(
    f"({row_number} <{row['representation_of']}> {_sparql_literal(row['label'])} )"
    for row_number, row in enumerate(parsed_data, start=first_data_row)
)

# For each IRI, fetch its rdfs:label from the named graph and keep only the
# pairs whose labels differ when compared case-insensitively.
query = f"""
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT *
FROM <https://purl.humanatlas.io/vocab/cl>
WHERE {{
  VALUES (?csv_row ?iri ?crosswalk_label) {{
    {values}
  }}
  ?iri rdfs:label ?cl_label .
  FILTER(LCASE(STR(?cl_label)) != LCASE(STR(?crosswalk_label)))
}}
"""

mismatched_labels = sparql_select(query)
print(len(mismatched_labels), "mismatched labels")

264 mismatched labels


In [6]:
# Write the mismatch rows to a CSV file for review.
output_path = 'data/2d-ftu-mismatched-labels.csv'

# open() does not create missing directories, so make sure the folder exists.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# With zero mismatches there is no first row to take column names from; fall
# back to the variables bound by the mismatch query so a header-only file is
# still written instead of raising IndexError.
if mismatched_labels:
    fieldnames = list(mismatched_labels[0].keys())
else:
    fieldnames = ['csv_row', 'iri', 'crosswalk_label', 'cl_label']

# Explicit UTF-8 so non-ASCII labels do not depend on the platform default.
with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(mismatched_labels)

print(f"mismatched labels written to {output_path}")

mismatched labels written to data/2d-ftu-mismatched-labels.csv
