This notebook explores how to access the different kinds of annotations in a given SBML file.

In [136]:
# Project helper used below to obtain the dataset to parse.
from data.util import get_dataset
# IPython autoreload extension in mode 2, so that edits to imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [137]:
# Look up the "PDMap" dataset through the project helper. The result is passed to
# etree.parse() further down, so it is presumably a file path or file-like
# object pointing at the SBML document — TODO confirm against data.util.
dataset = get_dataset("PDMap")

In [138]:
# libsbml and cobrapy do not support annotations in custom namespaces (e.g. celldesigner or biomodels)
# from data.util import get_dataset, load_sbml
# print("hello world!")
# sbml = load_sbml(dataset)
# print(sbml)

In [139]:
# Using ElementTree...
# TODO: provide prefix map
# import xml.etree.ElementTree as ET
# tree = ET.parse(dataset)
# root = tree.getroot()
# for child in root:
#     print(child.tag, child.attrib)
#
# tree.findall("/sbml/model/annotation/celldesigner:extension/celldesigner:listOfSpeciesAliases")

In [140]:
# Location of the species aliases in the document.
# NOTE(review): not referenced by any of the cells visible here.
targetxpath = "/model/annotation/celldesigner:extension/celldesigner:listOfSpeciesAliases"

# Using lxml
from lxml import etree
tree = etree.parse(dataset)

root = tree.getroot()

# find()/findall() do not implement full XPath but the "ElementPath" subset
# (the same mini-language that xml.etree.ElementTree supports).
# The path is evaluated relative to the root element. Passing an absolute path
# ("/model/...") to tree.find() makes lxml emit a FutureWarning, because that
# search silently ignores the root element; the explicit "./" form selects the
# same node without the warning.
cd_extension = root.find("./model/annotation/celldesigner:extension", root.nsmap)
# Fail early with a clear message instead of an AttributeError in a later cell.
assert cd_extension is not None, "celldesigner:extension annotation not found"
print(cd_extension)

<Element {http://www.sbml.org/2001/ns/celldesigner}extension at 0x26db44c4540>


In [141]:
# Extract species aliases (normal and complex)

# Plain species aliases, found under celldesigner:listOfSpeciesAliases.
speciesAliases = cd_extension.findall(
    "celldesigner:listOfSpeciesAliases/celldesigner:speciesAlias",
    namespaces=root.nsmap,
)
# Complex species aliases, found under celldesigner:listOfComplexSpeciesAliases.
complexSpeciesAliases = cd_extension.findall(
    "celldesigner:listOfComplexSpeciesAliases/celldesigner:complexSpeciesAlias",
    namespaces=root.nsmap,
)

# One number per output line: both individual counts, then their sum
# (the sum is what gets compared to the results from KAP ✓).
print(
    len(speciesAliases),
    len(complexSpeciesAliases),
    len(speciesAliases) + len(complexSpeciesAliases),
    sep="\n",
)


4562
779
5341


In [142]:
# Extract species information and annotations

# Relative ElementPath from the root element. An absolute "/model/..." path
# handed to tree.find() triggers lxml's FutureWarning about the search
# ignoring the root element; "./model/..." selects the same node.
listOfSpeciesEl = root.find("./model/listOfSpecies", root.nsmap)
assert listOfSpeciesEl is not None

# compare to results from KAP ✓
# (2606 is the species count expected for this dataset)
assert len(listOfSpeciesEl.findall('species', root.nsmap)) == 2606

In [143]:
# Pick a single species by its id to look at it in detail.
mySpecies = listOfSpeciesEl.find(
    "species[@id='s1905']",
    namespaces=root.nsmap,
)

# XML attributes of the species element: ids, name, ...
print(mySpecies.attrib)

# CellDesigner-specific annotations attached to this species.
cd_annots = mySpecies.find(
    "annotation/celldesigner:extension",
    namespaces=root.nsmap,
)
# species/node type (as per [[^2e2cfd]])
print(
    cd_annots.find(
        "celldesigner:speciesIdentity/celldesigner:class",
        namespaces=root.nsmap,
    ).text
)
# also relevant?
# print(cd_annots.find("celldesigner:listOfCatalyzedReactions", root.nsmap))

{'metaid': 's1905', 'id': 's1905', 'name': 'L-Glutamate', 'compartment': 'default', 'initialAmount': '0', 'charge': '0'}
SIMPLE_MOLECULE


In [144]:
# These prefixes need to be added explicitly, on top of the namespace map
# taken from the root element.
nsmap = dict(root.nsmap)
nsmap.update({
    'rdf': "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    'dc': "http://purl.org/dc/elements/1.1/",
    'dcterms': "http://purl.org/dc/terms/",
    'vCard': "http://www.w3.org/2001/vcard-rdf/3.0#",
    'bqbiol': "http://biomodels.net/biology-qualifiers/",
    'bqmodel': "http://biomodels.net/model-qualifiers/",
})

# annotations, cf. [[^83af5]]
rdf_annots = mySpecies.find("annotation/rdf:RDF/rdf:Description", nsmap)

# Qualifier documentation:
#   http://biomodels.net/biology-qualifiers/ (outdated, incomplete)
#   http://co.mbine.org/standards/qualifiers
# Some examples follow.

# `hasVersion`: "is a version or an instance of the biological entity
# represented in the model element"; the referenced entity may be more specific
# than the linked entity.
hasVersion_bag = rdf_annots.find('bqbiol:hasVersion/rdf:Bag', nsmap)
bio_hasVersion = hasVersion_bag.findall('rdf:li', nsmap)
print(bio_hasVersion[0].attrib)

# `isVersionOf`: the referenced entity may be more general than the linked entity.
# `is`: "has identity / exact counterpart"
is_bag = rdf_annots.find('bqbiol:is/rdf:Bag', nsmap)
bio_is = is_bag.findall('rdf:li', nsmap)
print(bio_is[0].attrib)

{'{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource': 'urn:miriam:reactome:REACT_13119.1'}
{'{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource': 'urn:miriam:obo.chebi:CHEBI%3A16015'}
