In [1]:
!pip install -e .. --upgrade -q

In [2]:
from jsonasobj import as_json
from rdflib import Namespace, Graph, BNode, Literal
from rdflib.namespace import NAME_START_CATEGORIES

from biolinkmodel import datamodel
from biolinkmodel.utils.extended_datamodel import KG

In [3]:
# Namespaces below are temporary.  Will link in to namespaces.org API
#
# Each Namespace holds a base IRI; item access (HGNC["28304"]) or attribute
# access (CCDS.CCDS12067) appends the local identifier to that base.

# --- identifiers.org ---------------------------------------------------------
CCDS = Namespace("http://identifiers.org/ccds/")
ENSEMBL = Namespace("http://identifiers.org/ensembl/")
HGNC = Namespace("http://identifiers.org/hgnc:")  # base ends in ':' — ids follow the colon directly
MIM = Namespace("http://identifiers.org/mim/")
PUBMED = Namespace("http://identifiers.org/pubmed/")
UNIPROT = Namespace("http://identifiers.org/uniprot/")

# --- NCBI --------------------------------------------------------------------
NCBIGENE = Namespace("https://www.ncbi.nlm.nih.gov/gene/")
NUCC = Namespace("https://www.ncbi.nlm.nih.gov/nuccore/")

# --- other sources -----------------------------------------------------------
MGI = Namespace("http://www.informatics.jax.org/accession/MGI:")  # base also ends in ':'
# NOTE(review): `homology` is not referenced by any of the cells visible here.
homology = Namespace("http://data2services/model/association/homology/")

In [4]:
# Construct a knowledge graph.  There are a number of parameters that can be appended to the graph
# after construction; only `kg_source` is set here.
# The HGNC namespace object itself is passed as the graph id; it is rendered as
# `biolink:id <http://identifiers.org/hgnc:>` in the Turtle printed later in this notebook.
knowledge_graph = KG(id=HGNC)
# Free-text provenance string; appears as `biolink:kg_source` in the Turtle output.
knowledge_graph.kg_source = "Sample derived from biolink-hgnc.jsonld"

In [5]:
# Link an HGNC gene to an MGI gene with a GeneToGeneHomologyAssociation.
# No relation argument is supplied, so the association's default relationship
# is used (the JSON printed below shows which relation that is).
hgnc_gene_iri = HGNC["28304"]
mgi_gene_iri = MGI["1925179"]
assoc = knowledge_graph.add_association(
    datamodel.GeneToGeneHomologyAssociation, hgnc_gene_iri, mgi_gene_iri
)
print(as_json(assoc))

{
   "id": "Nc459e93ee5894d53a0d9832354be20d5",
   "subject": "http://identifiers.org/hgnc:28304",
   "relation": "http://bioentity.io/vocab/homologous_to",
   "object": "http://www.informatics.jax.org/accession/MGI:1925179"
}


In [6]:
# Add a Transcript to Gene relationship, this time naming the relation explicitly.
# NOTE: the actual relationship is not specified in the source.  We're just guessing
# that 'expresses' is the right one.
transcript_iri = NUCC["NM_001018"]
gene_iri = HGNC["10388"]
assoc = knowledge_graph.add_association(
    datamodel.TranscriptToGeneRelationship, transcript_iri, gene_iri, "expresses"
)
print(as_json(assoc))

{
   "id": "N78aa30975cd54fb8aa1158b4faed2c19",
   "subject": "https://www.ncbi.nlm.nih.gov/nuccore/NM_001018",
   "relation": "http://bioentity.io/vocab/expresses",
   "object": "http://identifiers.org/hgnc:10388"
}


In [7]:
# Add a Gene to the graph, then fill in its attributes one by one.
# NOTE 1: Not sure what to do with
# "http://data2services/vocab/Status" : [ {
#       "@value" : "Approved"
#     } ],
#
# NOTE 2: Neither category is defined in the model?
#
# The JSON printed at the end lists the fields in a different order from the
# assignments below, so assignment order is not what determines output order.
gene = knowledge_graph.add_entity(datamodel.Gene, HGNC['10388'])
# Plain-string categories (see NOTE 2); rendered as the `a ( ... )` list in the Turtle output.
gene.category = ['gene with gene product', 'protein-coding gene']
# NOTE(review): the first IRI uses http:// and the second https:// — confirm this
# difference is intentional and not a typo carried over from the source data.
gene.filler = ["http://data2services/model/status/Approved", "https://data2services/model/gene-family/RPS"]
# Rendered as ns1:RO_0001025 in the Turtle output.
gene.located_in = CCDS.CCDS12067
gene.publications = [PUBMED["159154"]]
# Rendered as ns1:RO_0002205 in the Turtle output.
gene.has_gene_product = UNIPROT.P62841
# Rendered as rdfs:label in the Turtle output.
gene.name = "ribosomal protein S15"
gene.systematic_synonym = "RPS15"
# Rendered as owl:equivalentClass in the Turtle output.
# NOTE(review): the Turtle output prints the Ensembl prefix as `EMSEMBL:` — looks
# like a misspelling in whatever prefix table the serializer uses; verify upstream.
gene.same_as = [NCBIGENE['6209'], MIM['180535'], ENSEMBL.ENSG00000115268]
# Assigned as a plain string; serialized as an xsd:date literal in the Turtle output.
gene.update_date = "2014-11-19"
print(as_json(gene))

{
   "id": "http://identifiers.org/hgnc:10388",
   "name": "ribosomal protein S15",
   "category": [
      "gene with gene product",
      "protein-coding gene"
   ],
   "systematic_synonym": "RPS15",
   "has_gene_product": "http://identifiers.org/uniprot/P62841",
   "filler": [
      "http://data2services/model/status/Approved",
      "https://data2services/model/gene-family/RPS"
   ],
   "located_in": "http://identifiers.org/ccds/CCDS12067",
   "publications": [
      "http://identifiers.org/pubmed/159154"
   ],
   "same_as": [
      "https://www.ncbi.nlm.nih.gov/gene/6209",
      "http://identifiers.org/mim/180535",
      "http://identifiers.org/ensembl/ENSG00000115268"
   ],
   "update_date": "2014-11-19"
}


In [8]:
# Serialize the whole knowledge graph as RDF Turtle and print it.
# strip_prefixes=True: prefixes stripped for convenience.
turtle_text = knowledge_graph.as_rdf(strip_prefixes=True)
print(turtle_text)


[] biolink:entities ( [ a ( "gene with gene product" "protein-coding gene" ) ;
                rdfs:label "ribosomal protein S15" ;
                biolink:filler <http://data2services/model/status/Approved>,
                    <https://data2services/model/gene-family/RPS> ;
                biolink:id <http://identifiers.org/hgnc:10388> ;
                biolink:publications ( PUBMED:159154 ) ;
                biolink:systematic_synonym "RPS15" ;
                biolink:update_date "2014-11-19"^^xsd:date ;
                ns1:RO_0001025 CCDS:CCDS12067 ;
                ns1:RO_0002205 UniProtKB:P62841 ;
                owl:equivalentClass EMSEMBL:ENSG00000115268,
                    MIM:180535,
                    NCBIGENE:6209 ] ) ;
    biolink:id <http://identifiers.org/hgnc:> ;
    biolink:kg_source "Sample derived from biolink-hgnc.jsonld" ;
    biolink:relationship_types ( [ biolink:id <file:///Users/hsolbri1/git/biolink/biolink-model/notebooks/Nc459e93ee5894d53a0d9832354be20d5> 