In [71]:
######################################################################
# Demo of how to use RDFLib with JSON-LD to translate json to rdf
#
# reference/interesting links:
# https://etl.linkedpipes.com/tutorials/how-to/map_json_to_rdf
# https://etl.linkedpipes.com/tutorials/how-to/map_rdf_properties
# https://etl.linkedpipes.com/tutorials/csv-to-rdf/describe_semantics
# https://github.com/digitalbazaar/pyld
######################################################################

## requirement: make sure rdflib and rdflib-jsonld are installed
## (rdflib >= 6 bundles the JSON-LD plugin, so rdflib-jsonld is only needed with older rdflib)
# conda install -c conda-forge rdflib 
# conda install -c bioconda rdflib-jsonld 

# import libraries; not sure the serializer and plugin libs are needed
from rdflib import Graph, ConjunctiveGraph, plugin
from rdflib.serializer import Serializer
from textwrap import dedent
import pandas as pds
import json

# build a small JSON-LD document: an "@context" followed by one described node
# (dedent strips the 4-space indent used here purely for source layout)
doc = dedent("""\
    {
    "@context":
      {
         "@vocab": "http://foo.com/"
      },
      "@id": "http://example.com/places#BrewEats",
      "@type": "Restaurant",
      "name": ["Brew Eats", "foo"],
       "databaseId": "23987520"
    }""")

# show the raw document that will be handed to rdflib
print(doc)

{
"@context":
  {
     "@vocab": "http://foo.com/"
  },
  "@id": "http://example.com/places#BrewEats",
  "@type": "Restaurant",
  "name": ["Brew Eats", "foo"],
   "databaseId": "23987520"
}


In [72]:
# load doc into rdflib and process
g = Graph().parse(data=doc, format='json-ld')

# serialize() returns bytes on rdflib < 6 but str on rdflib >= 6, where calling
# .decode() would raise AttributeError -- so only decode when bytes come back
rdf_text = g.serialize(format="nt")
print(rdf_text.decode('utf-8') if isinstance(rdf_text, bytes) else rdf_text)

## things to note:
# - the @vocab sets the default/base prefix to 'http://foo.com/'
# - the base is overridden in BrewEats uri
# - in the json doc, the name key has a list of values: ["Brew Eats", "foo"]
#   but the jsonld processor splits the list into multiple triples, 
# 
#   <http://example.com/places#BrewEats> <http://foo.com/name> "Brew Eats" .
#   <http://example.com/places#BrewEats> <http://foo.com/name> "foo" .
#
#   this is good ...  


<http://example.com/places#BrewEats> <http://foo.com/name> "foo" .
<http://example.com/places#BrewEats> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://foo.com/Restaurant> .
<http://example.com/places#BrewEats> <http://foo.com/name> "Brew Eats" .
<http://example.com/places#BrewEats> <http://foo.com/databaseId> "23987520" .




In [73]:
# output in turtle
# notice the shortened/abbreviated syntax

# serialize() returns bytes on rdflib < 6 and str on rdflib >= 6; decode only bytes
rdf_text = g.serialize(format="turtle")
print(rdf_text.decode('utf-8') if isinstance(rdf_text, bytes) else rdf_text)

@prefix : <http://foo.com/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<http://example.com/places#BrewEats> a :Restaurant ;
    :databaseId "23987520" ;
    :name "Brew Eats",
        "foo" .




In [74]:
# here is a more complex example with multiple records
# - the "@context" declares the dc, ex and xsd prefixes used by the compact names below
# - "ex:contains" is typed as "@id", so its values are treated as IRIs (links to
#   other records) rather than plain string literals
# - "@graph" holds the list of records: a library, a book and a chapter
books = """
{
  "@context": {
    "dc": "http://purl.org/dc/elements/1.1/",
    "ex": "http://example.org/vocab#",
    "xsd": "http://www.w3.org/2001/XMLSchema#",
    "ex:contains": {
      "@type": "@id"
    }
  },
  "@graph": [
    {
      "@id": "http://example.org/library",
      "@type": "ex:Library",
      "ex:contains": "http://example.org/library/the-republic"
    },
    {
      "@id": "http://example.org/library/the-republic",
      "@type": "ex:Book",
      "dc:creator": "Plato",
      "dc:title": "The Republic",
      "ex:contains": "http://example.org/library/the-republic#introduction"
    },
    {
      "@id": "http://example.org/library/the-republic#introduction",
      "@type": "ex:Chapter",
      "dc:description": "An introductory chapter on The Republic.",
      "dc:title": "The Introduction"
    }
  ]
}
"""
print(books)



{
  "@context": {
    "dc": "http://purl.org/dc/elements/1.1/",
    "ex": "http://example.org/vocab#",
    "xsd": "http://www.w3.org/2001/XMLSchema#",
    "ex:contains": {
      "@type": "@id"
    }
  },
  "@graph": [
    {
      "@id": "http://example.org/library",
      "@type": "ex:Library",
      "ex:contains": "http://example.org/library/the-republic"
    },
    {
      "@id": "http://example.org/library/the-republic",
      "@type": "ex:Book",
      "dc:creator": "Plato",
      "dc:title": "The Republic",
      "ex:contains": "http://example.org/library/the-republic#introduction"
    },
    {
      "@id": "http://example.org/library/the-republic#introduction",
      "@type": "ex:Chapter",
      "dc:description": "An introductory chapter on The Republic.",
      "dc:title": "The Introduction"
    }
  ]
}



In [75]:
# because the document uses the "@graph" syntax, rdflib requires you to use a *ConjunctiveGraph*
# https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html?highlight=conjunctivegraph#rdflib.graph.ConjunctiveGraph

# I had to parse the graph in two steps, instead of single step like above
g = ConjunctiveGraph()
g.parse(data=books, format='json-ld')

# print rdf
# serialize() returns bytes on rdflib < 6 and str on rdflib >= 6; decode only bytes
rdf_text = g.serialize(format="nt")
print(rdf_text.decode('utf-8') if isinstance(rdf_text, bytes) else rdf_text)


<http://example.org/library> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/vocab#Library> .
<http://example.org/library/the-republic#introduction> <http://purl.org/dc/elements/1.1/description> "An introductory chapter on The Republic." .
<http://example.org/library/the-republic#introduction> <http://purl.org/dc/elements/1.1/title> "The Introduction" .
<http://example.org/library/the-republic> <http://purl.org/dc/elements/1.1/title> "The Republic" .
<http://example.org/library> <http://example.org/vocab#contains> <http://example.org/library/the-republic> .
<http://example.org/library/the-republic> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/vocab#Book> .
<http://example.org/library/the-republic#introduction> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/vocab#Chapter> .
<http://example.org/library/the-republic> <http://purl.org/dc/elements/1.1/creator> "Plato" .
<http://example.org/library/the-republic> <http://ex

In [76]:
# output in turtle
# serialize() returns bytes on rdflib < 6 and str on rdflib >= 6; decode only bytes
rdf_text = g.serialize(format="turtle")
print(rdf_text.decode('utf-8') if isinstance(rdf_text, bytes) else rdf_text)

@prefix dc: <http://purl.org/dc/elements/1.1/> .
@prefix ex: <http://example.org/vocab#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<http://example.org/library> a ex:Library ;
    ex:contains <http://example.org/library/the-republic> .

<http://example.org/library/the-republic> a ex:Book ;
    ex:contains <http://example.org/library/the-republic#introduction> ;
    dc:creator "Plato" ;
    dc:title "The Republic" .

<http://example.org/library/the-republic#introduction> a ex:Chapter ;
    dc:description "An introductory chapter on The Republic." ;
    dc:title "The Introduction" .




In [77]:
### now try converting pandas dataframe to rdf using json-ld

# load patients dataset (first sheet of the workbook)
# read_excel opens and closes the workbook itself, whereas a bare
# ExcelFile(...) keeps the file handle open until it is explicitly closed
# for readability I only work with the first 3 patients
patients = pds.read_excel('patients_1.xlsx').head(3)
patients

Unnamed: 0,patient_id,gender,birth_date
0,10001,M,1950-01-01
1,10002,F,1960-01-02
2,10003,M,1970-01-03


In [78]:
# print dataframe as json
# DataFrame.to_json already returns a JSON-encoded string; passing that string
# through json.dumps ('dump string') encodes it a second time, which is why the
# printed value is wrapped in quotes with every inner quote escaped
# https://docs.python.org/3/library/json.html
records_json = patients.to_json(orient='records')
print(json.dumps(records_json))

"[{\"patient_id\":10001,\"gender\":\"M\",\"birth_date\":\"1950-01-01\"},{\"patient_id\":10002,\"gender\":\"F\",\"birth_date\":\"1960-01-02\"},{\"patient_id\":10003,\"gender\":\"M\",\"birth_date\":\"1970-01-03\"}]"


In [79]:
# JSON-LD context fragment; it is spliced into the document template below
context = """
  "@context":
  {
     "@vocab": "http://foo.com/"
  }
"""

# serialize the dataframe as a JSON array holding one object per row
# (to_json already yields a JSON string, so it is used as-is)
data = patients.to_json(orient='records')

# assemble the JSON-LD doc: the context first, then the records under "@graph"
# (same overall layout as the books example)
doc_template = """
{{
  {context},
  "@graph":
  {records}
}}
"""
doc = doc_template.format(context=context, records=data)

print(doc)


{
  
  "@context":
  {
     "@vocab": "http://foo.com/"
  }
,
  "@graph":
  [{"patient_id":10001,"gender":"M","birth_date":"1950-01-01"},{"patient_id":10002,"gender":"F","birth_date":"1960-01-02"},{"patient_id":10003,"gender":"M","birth_date":"1970-01-03"}]
}



In [80]:
# start from an empty ConjunctiveGraph (the doc uses "@graph") and load the
# JSON-LD document into it; the value returned by parse() is what the cell displays
g = ConjunctiveGraph()
g.parse(format='json-ld', data=doc)

<Graph identifier=N82b6e93964c24df6b3758e1fc795d33b (<class 'rdflib.graph.Graph'>)>

In [81]:
# print rdf
# note that b/c URIs weren't defined for individuals blank nodes are created
# serialize() returns bytes on rdflib < 6 and str on rdflib >= 6; decode only bytes
rdf_text = g.serialize(format="nt")
print(rdf_text.decode('utf-8') if isinstance(rdf_text, bytes) else rdf_text)

_:N318214c21b5b499aa2c73a32ba551aa2 <http://foo.com/gender> "F" .
_:N0e5b299e570940c680afa570b530a141 <http://foo.com/gender> "M" .
_:N0e5b299e570940c680afa570b530a141 <http://foo.com/birth_date> "1970-01-03" .
_:N3daac528da2845358a62be9d5f20991b <http://foo.com/birth_date> "1950-01-01" .
_:N3daac528da2845358a62be9d5f20991b <http://foo.com/patient_id> "10001"^^<http://www.w3.org/2001/XMLSchema#integer> .
_:N318214c21b5b499aa2c73a32ba551aa2 <http://foo.com/patient_id> "10002"^^<http://www.w3.org/2001/XMLSchema#integer> .
_:N3daac528da2845358a62be9d5f20991b <http://foo.com/gender> "M" .
_:N318214c21b5b499aa2c73a32ba551aa2 <http://foo.com/birth_date> "1960-01-02" .
_:N0e5b299e570940c680afa570b530a141 <http://foo.com/patient_id> "10003"^^<http://www.w3.org/2001/XMLSchema#integer> .




In [82]:
# output in turtle
# serialize() returns bytes on rdflib < 6 and str on rdflib >= 6; decode only bytes
rdf_text = g.serialize(format="turtle")
print(rdf_text.decode('utf-8') if isinstance(rdf_text, bytes) else rdf_text)

@prefix : <http://foo.com/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

[] :birth_date "1970-01-03" ;
    :gender "M" ;
    :patient_id 10003 .

[] :birth_date "1960-01-02" ;
    :gender "F" ;
    :patient_id 10002 .

[] :birth_date "1950-01-01" ;
    :gender "M" ;
    :patient_id 10001 .




In [113]:
# now lets say we have a big dataset ... lets iterate over the records to create rdf
context = """
  "@context":
  {
     "@vocab": "http://foo.com/"
  }"""

g = Graph()
for row in patients.itertuples():
    # convert row into a string of "key":"value" pairs
    # every value is emitted as a JSON string; json.dumps adds the surrounding
    # quotes AND escapes quotes, backslashes and control characters, which
    # plain string formatting would leave as invalid JSON
    data = ', \n '.join('{key}:{value}'.format(key=json.dumps(k, ensure_ascii=False),
                                               value=json.dumps(str(v), ensure_ascii=False))
                        for (k, v) in row._asdict().items())

    # use context from above, but since it is one row at time we don't need ConjunctiveGraph
    doc = "{%s, \n %s \n}" % (context, data)
    g.parse(data=doc, format='json-ld')
    print(doc+"\n")

{
  "@context":
  {
     "@vocab": "http://foo.com/"
  }, 
 "Index":"0", 
 "patient_id":"10001", 
 "gender":"M", 
 "birth_date":"1950-01-01" 
}

{
  "@context":
  {
     "@vocab": "http://foo.com/"
  }, 
 "Index":"1", 
 "patient_id":"10002", 
 "gender":"F", 
 "birth_date":"1960-01-02" 
}

{
  "@context":
  {
     "@vocab": "http://foo.com/"
  }, 
 "Index":"2", 
 "patient_id":"10003", 
 "gender":"M", 
 "birth_date":"1970-01-03" 
}



In [112]:
# print rdf
# serialize() returns bytes on rdflib < 6 and str on rdflib >= 6; decode only bytes
rdf_text = g.serialize(format="nt")
print(rdf_text.decode('utf-8') if isinstance(rdf_text, bytes) else rdf_text)

_:Nb36aff57dd3e47a78c63d3659eb4718d <http://foo.com/birth_date> "1950-01-01" .
_:Nf5b2f594f17843719739e2245967b46b <http://foo.com/birth_date> "1960-01-02" .
_:Nf5b2f594f17843719739e2245967b46b <http://foo.com/Index> "1" .
_:Nd8c43963c5a9488fb4dbc29669c993f8 <http://foo.com/patient_id> "10003" .
_:Nd8c43963c5a9488fb4dbc29669c993f8 <http://foo.com/birth_date> "1970-01-03" .
_:Nb36aff57dd3e47a78c63d3659eb4718d <http://foo.com/Index> "0" .
_:Nb36aff57dd3e47a78c63d3659eb4718d <http://foo.com/patient_id> "10001" .
_:Nb36aff57dd3e47a78c63d3659eb4718d <http://foo.com/gender> "M" .
_:Nf5b2f594f17843719739e2245967b46b <http://foo.com/patient_id> "10002" .
_:Nf5b2f594f17843719739e2245967b46b <http://foo.com/gender> "F" .
_:Nd8c43963c5a9488fb4dbc29669c993f8 <http://foo.com/Index> "2" .
_:Nd8c43963c5a9488fb4dbc29669c993f8 <http://foo.com/gender> "M" .




In [111]:
# output in turtle
# serialize() returns bytes on rdflib < 6 and str on rdflib >= 6; decode only bytes
rdf_text = g.serialize(format="turtle")
print(rdf_text.decode('utf-8') if isinstance(rdf_text, bytes) else rdf_text)

@prefix : <http://foo.com/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

[] :Index "0" ;
    :birth_date "1950-01-01" ;
    :gender "M" ;
    :patient_id "10001" .

[] :Index "2" ;
    :birth_date "1970-01-03" ;
    :gender "M" ;
    :patient_id "10003" .

[] :Index "1" ;
    :birth_date "1960-01-02" ;
    :gender "F" ;
    :patient_id "10002" .




In [92]:
## simple example of how to implement fields as instances instead of data properties
## ?record -[has_field]-> ?field .
## ?field -[field_value]-> ?value
#
# context notes:
# - "data_record", "fv" and "has_field" are short aliases for rp: terms
# - "data_field" maps to "rp:DE_000000007&" and is then reused as a prefix, so
#   e.g. "data_field:gender" expands to <...ontology#DE_000000007&gender>
# - each entry under "has_field" is its own node (with an "@id") whose "@type"
#   names the field and whose "fv" carries the value
# NOTE(review): the recorded turtle output shows ex:record_1 serialized as
#   <http://example.com/.#record_1>; confirm "http://example.com#" is the intended namespace

doc = """
{
  "@context":
  {
    "@vocab": "http://foo.com#",
    "rp": "http://purl.roswellpark.org/ontology#",
    "ex": "http://example.com#",
    "data_record": "rp:DE_000000003",
    "data_field": "rp:DE_000000007&",
    "patient_id": "data_field:patient_id",
    "gender": "data_field:gender",
    "birth_date": "data_field:birth_date",
    "fv": "rp:DE_000000026",
    "has_field": "rp:DE_000000022"
  }, 
 "@id": "ex:record_1",
 "@type": "data_record",
  "has_field":
  [
    {
      "@id": "ex:record_1/patient_id",
      "@type": "patient_id",
      "fv": "1001"
    },
    {
      "@id": "ex:record_1/gender",
      "@type": "gender",
      "fv":"M"
    }, 
    {
      "@id": "ex:record_1/birth_date",
      "@type": "birth_date",
      "fv": "1950-01-01" 
    }
  ]
}"""

g = Graph().parse(data=doc, format='json-ld')

# serialize() returns bytes on rdflib < 6 and str on rdflib >= 6; decode only bytes
# (switch format to "nt" to see the N-Triples form instead)
rdf_text = g.serialize(format="turtle")
print(rdf_text.decode('utf-8') if isinstance(rdf_text, bytes) else rdf_text)

@prefix : <http://foo.com#> .
@prefix ex: <http://example.com#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix rp: <http://purl.roswellpark.org/ontology#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<http://example.com/.#record_1> a rp:DE_000000003 ;
    rp:DE_000000022 <http://example.com/.#record_1/birth_date>,
        <http://example.com/.#record_1/gender>,
        <http://example.com/.#record_1/patient_id> .

<http://example.com/.#record_1/birth_date> a <http://purl.roswellpark.org/ontology#DE_000000007&birth_date> ;
    rp:DE_000000026 "1950-01-01" .

<http://example.com/.#record_1/gender> a <http://purl.roswellpark.org/ontology#DE_000000007&gender> ;
    rp:DE_000000026 "M" .

<http://example.com/.#record_1/patient_id> a <http://purl.roswellpark.org/ontology#DE_000000007&patient_id> ;
    rp:DE_000000026 "1001" .




In [151]:
## now let's iterate over the data frame and create instances of fields

# define a context
# - "data_field" maps to "rp:DE_000000007&" and is reused as a prefix for the
#   per-column field types ("data_field:gender", ...)
# - "Index" is declared as well because rows produced by itertuples() carry the
#   dataframe index as an "Index" field
# - the string has no surrounding braces: it is spliced into a larger JSON object
context = """
      "@context":
      {
        "@vocab": "http://foo.com#",
        "rp": "http://purl.roswellpark.org/ontology#",
        "ex": "http://example.com#",
        "data_record": "rp:DE_000000003",
        "data_field": "rp:DE_000000007&",
        "Index": "data_field:Index",
        "patient_id": "data_field:patient_id",
        "gender": "data_field:gender",
        "birth_date": "data_field:birth_date",
        "fv": "rp:DE_000000026",
        "has_field": "rp:DE_000000022"
      }"""
    
g = Graph()
for row in patients.itertuples():
    # convert row into a string of {"@type": <column>, "fv": <value>} objects
    # every value is emitted as a JSON string; json.dumps adds the surrounding
    # quotes AND escapes quotes, backslashes and control characters, which
    # plain string formatting would leave as invalid JSON
    data_field = ', \n   '.join('''{"@type":%s, "fv":%s}''' % (json.dumps(k, ensure_ascii=False),
                                                            json.dumps(str(v), ensure_ascii=False))
                          for (k, v) in row._asdict().items())

    # use context from above, but since it is one row at time we don't need ConjunctiveGraph
    doc = """{
        %s,
        "@type": "data_record",
        "has_field":[ \n   %s ]
      }""" % (context, data_field)
    g.parse(data=doc, format='json-ld')
    print(doc+"\n -------------------- \n")

{
        
      "@context":
      {
        "@vocab": "http://foo.com#",
        "rp": "http://purl.roswellpark.org/ontology#",
        "ex": "http://example.com#",
        "data_record": "rp:DE_000000003",
        "data_field": "rp:DE_000000007&",
        "Index": "data_field:Index",
        "patient_id": "data_field:patient_id",
        "gender": "data_field:gender",
        "birth_date": "data_field:birth_date",
        "fv": "rp:DE_000000026",
        "has_field": "rp:DE_000000022"
      },
        "@type": "data_record",
        "has_field":[ 
   {"@type":"Index", "fv":"0"}, 
   {"@type":"patient_id", "fv":"10001"}, 
   {"@type":"gender", "fv":"M"}, 
   {"@type":"birth_date", "fv":"1950-01-01"} ]
      }
 -------------------- 

{
        
      "@context":
      {
        "@vocab": "http://foo.com#",
        "rp": "http://purl.roswellpark.org/ontology#",
        "ex": "http://example.com#",
        "data_record": "rp:DE_000000003",
        "data_field": "rp:DE_000000007&",
      

In [152]:
# print rdf
# note that since the "@id" key was not used, blank nodes (the _:N... identifiers)
# have been created for the records and their fields
# serialize() returns bytes on rdflib < 6 and str on rdflib >= 6; decode only bytes
rdf_text = g.serialize(format="nt")
print(rdf_text.decode('utf-8') if isinstance(rdf_text, bytes) else rdf_text)

_:Nfbd56b8877e6463fafea530fd2834387 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.roswellpark.org/ontology#DE_000000007&birth_date> .
_:Nfbd56b8877e6463fafea530fd2834387 <http://purl.roswellpark.org/ontology#DE_000000026> "1950-01-01" .
_:N71540a15e0c14befab6fc82f7b048b07 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.roswellpark.org/ontology#DE_000000007&Index> .
_:N22c95ea02d034733ad13c5408d90bbec <http://purl.roswellpark.org/ontology#DE_000000026> "10002" .
_:N9c468e0b9a504c3bb4a82596fab29216 <http://purl.roswellpark.org/ontology#DE_000000026> "M" .
_:Ndc6fad4d03f947ccb4f4014225b8f9ef <http://purl.roswellpark.org/ontology#DE_000000022> _:Nfbd56b8877e6463fafea530fd2834387 .
_:Na92a2483e5594ac2bda7de92b85592f2 <http://purl.roswellpark.org/ontology#DE_000000022> _:N8cad957e401644d289cbe705c86163ed .
_:N4c819f8c71844b8187dfd454316c023c <http://purl.roswellpark.org/ontology#DE_000000026> "10001" .
_:N8cad957e401644d289cbe705c86163ed <http://www.w3.org/199

In [153]:
# output in turtle
# serialize() returns bytes on rdflib < 6 and str on rdflib >= 6; decode only bytes
rdf_text = g.serialize(format="turtle")
print(rdf_text.decode('utf-8') if isinstance(rdf_text, bytes) else rdf_text)

@prefix : <http://foo.com#> .
@prefix ex: <http://example.com#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix rp: <http://purl.roswellpark.org/ontology#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

[] a rp:DE_000000003 ;
    rp:DE_000000022 [ a <http://purl.roswellpark.org/ontology#DE_000000007&birth_date> ;
            rp:DE_000000026 "1960-01-02" ],
        [ a <http://purl.roswellpark.org/ontology#DE_000000007&patient_id> ;
            rp:DE_000000026 "10002" ],
        [ a <http://purl.roswellpark.org/ontology#DE_000000007&Index> ;
            rp:DE_000000026 "1" ],
        [ a <http://purl.roswellpark.org/ontology#DE_000000007&gender> ;
            rp:DE_000000026 "F" ] .

[] a rp:DE_000000003 ;
    rp:DE_000000022 [ a <http://purl.roswellpark.org/ontology#DE_000000007&birth_date> ;
            rp:DE_000000026 "1970-01-03" ],
        [ a 