In [18]:
######################################################################
# Demo of how to use RDFLib with JSON-LD to translate json to rdf
#
# reference/intesting links:
# https://etl.linkedpipes.com/tutorials/how-to/map_json_to_rdf
# https://etl.linkedpipes.com/tutorials/how-to/map_rdf_properties
# https://etl.linkedpipes.com/tutorials/csv-to-rdf/describe_semantics
# https://github.com/digitalbazaar/pyld
######################################################################

## requirment make sure rdflib and rdflib-jsonld are installed
# conda install -c conda-forge rdflib 
# conda install -c bioconda rdflib-jsonld 

# import libraries, not sure the serializer and plugin libs is needed
from rdflib import Graph, ConjunctiveGraph, plugin
from rdflib.serializer import Serializer
import pandas as pds
import json

# create a simple json doc with a json-ld context
doc = """{
"@context":
  {
     "@vocab": "http://foo.com/"
  },
  "@id": "http://example.com/places#BrewEats",
  "@type": "Restaurant",
  "name": ["Brew Eats", "foo"],
   "databaseId": "23987520"
}"""

print(doc)

{
"@context":
  {
     "@vocab": "http://foo.com/"
  },
  "@id": "http://example.com/places#BrewEats",
  "@type": "Restaurant",
  "name": ["Brew Eats", "foo"],
   "databaseId": "23987520"
}


In [7]:
# load doc into rdflib and process
g = Graph().parse(data=doc, format='json-ld')
print(g.serialize(format="nt").decode('utf-8'))

## things to note:
# - the @vocab sets the default/base prefix to 'htt://foo.com/'
# - the base is overriden in BrewEats uri
# - in the json doc, the name key has a list of values: ["Brew Eats", "foo"]
#   but the jsonld processor splits the list into multiple triples, 
# 
#   <http://example.com/places#BrewEats> <http://foo.com/name> "Brew Eats" .
#   <http://example.com/places#BrewEats> <http://foo.com/name> "foo" .
#
#   this is good ...  


<http://example.com/places#BrewEats> <http://foo.com/name> "Brew Eats" .
<http://example.com/places#BrewEats> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://foo.com/Restaurant> .
<http://example.com/places#BrewEats> <http://foo.com/databaseId> "23987520" .
<http://example.com/places#BrewEats> <http://foo.com/name> "foo" .




In [9]:
# output in turtle
# notice the shortened/abbreviated syntax

print(g.serialize(format="turtle").decode('utf-8'))

@prefix : <http://foo.com/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<http://example.com/places#BrewEats> a :Restaurant ;
    :databaseId "23987520" ;
    :name "Brew Eats",
        "foo" .




In [11]:
# here is a more complex example with multiple records
books = """
{
  "@context": {
    "dc": "http://purl.org/dc/elements/1.1/",
    "ex": "http://example.org/vocab#",
    "xsd": "http://www.w3.org/2001/XMLSchema#",
    "ex:contains": {
      "@type": "@id"
    }
  },
  "@graph": [
    {
      "@id": "http://example.org/library",
      "@type": "ex:Library",
      "ex:contains": "http://example.org/library/the-republic"
    },
    {
      "@id": "http://example.org/library/the-republic",
      "@type": "ex:Book",
      "dc:creator": "Plato",
      "dc:title": "The Republic",
      "ex:contains": "http://example.org/library/the-republic#introduction"
    },
    {
      "@id": "http://example.org/library/the-republic#introduction",
      "@type": "ex:Chapter",
      "dc:description": "An introductory chapter on The Republic.",
      "dc:title": "The Introduction"
    }
  ]
}
"""
print(books)



{
  "@context": {
    "dc": "http://purl.org/dc/elements/1.1/",
    "ex": "http://example.org/vocab#",
    "xsd": "http://www.w3.org/2001/XMLSchema#",
    "ex:contains": {
      "@type": "@id"
    }
  },
  "@graph": [
    {
      "@id": "http://example.org/library",
      "@type": "ex:Library",
      "ex:contains": "http://example.org/library/the-republic"
    },
    {
      "@id": "http://example.org/library/the-republic",
      "@type": "ex:Book",
      "dc:creator": "Plato",
      "dc:title": "The Republic",
      "ex:contains": "http://example.org/library/the-republic#introduction"
    },
    {
      "@id": "http://example.org/library/the-republic#introduction",
      "@type": "ex:Chapter",
      "dc:description": "An introductory chapter on The Republic.",
      "dc:title": "The Introduction"
    }
  ]
}



In [14]:
# because you are using the graph syntax rdflib requires you a *ConjunctiveGraph*
# https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html?highlight=conjunctivegraph#rdflib.graph.ConjunctiveGraph

# I had to parse the graph in two steps, instead of single step like above
g = ConjunctiveGraph()
g.parse(data=books, format='json-ld')

# print rdf
print(g.serialize(format="nt").decode('utf-8'))


<http://example.org/library/the-republic> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/vocab#Book> .
<http://example.org/library/the-republic#introduction> <http://purl.org/dc/elements/1.1/title> "The Introduction" .
<http://example.org/library/the-republic> <http://purl.org/dc/elements/1.1/creator> "Plato" .
<http://example.org/library/the-republic> <http://purl.org/dc/elements/1.1/title> "The Republic" .
<http://example.org/library> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/vocab#Library> .
<http://example.org/library/the-republic#introduction> <http://purl.org/dc/elements/1.1/description> "An introductory chapter on The Republic." .
<http://example.org/library/the-republic> <http://example.org/vocab#contains> <http://example.org/library/the-republic#introduction> .
<http://example.org/library> <http://example.org/vocab#contains> <http://example.org/library/the-republic> .
<http://example.org/library/the-republic#introduction> <htt

In [15]:
# out in turtle
print(g.serialize(format="turtle").decode('utf-8'))

@prefix dc: <http://purl.org/dc/elements/1.1/> .
@prefix ex: <http://example.org/vocab#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<http://example.org/library> a ex:Library ;
    ex:contains <http://example.org/library/the-republic> .

<http://example.org/library/the-republic> a ex:Book ;
    ex:contains <http://example.org/library/the-republic#introduction> ;
    dc:creator "Plato" ;
    dc:title "The Republic" .

<http://example.org/library/the-republic#introduction> a ex:Chapter ;
    dc:description "An introductory chapter on The Republic." ;
    dc:title "The Introduction" .




In [16]:
### now try converting pandas dataframe to rdf using json-ld

# load patients dataset
patients = pds.ExcelFile('patients_1.xlsx').parse()
patients

Unnamed: 0,patient_id,gender,birth_date
0,10001,M,1950-01-01
1,10002,F,1960-01-02
2,10003,M,1970-01-03
3,10004,F,1980-01-04
4,10005,M,1990-01-05
5,10006,F,1955-01-06
6,10007,M,1965-01-07
7,10008,F,1975-01-08
8,10009,M,1985-01-09
9,10010,F,1995-01-10


In [21]:
# print dataframe as json
# 'dumps' stands for 'dump string' and is used for encoding as json; I'm not sure if it is needed ...
# https://docs.python.org/3/library/json.html
print(json.dumps(patients.to_json(orient='records')))

"[{\"patient_id\":10001,\"gender\":\"M\",\"birth_date\":\"1950-01-01\"},{\"patient_id\":10002,\"gender\":\"F\",\"birth_date\":\"1960-01-02\"},{\"patient_id\":10003,\"gender\":\"M\",\"birth_date\":\"1970-01-03\"},{\"patient_id\":10004,\"gender\":\"F\",\"birth_date\":\"1980-01-04\"},{\"patient_id\":10005,\"gender\":\"M\",\"birth_date\":\"1990-01-05\"},{\"patient_id\":10006,\"gender\":\"F\",\"birth_date\":\"1955-01-06\"},{\"patient_id\":10007,\"gender\":\"M\",\"birth_date\":\"1965-01-07\"},{\"patient_id\":10008,\"gender\":\"F\",\"birth_date\":\"1975-01-08\"},{\"patient_id\":10009,\"gender\":\"M\",\"birth_date\":\"1985-01-09\"},{\"patient_id\":10010,\"gender\":\"F\",\"birth_date\":\"1995-01-10\"}]"


In [31]:
# let's create a context
context = """
  "@context":
  {
     "@vocab": "http://foo.com/"
  }
"""

# load dataframe as json string
# data = json.dumps(patients.to_json(orient='records'))
data = patients.to_json(orient='records')
    
# build context + data json-ld doc
# see books json above for example
doc = """
{
  %s,
  "@graph":
  %s
}
""" % (context, data)

print(doc)


{
  
  "@context":
  {
     "@vocab": "http://foo.com/"
  }
,
  "@graph":
  [{"patient_id":10001,"gender":"M","birth_date":"1950-01-01"},{"patient_id":10002,"gender":"F","birth_date":"1960-01-02"},{"patient_id":10003,"gender":"M","birth_date":"1970-01-03"},{"patient_id":10004,"gender":"F","birth_date":"1980-01-04"},{"patient_id":10005,"gender":"M","birth_date":"1990-01-05"},{"patient_id":10006,"gender":"F","birth_date":"1955-01-06"},{"patient_id":10007,"gender":"M","birth_date":"1965-01-07"},{"patient_id":10008,"gender":"F","birth_date":"1975-01-08"},{"patient_id":10009,"gender":"M","birth_date":"1985-01-09"},{"patient_id":10010,"gender":"F","birth_date":"1995-01-10"}]
}



In [32]:
# parse graph
g = ConjunctiveGraph()
g.parse(data=doc, format='json-ld')

<Graph identifier=Nc6e12f519e924d40950aa61dd0daef2d (<class 'rdflib.graph.Graph'>)>

In [33]:
# print rdf
# note that b/c uris weren't defined for individuals blank nodes are created
print(g.serialize(format="nt").decode('utf-8'))

_:N1e3749cb73474a1a806046c005f96668 <http://foo.com/patient_id> "10001"^^<http://www.w3.org/2001/XMLSchema#integer> .
_:N3178ff77d74144b587bf67e6f55b3e6a <http://foo.com/patient_id> "10005"^^<http://www.w3.org/2001/XMLSchema#integer> .
_:N878c81dac5c544cf88ed4a3bd0ccc8eb <http://foo.com/birth_date> "1970-01-03" .
_:N878c81dac5c544cf88ed4a3bd0ccc8eb <http://foo.com/gender> "M" .
_:N9432131d1ffb44709eeaaadb6d10efb9 <http://foo.com/birth_date> "1985-01-09" .
_:N12c9f7ee914348ec86244dcabce0871d <http://foo.com/birth_date> "1995-01-10" .
_:N97ab372cb9b243b9847f94eed6c11b40 <http://foo.com/birth_date> "1965-01-07" .
_:N97ab372cb9b243b9847f94eed6c11b40 <http://foo.com/patient_id> "10007"^^<http://www.w3.org/2001/XMLSchema#integer> .
_:N9f3d6e5315ee4cc78e41e78182bd6f41 <http://foo.com/birth_date> "1980-01-04" .
_:N39bb7715b0c34b168c80d1f4d41f998b <http://foo.com/gender> "F" .
_:Nec99c0c2012a405cb0e43062bc238f96 <http://foo.com/gender> "F" .
_:N12c9f7ee914348ec86244dcabce0871d <http://foo.com/p

In [34]:
# out in turtle
print(g.serialize(format="turtle").decode('utf-8'))

@prefix : <http://foo.com/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

[] :birth_date "1995-01-10" ;
    :gender "F" ;
    :patient_id 10010 .

[] :birth_date "1950-01-01" ;
    :gender "M" ;
    :patient_id 10001 .

[] :birth_date "1990-01-05" ;
    :gender "M" ;
    :patient_id 10005 .

[] :birth_date "1960-01-02" ;
    :gender "F" ;
    :patient_id 10002 .

[] :birth_date "1955-01-06" ;
    :gender "F" ;
    :patient_id 10006 .

[] :birth_date "1970-01-03" ;
    :gender "M" ;
    :patient_id 10003 .

[] :birth_date "1985-01-09" ;
    :gender "M" ;
    :patient_id 10009 .

[] :birth_date "1965-01-07" ;
    :gender "M" ;
    :patient_id 10007 .

[] :birth_date "1980-01-04" ;
    :gender "F" ;
    :patient_id 10004 .

[] :birth_date "1975-01-08" ;
    :gender "F" ;
    :patient_id 10008 .




In [46]:
# now lets say we have a big dataset ... lets iterate over the records to create rdf
g = Graph()
for row in patients.itertuples():
    # convert row to dict
    # note: you need to use dumps, otherwise you end up an OrderedDict
    #    OrderedDict([('Index', 0), ('patient_id', 10001), ('gender', 'M'), ('birth_date', '1950-01-01')])
    data = ['"%s":%s'%(key, value) for key, value in row._asdict().items()]
    data = data.join(',')
    
    #use context from above, but since it is one row at time we don't need ConjunctiveGraph
    doc = """
    {
      %s, %s
    }
    """ % (context, data)
    print(doc)

AttributeError: 'list' object has no attribute 'join'