In [None]:
################
# This notebook provides some example SPARQL queries against the generated triple store,
# for example data.ttl or data.trig.
################

In [1]:
pip install openpyxl==2.6.4

Collecting openpyxl==2.6.4
[?25l  Downloading https://files.pythonhosted.org/packages/d6/26/eb28e975b7a37aad38d7ec4f7a0f652bdee6ecf36e6bd06f473c5af9b87b/openpyxl-2.6.4.tar.gz (173kB)
[K    100% |████████████████████████████████| 174kB 4.6MB/s ta 0:00:01
[?25hCollecting jdcal (from openpyxl==2.6.4)
  Downloading https://files.pythonhosted.org/packages/f0/da/572cbc0bc582390480bbd7c4e93d14dc46079778ed915b505dc494b37c57/jdcal-1.4.1-py2.py3-none-any.whl
Collecting et_xmlfile (from openpyxl==2.6.4)
  Downloading https://files.pythonhosted.org/packages/22/28/a99c42aea746e18382ad9fb36f64c1c1f04216f41797f2f0fa567da11388/et_xmlfile-1.0.1.tar.gz
Building wheels for collected packages: openpyxl, et-xmlfile
  Building wheel for openpyxl (setup.py) ... [?25ldone
[?25h  Stored in directory: /Users/admin/Library/Caches/pip/wheels/58/1d/e3/7c7ee57db55ac00dd5c4632287d8401cdd08ed59c965306f9c
  Building wheel for et-xmlfile (setup.py) ... [?25ldone
[?25h  Stored in directory: /Users/admin/Library/C

In [105]:
import os
import io
import collections
import pandas as pd
import matplotlib.pyplot as plt
from rdflib import ConjunctiveGraph
from rdflib.plugins.sparql.results.csvresults import CSVResultSerializer

# Let pandas show up to 200 characters per column in rendered output.
pd.set_option('display.max_colwidth', 200)

# Directory that write() saves spreadsheets into. exist_ok=True replaces the
# check-then-create pattern (os.path.exists + os.makedirs), which can race.
os.makedirs('outputs', exist_ok=True)

# Load the triple store once; query() runs every SPARQL query against this graph.
# NOTE(review): the file has a .ttl extension but is parsed with format='trig' --
# presumably intentional (the notebook header mentions data.ttl or data.trig); confirm.
g = ConjunctiveGraph()
g.parse('data.ttl', format='trig')

def query(q):
    """Run a SPARQL query against the module-level graph ``g``.

    The query result is serialized to CSV in an in-memory buffer, and that CSV
    text is then parsed by pandas.

    Parameters
    ----------
    q : str
        SPARQL query text passed to ``g.query``.

    Returns
    -------
    pandas.DataFrame
        The result of ``pd.read_csv`` on the serialized CSV.
    """
    buffer = io.BytesIO()
    CSVResultSerializer(g.query(q)).serialize(buffer)
    # The serializer writes bytes; decode to text before handing it to pandas.
    csv_text = buffer.getvalue().decode('utf-8')
    return pd.read_csv(io.StringIO(csv_text), encoding='utf-8')
    
def write(df, fn):
    """Save ``df`` as an Excel file named ``fn`` inside the ``outputs`` directory.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to export; only its ``to_excel`` method is used.
    fn : str
        File name (e.g. ``'output1.xlsx'``) appended to ``outputs/``.
    """
    # No ``encoding`` argument: DataFrame.to_excel deprecated it in pandas 1.5
    # and removed it in 2.0 (passing it raises TypeError); the .xlsx writers
    # handle Unicode natively, so it was never needed here.
    df.to_excel('outputs/{}'.format(fn))

In [106]:
# Display all triples: the pattern below uses three unbound variables, so it
# matches every (subject, property, object) triple the query can see.
q = query("""
prefix ae: <http://actrisexample.eu/ns/>
prefix dcterms: <http://purl.org/dc/terms/>
prefix envri: <http://envri.eu/ns/>
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
prefix skos: <http://www.w3.org/2004/02/skos/core#>
prefix xml: <http://www.w3.org/XML/1998/namespace>
prefix xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT ?subject ?property ?object
WHERE {
 ?subject ?property ?object.
}
""")

display(q)
# Uncomment to also save this result as outputs/output1.xlsx.
#write(q, 'output1.xlsx')

Unnamed: 0,subject,property,object
0,http://actrisexample.eu/ns/featureType,http://actrisexample.eu/ns/hasvalue,timeSeries
1,http://actrisexample.eu/ns/Station_GAW_type,http://www.w3.org/2000/01/rdf-schema#label,Station_GAW_type
2,http://actrisexample.eu/ns/title,http://www.w3.org/2000/01/rdf-schema#label,title
3,http://actrisexample.eu/ns/time_coverage_start,http://actrisexample.eu/ns/hasvalue,2003-01-01T00:00:00_UTC
4,http://actrisexample.eu/ns/Sample_duration,http://www.w3.org/2000/01/rdf-schema#label,Sample_duration
5,http://actrisexample.eu/ns/Acknowledgement,http://www.w3.org/2000/01/rdf-schema#label,Acknowledgement
6,http://actrisexample.eu/ns/creator_institution,http://actrisexample.eu/ns/hasvalue,",_""National_Oceanic_and_Atmospheric_Administration/Earth_System_Research_Laboratory/Global_Monitoring_Division,_NOAA/ESRL/GMD"""
7,http://actrisexample.eu/ns/Standard_method,http://actrisexample.eu/ns/hasvalue,SOP=GAW227
8,http://actrisexample.eu/ns/Detection_limit,http://actrisexample.eu/ns/hasvalue,0.047429_1/Mm
9,http://actrisexample.eu/ns/Station_altitude,http://www.w3.org/2000/01/rdf-schema#label,Station_altitude


In [107]:
# Display every triple whose object is the literal 'aerosol_absorption_coefficient',
# whatever its subject or property (VALUES pins ?object to that single literal).
q = query("""
prefix ae: <http://actrisexample.eu/ns/>
prefix dcterms: <http://purl.org/dc/terms/>
prefix envri: <http://envri.eu/ns/>
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
prefix skos: <http://www.w3.org/2004/02/skos/core#>
prefix xml: <http://www.w3.org/XML/1998/namespace>
prefix xsd: <http://www.w3.org/2001/XMLSchema#>


SELECT ?subject ?property ?object
WHERE {
VALUES ?object { 'aerosol_absorption_coefficient' }

 ?subject ?property ?object .
 }
""")

display(q)

Unnamed: 0,subject,property,object
0,http://actrisexample.eu/ns/Component,http://actrisexample.eu/ns/hasvalue,aerosol_absorption_coefficient
1,http://codes.wmo.int/wmdr/ObservedVariableAtmosphere/318,http://actrisexample.eu/ns/hasvalue,aerosol_absorption_coefficient
2,http://vocab.nerc.ac.uk/collection/P07/current/3TUNI9CM/,http://actrisexample.eu/ns/hasvalue,aerosol_absorption_coefficient
3,http://codes.wmo.int/wmdr/ObservedVariableAtmosphere/317,http://actrisexample.eu/ns/hasvalue,aerosol_absorption_coefficient
4,http://codes.wmo.int/wmdr/ObservedVariableAtmosphere/316,http://actrisexample.eu/ns/hasvalue,aerosol_absorption_coefficient
5,ub8bL312C17,http://www.w3.org/2000/01/rdf-schema#label,aerosol_absorption_coefficient


In [108]:
# Display triples whose object is the literal 'aerosol_absorption_coefficient'
# AND whose property is ae:hasvalue.
# NOTE: this is narrower than the previous cell -- triples that reach the literal
# through another property (the rdfs:label row in the previous recorded output)
# are excluded, so the two results are not identical.
# The '#VALUES ?subject ...' line inside the query is commented out in SPARQL;
# uncommenting it would additionally restrict the match to that one subject.
q = query("""
prefix ae: <http://actrisexample.eu/ns/>
prefix dcterms: <http://purl.org/dc/terms/>
prefix envri: <http://envri.eu/ns/>
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
prefix skos: <http://www.w3.org/2004/02/skos/core#>
prefix xml: <http://www.w3.org/XML/1998/namespace>
prefix xsd: <http://www.w3.org/2001/XMLSchema#>


SELECT ?subject ?property ?object
WHERE {
VALUES ?object { 'aerosol_absorption_coefficient' }
VALUES ?property {ae:hasvalue}
#VALUES ?subject {<http://codes.wmo.int/wmdr/ObservedVariableAtmosphere/317>}
 ?subject ?property ?object .
 }
""")

display(q)

Unnamed: 0,subject,property,object
0,http://vocab.nerc.ac.uk/collection/P07/current/3TUNI9CM/,http://actrisexample.eu/ns/hasvalue,aerosol_absorption_coefficient
1,http://codes.wmo.int/wmdr/ObservedVariableAtmosphere/316,http://actrisexample.eu/ns/hasvalue,aerosol_absorption_coefficient
2,http://codes.wmo.int/wmdr/ObservedVariableAtmosphere/318,http://actrisexample.eu/ns/hasvalue,aerosol_absorption_coefficient
3,http://codes.wmo.int/wmdr/ObservedVariableAtmosphere/317,http://actrisexample.eu/ns/hasvalue,aerosol_absorption_coefficient
4,http://actrisexample.eu/ns/Component,http://actrisexample.eu/ns/hasvalue,aerosol_absorption_coefficient


In [144]:
# Display the instrument 'filter_absorption_photometer', the parameter it measures
# ('aerosol_absorption_coefficient'), and the synonyms of that parameter, i.e. every
# subject that carries the parameter label through ae:hasvalue.
# The synonym pattern reuses ?parameter instead of repeating the literal, so editing
# the VALUES line is enough to look up a different parameter and its synonyms.

q = query("""
prefix ae: <http://actrisexample.eu/ns/>
prefix dcterms: <http://purl.org/dc/terms/>
prefix envri: <http://envri.eu/ns/>
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
prefix skos: <http://www.w3.org/2004/02/skos/core#>
prefix xml: <http://www.w3.org/XML/1998/namespace>
prefix xsd: <http://www.w3.org/2001/XMLSchema#>


SELECT DISTINCT  ?instrument ?property ?parameter ?syn
WHERE {
VALUES ?property {ae:measures}
VALUES ?instrument {'filter_absorption_photometer'}
VALUES ?parameter {'aerosol_absorption_coefficient'}
 ?instru ?property ?par .
 ?instru rdfs:label ?instrument .
 ?par rdfs:label ?parameter .
 ?syn ae:hasvalue ?parameter .
 }
""")

display(q)

Unnamed: 0,instrument,property,parameter,syn
0,filter_absorption_photometer,http://actrisexample.eu/ns/measures,aerosol_absorption_coefficient,http://vocab.nerc.ac.uk/collection/P07/current/3TUNI9CM/
1,filter_absorption_photometer,http://actrisexample.eu/ns/measures,aerosol_absorption_coefficient,http://codes.wmo.int/wmdr/ObservedVariableAtmosphere/317
2,filter_absorption_photometer,http://actrisexample.eu/ns/measures,aerosol_absorption_coefficient,http://codes.wmo.int/wmdr/ObservedVariableAtmosphere/318
3,filter_absorption_photometer,http://actrisexample.eu/ns/measures,aerosol_absorption_coefficient,http://codes.wmo.int/wmdr/ObservedVariableAtmosphere/316
4,filter_absorption_photometer,http://actrisexample.eu/ns/measures,aerosol_absorption_coefficient,http://actrisexample.eu/ns/Component
