In [1]:
import urllib.parse
from rdflib import Graph, Literal, Namespace, URIRef, BNode
from rdflib.namespace import RDF, RDFS, FOAF, OWL, XSD, DC, DCTERMS 
import pandas as pd

In [2]:
# Base IRI (plain string) under which resource URIs are built with urify().
base_uri = "http://www.sanitasicilia.it/resource/"

# Namespaces: `sso` for ontology terms (classes/properties),
# `ssr` for the resources themselves.
sso = Namespace("http://www.sanitasicilia.it/ontology/")
ssr = Namespace("http://www.sanitasicilia.it/resource/")

# Single graph that accumulates every triple produced by the notebook;
# the prefixes are bound so they can be used in serialisations and queries.
g = Graph()
g.bind("sso", sso)
g.bind("ssr", ssr)

# Running counter of minted resources.
count = 0


In [3]:
def urify(ns, testo):
    """Return ``ns`` followed by a URI-safe rendering of ``testo``.

    Every space in ``testo`` is turned into an underscore and every
    apostrophe is dropped; the result is percent-encoded with
    ``urllib.parse.quote`` and appended to ``ns``.
    """
    # One pass over the text: ' ' -> '_', "'" -> removed.
    cleanup = {ord(" "): "_", ord("'"): None}
    local_name = testo.translate(cleanup)
    return ns + urllib.parse.quote(local_name)

In [4]:
# Parafarmacie
# Module-level state read and updated by addTriples() through `global`.
# `count` is the number of resources minted so far; it is also the numeric
# suffix of the next 'PFnnnn' code.
count = 0
# Row that opened the resource currently being filled; empty until the first
# row is processed. The dtype is given explicitly because a bare pd.Series()
# emits a deprecation warning on pandas 1.x and its default dtype changed
# between versions.
row_prev = pd.Series(dtype=object)

def addBNodes(res, row):
    """Attach one site, modelled as a fresh blank node, to ``res``.

    Fields are read from ``row`` by position:

    * 1 -> ``sso:hasAddress`` (xsd:string)
    * 3 -> ``sso:hasCap`` (xsd:integer), skipped when the value is 0
    * 4 -> ``sso:isIn``, a resource URI built from the title-cased value
    * 7, 8 -> ``sso:hasLatitude`` / ``sso:hasLongitude`` (xsd:decimal),
      skipped when either value is 0

    Parameters
    ----------
    res : URIRef
        Subject that receives the ``sso:hasSite`` link.
    row : pandas.Series
        One record of the source table. Positions 7 and 8 are presumably the
        LATITUDINE / LONGITUDINE columns, whose '-' entries the loading code
        replaces with 0.0 -- TODO confirm the column order against the CSV.
    """
    bn = BNode()

    # Positional access goes through .iloc: plain row[1] on a string-labelled
    # Series is deprecated (pandas >= 2.1) and will become a label lookup.
    g.add((res, sso.hasSite, bn))
    g.add((bn, sso.hasAddress, Literal(row.iloc[1], datatype=XSD.string)))

    # 0 is treated as "no value": no CAP triple is emitted for it.
    if row.iloc[3] != 0:
        g.add((bn, sso.hasCap, Literal(row.iloc[3], datatype=XSD.integer)))
    g.add((bn, sso.isIn, URIRef(urify(base_uri, row.iloc[4].title()))))

    # Coordinates are written only as a pair, and only when both are non-zero.
    if row.iloc[7] != 0 and row.iloc[8] != 0:
        g.add((bn, sso.hasLatitude, Literal(row.iloc[7], datatype=XSD.decimal)))
        g.add((bn, sso.hasLongitude, Literal(row.iloc[8], datatype=XSD.decimal)))

def addTriples(row):
    """Add the triples for one table row to the graph ``g``.

    A row whose first field (the name) equals that of the row that opened the
    current resource is treated as a further site of that resource: only
    ``addBNodes`` is called, on the most recently minted ``PFnnnn`` URI.
    Any other row mints a new ``sso:Parafarmacia`` resource with its name,
    its VAT number (position 2) and its first site.

    Module-level state, updated through ``global``:

    * ``count``    -- number of resources minted so far / next code suffix
    * ``row_prev`` -- the row that opened the current resource

    Parameters
    ----------
    row : pandas.Series
        One record of the source table; fields are read by position.
    """
    global count, row_prev

    # Positional access goes through .iloc: plain row[0] on a string-labelled
    # Series is deprecated (pandas >= 2.1) and will become a label lookup.
    name = row.iloc[0]

    # Same name as the row that opened the current resource -> extra site.
    if not row_prev.empty and row_prev.iloc[0] == name:
        previous_code = f'PF{count - 1:04}'
        addBNodes(URIRef(urify(base_uri, previous_code)), row)
        return

    code = f'PF{count:04}'
    row_prev = pd.Series(row)

    res = URIRef(urify(base_uri, code))
    g.add((res, RDF.type, sso.Parafarmacia))
    g.add((res, sso.hasName, Literal(name, datatype=XSD.string)))

    # 0 is the placeholder the loading code substitutes for a missing ('-')
    # VAT number; it is not a real value, so no triple is emitted for it
    # (same convention addBNodes applies to its zero placeholders).
    vat_number = row.iloc[2]
    if vat_number != 0:
        g.add((res, sso.hasVatNumber, Literal(vat_number, datatype=XSD.integer)))

    addBNodes(res, row)
    count += 1

def _parse_coordinate(value):
    """Turn a decimal-comma coordinate string into a float; '-' becomes 0.0."""
    return float(value.replace(',', '.')) if value != '-' else 0.0


parafarmacie_df = pd.read_csv("../datasets/csv/completed/parafarmacie.csv")

# '-' marks a missing value in the CSV. It is replaced by 0 / 0.0 so the
# triple-building code can recognise it (addBNodes skips zero coordinates).
parafarmacie_df["LATITUDINE"] = parafarmacie_df["LATITUDINE"].apply(_parse_coordinate)
parafarmacie_df["LONGITUDINE"] = parafarmacie_df["LONGITUDINE"].apply(_parse_coordinate)
parafarmacie_df["PARTITAIVA"] = parafarmacie_df["PARTITAIVA"].apply(lambda x : x if x != '-' else 0)

# addTriples works purely through side effects (it fills `g` and updates the
# module-level counters), so the rows are walked with an explicit loop.
# DataFrame.apply(axis=1) would build a throwaway Series of None and, on
# pandas < 1.1, evaluates the first row twice, which would attach a duplicate
# site to the first resource.
for row_index, row in parafarmacie_df.iterrows():
    addTriples(row)

g.serialize(destination="../datasets/rdf/parafarmacie.ttl", format="turtle")

<Graph identifier=Nf926878fb0b74b3ab6ad615e58ceadec (<class 'rdflib.graph.Graph'>)>

In [22]:
# Sanity check on the populated graph: for every sso:Parafarmacia whose name
# matches the regex 'farmarisparmio', list the name together with the address
# of each of its sites (one result row per site).
# The rdf: and sso: prefixes are not declared in the query text; presumably
# they are resolved from the prefixes bound on `g` -- verify if the bindings change.
res = g.query('''
                SELECT ?pfname ?pfAddress
                WHERE {
                    ?pf rdf:type sso:Parafarmacia . 
                    ?pf sso:hasName ?pfname .
                    ?pf sso:hasSite ?pfSite .
                    ?pfSite sso:hasAddress ?pfAddress .
                    FILTER REGEX(?pfname, 'farmarisparmio') .
                }''')

# Print each result row as a {variable name: RDF term} dictionary.
for i in res:
    print(i.asdict())

{'pfname': rdflib.term.Literal('farmarisparmio', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), 'pfAddress': rdflib.term.Literal('via V. Emanuele', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string'))}
{'pfname': rdflib.term.Literal('farmarisparmio', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), 'pfAddress': rdflib.term.Literal('via garibaldi', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string'))}
{'pfname': rdflib.term.Literal('farmarisparmio', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), 'pfAddress': rdflib.term.Literal('via palermo', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string'))}
{'pfname': rdflib.term.Literal('farmarisparmio', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), 'pfAddress': rdflib.term.Literal('via garibaldi', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string'))}
{'pfname': rdflib.te