In [1]:
import urllib.parse
from rdflib import Graph, Literal, Namespace, URIRef
from rdflib.namespace import RDF, RDFS, FOAF, OWL, XSD, DC, DCTERMS 
import pandas as pd

In [2]:
# Namespaces: `sso` carries the ontology terms (classes and properties),
# `ssr` carries the resources minted from the datasets.
sso = Namespace("http://www.sanitasicilia.it/ontology/")
ssr = Namespace("http://www.sanitasicilia.it/resource/")

# Base IRI handed to urify() whenever a resource identifier is minted.
base_uri = "http://www.sanitasicilia.it/resource/"

# Running counter used to number resources; loader cells reset it before use.
count = 0

# Single in-memory graph that every loader cell writes into.
g = Graph()
g.bind("sso", sso)
g.bind("ssr", ssr)


In [3]:
def urify(ns, testo):
    """Build an IRI string by appending a URL-safe form of ``testo`` to ``ns``.

    Spaces become underscores, apostrophes are dropped, and the result is
    percent-encoded with ``urllib.parse.quote`` before being concatenated
    to the namespace prefix.
    """
    # One pass over the text: ' ' -> '_' and "'" -> removed.
    slug = testo.translate(str.maketrans({" ": "_", "'": None}))
    encoded = urllib.parse.quote(slug)
    return ns + encoded

In [4]:
import re

#Comuni
def addTriples(row):
    """Add the triples describing one municipality (sso:Comune) to the graph ``g``.

    ``row`` is one record of the municipalities table, read by position
    (meaning inferred from the predicate each value is attached to):
    0 = name, 1 = total population, 2 = province, 3 = province acronym.

    The resource IRI is minted from the name via ``urify(base_uri, name)``.
    An ``rdfs:seeAlso`` link to the DBpedia resource of the same name is added.
    """
    # Read the fields from a plain list: integer keys on a label-indexed
    # pandas Series (``row[0]``) are deprecated as positional lookups.
    fields = list(row)
    name = fields[0]

    res = URIRef(urify(base_uri, name))
    g.add((res, RDF.type, sso.Comune))
    g.add((res, sso.hasName, Literal(name, datatype=XSD.string)))
    g.add((res, sso.hasTotalPopulation, Literal(fields[1], datatype=XSD.integer)))
    g.add((res, sso.hasProvince, Literal(fields[2], datatype=XSD.string)))
    g.add((res, sso.hasProvinceAcr, Literal(fields[3], datatype=XSD.string)))

    # Interlinking: the dataset writes accented vowels as vowel + apostrophe
    # (e.g. "u'"), so restore the accented letter before building the link.
    a = (name.replace("a'", 'à').replace("o'", 'ò').replace("e'", 'è')
             .replace("i'", 'ì').replace("u'", 'ù'))
    # DBpedia resource IRIs live under http://dbpedia.org/resource/ (plain
    # http, no "www"); the previous prefix did not identify DBpedia resources.
    # NOTE(review): urify() drops any remaining apostrophes, so names that
    # contain a real apostrophe may not match DBpedia's spelling - confirm.
    g.add((res, RDFS.seeAlso, URIRef(urify("http://dbpedia.org/resource/", a))))


# Load the municipalities table and emit the triples for every row.
comuni_df = pd.read_csv("../datasets/completed/comuni_sicilia.csv")

comuni_df.apply(addTriples, axis=1)


0      None
1      None
2      None
3      None
4      None
       ... 
381    None
382    None
383    None
384    None
385    None
Length: 386, dtype: object

In [5]:
#Farmacie
count = 0
def addTriples(row):
    """Add the triples describing one pharmacy (sso:Farmacia) to the graph ``g``.

    ``row`` is one record of the pharmacies table, read by position
    (meaning inferred from the predicate each value is attached to):
    2 = municipality name, 3 = postal code (CAP), 4 = address, 5 = name,
    6 = latitude, 7 = longitude, 8 = VAT number.

    The resource IRI is ``base_uri`` + ``F`` + the zero-padded value of the
    module-level ``count``, which is incremented once per call.
    The postal code is skipped when it equals 0; latitude and longitude are
    skipped when either of them equals 0.
    """
    global count
    # Read the fields from a plain list: integer keys on a label-indexed
    # pandas Series (``row[2]``) are deprecated as positional lookups.
    fields = list(row)

    # TODO: replace with the pharmacy/parapharmacy code rather than the name
    res = URIRef(urify(base_uri, f'F{count:04}'))
    g.add((res, RDF.type, sso.Farmacia))
    # Link to the municipality resource; title-casing mirrors how the
    # municipality IRIs are spelled.
    g.add((res, sso.isIn, URIRef(urify(base_uri, fields[2].title()))))

    if fields[3] != 0:
        g.add((res, sso.hasCap, Literal(fields[3], datatype=XSD.integer)))

    g.add((res, sso.hasAddress, Literal(fields[4], datatype=XSD.string)))
    g.add((res, sso.hasName, Literal(fields[5], datatype=XSD.string)))

    if fields[6] != 0 and fields[7] != 0:
        g.add((res, sso.hasLatitude, Literal(fields[6], datatype=XSD.decimal)))
        g.add((res, sso.hasLongitude, Literal(fields[7], datatype=XSD.decimal)))
    g.add((res, sso.hasVatNumber, Literal(fields[8], datatype=XSD.integer)))
    count += 1


# Load the pharmacies table and emit the triples for every row.
farmacie_df = pd.read_csv("../datasets/farmacie_new.csv")

farmacie_df.apply(addTriples, axis=1)

0       None
1       None
2       None
3       None
4       None
        ... 
1655    None
1656    None
1657    None
1658    None
1659    None
Length: 1660, dtype: object

In [38]:
# Sanity check: count the resources typed sso:Farmacia in the graph.
res = g.query(
    '''SELECT (COUNT(?parafarmacia) AS ?cname) WHERE {?parafarmacia rdf:type sso:Farmacia .} '''
)

for result_row in res:
    print(result_row)

(rdflib.term.Literal('1660', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')),)


In [6]:
#Parafarmacie
count = 0

def addTriples(row):
    """Add the triples describing one parapharmacy (sso:Parafarmacia) to ``g``.

    ``row`` is one record of the parapharmacies table, read by position
    (meaning inferred from the predicate each value is attached to):
    2 = municipality name, 3 = postal code (CAP), 4 = address, 5 = name,
    6 = latitude, 7 = longitude, 8 = VAT number.

    The resource IRI is ``base_uri`` + ``PF`` + the zero-padded value of the
    module-level ``count``, which is incremented once per call.
    A value of 0 marks a missing postal code, coordinate or VAT number and
    the corresponding triple is not emitted.
    """
    global count
    # Read the fields from a plain list: integer keys on a label-indexed
    # pandas Series (``row[2]``) are deprecated as positional lookups.
    fields = list(row)

    # TODO: replace with the pharmacy/parapharmacy code rather than the name
    res = URIRef(urify(base_uri, f'PF{count:04}'))
    g.add((res, RDF.type, sso.Parafarmacia))
    g.add((res, sso.isIn, URIRef(urify(base_uri, fields[2].title()))))

    if fields[3] != 0:
        g.add((res, sso.hasCap, Literal(fields[3], datatype=XSD.integer)))

    g.add((res, sso.hasAddress, Literal(fields[4], datatype=XSD.string)))
    g.add((res, sso.hasName, Literal(fields[5], datatype=XSD.string)))

    if fields[6] != 0 and fields[7] != 0:
        g.add((res, sso.hasLatitude, Literal(fields[6], datatype=XSD.decimal)))
        g.add((res, sso.hasLongitude, Literal(fields[7], datatype=XSD.decimal)))

    # The loading code replaces a missing VAT number ('-') with 0; emitting
    # that placeholder would assert a VAT number of 0, so skip it like the
    # other 0-valued placeholders above.
    if fields[8] != 0:
        g.add((res, sso.hasVatNumber, Literal(fields[8], datatype=XSD.integer)))
    count += 1

# Load the parapharmacies table (semicolon-separated).
parafarmacie_df = pd.read_csv("../datasets/completed/parafarmacie.csv", sep=';')

# Coordinates are text with a decimal comma; '-' becomes 0.0, everything
# else is parsed as a float.
for coordinate in ("LATITUDINE", "LONGITUDINE"):
    parafarmacie_df[coordinate] = parafarmacie_df[coordinate].apply(
        lambda x: 0.0 if x == '-' else float(x.replace(',', '.'))
    )

# A VAT number of '-' becomes 0; other values are kept as read.
parafarmacie_df["PARTITAIVA"] = parafarmacie_df["PARTITAIVA"].apply(
    lambda x: 0 if x == '-' else x
)


parafarmacie_df.apply(addTriples, axis=1)

            

0       None
1       None
2       None
3       None
4       None
        ... 
1277    None
1278    None
1279    None
1280    None
1281    None
Length: 1282, dtype: object

In [None]:
# Number of rows in the parapharmacies table.
parafarmacie_df.shape[0]

In [None]:
# Sanity check: count the resources typed sso:Parafarmacia in the graph.
res = g.query(
    '''SELECT (COUNT(?parafarmacia) AS ?cname) WHERE {?parafarmacia rdf:type sso:Parafarmacia .} '''
)

for result_row in res:
    print(result_row)

In [7]:
#Strutture sanitarie private
count = 0
def addTriples(row):
    """Add the triples describing one private facility (sso:StrutturaPrivata) to ``g``.

    ``row`` is one record of the private-facilities table, read by position
    (meaning inferred from the predicate each value is attached to):
    0 = postal code (CAP), 1 = latitude, 2 = longitude, 3 = reference ASP,
    5 = municipality name, 6 = address, 7 = name, 8 = business type,
    9 = site type. Field 4 is not read.

    The resource IRI is ``base_uri`` + ``SPVT`` + the zero-padded value of the
    module-level ``count``, which is incremented once per call.
    Postal code and address are skipped when equal to 0, coordinates when
    either equals 0, and the municipality link when the city is missing.
    """
    global count
    # Read the fields from a plain list: integer keys on a label-indexed
    # pandas Series (``row[0]``) are deprecated as positional lookups.
    fields = list(row)

    # TODO: replace with the facility code
    res = URIRef(urify(base_uri, f'SPVT{count:04}'))
    g.add((res, RDF.type, sso.StrutturaPrivata))

    if fields[0] != 0:
        g.add((res, sso.hasCap, Literal(fields[0], datatype=XSD.integer)))

    if fields[1] != 0 and fields[2] != 0:
        g.add((res, sso.hasLatitude, Literal(fields[1], datatype=XSD.decimal)))
        g.add((res, sso.hasLongitude, Literal(fields[2], datatype=XSD.decimal)))

    g.add((res, sso.hasReferenceAsp, Literal(fields[3], datatype=XSD.string)))

    # The city column is cast with astype(str), which turns a missing value
    # into the literal text 'nan'. Comparing with '' alone never detects it
    # and would link the facility to a bogus ".../Nan" municipality.
    city = fields[5]
    city_present = (
        isinstance(city, str)
        and city.strip() != ''
        and city.strip().lower() != 'nan'
    )
    if city_present:
        g.add((res, sso.isIn, URIRef(urify(base_uri, city.title()))))

    if fields[6] != 0:
        g.add((res, sso.hasAddress, Literal(fields[6], datatype=XSD.string)))

    g.add((res, sso.hasName, Literal(fields[7], datatype=XSD.string)))
    g.add((res, sso.businessType, Literal(fields[8], datatype=XSD.string)))
    g.add((res, sso.hasSiteType, Literal(fields[9], datatype=XSD.string)))
    count += 1

# Load the private-facilities table and emit the triples for every row.
private_df = pd.read_csv("../datasets/completed/private.csv")
# Missing cities are read as NaN; astype(str) alone would turn them into the
# text 'nan', which the emptiness check in addTriples cannot recognise.
# Fill them with '' first so a missing city really is an empty string.
private_df["Citta'"] = private_df["Citta'"].fillna("").astype(str)
private_df.apply(addTriples, axis=1)


0       None
1       None
2       None
3       None
4       None
        ... 
2108    None
2109    None
2110    None
2111    None
2112    None
Length: 2113, dtype: object

In [8]:
#Strutture sanitarie pubbliche
import math

count = 0


def _has_text(value):
    """Return True when ``value`` is a non-blank string other than the 'nan' produced by ``astype(str)``."""
    if not isinstance(value, str):
        return False
    stripped = value.strip()
    return stripped != '' and stripped.lower() != 'nan'


def _whole_number(value):
    """Return ``value`` as ``int`` when it is a whole-valued float, otherwise unchanged."""
    if isinstance(value, float) and value.is_integer():
        return int(value)
    return value


def addTriples(row):
    """Add the triples describing one public facility (sso:StrutturaPubblica) to ``g``.

    ``row`` is one record of the public-facilities table, read by position
    (meaning inferred from the predicate each value is attached to):
    0 = municipality name, 1 = latitude, 2 = longitude, 3 = postal code (CAP),
    4 = address, 5 = name, 7 = business type, 8 = site type,
    9 = recovery-structure name, 10 = working months, 12 = opening days,
    13 = weekly hours, 14 = VAT number, 15 = e-mail, 16 = website,
    17/18 = phone prefix/number, 19/20 = fax prefix/number.
    Fields 6 and 11 are not read.

    The resource IRI is ``base_uri`` + ``SPUB`` + the zero-padded value of the
    module-level ``count``, which is incremented once per call.
    """
    global count
    # Read the fields from a plain list: integer keys on a label-indexed
    # pandas Series (``row[0]``) are deprecated as positional lookups.
    fields = list(row)

    res = URIRef(urify(base_uri, f'SPUB{count:04}'))
    g.add((res, RDF.type, sso.StrutturaPubblica))

    # Text columns cast with astype(str) carry 'nan' for missing values, so
    # a bare comparison with '' is not enough to detect them.
    if _has_text(fields[0]):
        g.add((res, sso.isIn, URIRef(urify(base_uri, fields[0].title()))))

    if fields[1] != 0 and fields[2] != 0:
        g.add((res, sso.hasLatitude, Literal(fields[1], datatype=XSD.decimal)))
        g.add((res, sso.hasLongitude, Literal(fields[2], datatype=XSD.decimal)))

    g.add((res, sso.hasCap, Literal(fields[3], datatype=XSD.integer)))
    g.add((res, sso.hasAddress, Literal(fields[4], datatype=XSD.string)))
    g.add((res, sso.hasName, Literal(fields[5], datatype=XSD.string)))
    g.add((res, sso.businessType, Literal(fields[7], datatype=XSD.string)))
    g.add((res, sso.hasSiteType, Literal(fields[8], datatype=XSD.string)))

    # The recovery-structure name is the value that is tested (field 9).
    # Field 10 is the integer working-months value emitted below, so it
    # must not be reused as the name.
    recovery_name = fields[9]
    if _has_text(recovery_name):
        g.add((res, sso.isRecoveryStructure, Literal(True, datatype=XSD.boolean)))
        g.add((res, sso.hasRecoveryStructureName, Literal(recovery_name, datatype=XSD.string)))
    else:
        g.add((res, sso.isRecoveryStructure, Literal(False, datatype=XSD.boolean)))

    # NOTE(review): field 11 is skipped between working months (10) and
    # opening days (12) - confirm these indexes against the CSV header.
    g.add((res, sso.workingMonths, Literal(fields[10], datatype=XSD.integer)))
    g.add((res, sso.openingDays, Literal(fields[12], datatype=XSD.integer)))
    g.add((res, sso.weeklyHours, Literal(fields[13], datatype=XSD.integer)))
    g.add((res, sso.hasVatNumber, Literal(fields[14], datatype=XSD.integer)))
    g.add((res, sso.hasEmail, Literal(fields[15], datatype=XSD.string)))

    if _has_text(fields[16]):
        g.add((res, sso.hasWebsite, Literal(fields[16], datatype=XSD.string)))

    # Columns with missing values are read as floats (e.g. 91.0). Convert
    # whole-valued floats to int so the xsd:integer literal is "91" and not
    # the ill-formed "91.0".
    optional_numbers = (
        (sso.hasPhonePrefix, fields[17]),
        (sso.hasPhoneNumber, fields[18]),
        (sso.hasFaxPrefix, fields[19]),
        (sso.hasFaxNumber, fields[20]),
    )
    for predicate, value in optional_numbers:
        if not math.isnan(value):
            g.add((res, predicate, Literal(_whole_number(value), datatype=XSD.integer)))

    count += 1



# Load the public-facilities table.
pubbliche_df = pd.read_csv("../datasets/completed/pubbliche.csv")

# These optional text columns are compared with '' in addTriples. Missing
# values are read as NaN and astype(str) alone would turn them into the text
# 'nan', so fill them with '' first to keep "missing" an empty string.
for text_column in ("Comune", "Denominazione struttura di ricovero", "Sito web"):
    pubbliche_df[text_column] = pubbliche_df[text_column].fillna("").astype(str)

pubbliche_df.apply(addTriples, axis=1)

# Write the complete graph (all loader cells) to disk as Turtle.
g.serialize(destination='../datasets/rdf/sanitasicilia.ttl',
            format='turtle')

<Graph identifier=N42290376ca614b7b8fec72037c11b7f9 (<class 'rdflib.graph.Graph'>)>

In [28]:
# Select the names of the private facilities (sso:StrutturaPrivata) that are
# linked through sso:isIn to a municipality whose sso:hasName matches the
# regular expression "Palermo". The result is kept in `res` for later cells.
res = g.query('''SELECT ?name 
                 WHERE 
                {
                    ?struttura rdf:type sso:StrutturaPrivata .
                    ?struttura sso:hasName ?name .
                    ?struttura sso:isIn ?comune .
                    ?comune sso:hasName ?nomeC .
                    FILTER REGEX (?nomeC, "Palermo") .
                }
 ''')


In [30]:
# Print every result row of the query held in `res`.
for result_row in res:
    print(result_row)

(rdflib.term.Literal('CASA DI CURA COSENTINO', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')),)
(rdflib.term.Literal('CENTRO ANDROS ', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')),)
(rdflib.term.Literal('NUOVA CASA DI CURA DEMMA ', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')),)
(rdflib.term.Literal('AIAS ONLUS  PALERMO - SEZIONE BEN HAUCAL', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')),)
(rdflib.term.Literal('AIAS ONLUS PALERMO - CENTRO GRAMSCI', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')),)
(rdflib.term.Literal('AIAS ONLUS PALERMO - CENTRO PARUTA ', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')),)
(rdflib.term.Literal('AIAS ONLUS PALERMO - CENTRO RAITI', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')),)
(rdflib.term.Literal('AIAS ONLUS PALERMO - SEZIONE BERNINI', datatype=rdflib.term.URIRef('http://www.w3