In [34]:
from os import listdir
from os.path import join, isfile
import pandas as pd
from rdflib import URIRef, term, Graph, Literal, Namespace
from rdflib.namespace import OWL,RDF, RDFS, SKOS, XSD

In [35]:
EIOPA_DATA_PATH = join("..", "data", "external", "eiopa")
GLEIF_DATA_PATH = join("..", "data", "external", "gleif")

# Read EIOPA Register in RDF

In [36]:
g = Graph()

with open(join(EIOPA_DATA_PATH,'eiopa_register.ttl'), "rb") as fp:
    g.parse(data = fp.read(), format = 'turtle')

with open(join(GLEIF_DATA_PATH,'gleif-L1-extract.ttl'), "rb") as fp:
    g.parse(data = fp.read(), format = 'turtle')
    
with open(join(GLEIF_DATA_PATH,'EntityLegalFormData.ttl'), "rb") as fp:
    g.parse(data = fp.read(), format = 'turtle')

print("graph has {} statements.".format(len(g)))

graph has 375388 statements.


# Example SPARQL queries

In [23]:
# function to visualize the name of the uri without namespace
def get_name(uri):
    if isinstance(uri, term.URIRef):
        return uri.n3().split("/")[-1][0:-1]
    else:
        return uri

## Extracting information in the register

In [24]:
query = """SELECT DISTINCT ?s ?p ?o
           WHERE {?s gleif-L1:hasLegalName "Achmea Schadeverzekeringen N.V." . 
                  ?s ?p ?o .
}"""
results = g.query(query)

for row in results:
    for item in row:
        print(get_name(item) + " ", end = '')
    print("")

L-72450067SU8C745IAV11 hasHeadquartersAddress L-72450067SU8C745IAV11-LAL 
L-72450067SU8C745IAV11 hasLegalJurisdiction NL 
L-72450067SU8C745IAV11 owl#a InsuranceUndertaking 
L-72450067SU8C745IAV11 hasLegalForm ELF-B5PM 
L-72450067SU8C745IAV11 hasLegalAddress L-72450067SU8C745IAV11-LAL 
L-72450067SU8C745IAV11 hasRegistrationIdentifier BID-RA000463-08053410 
L-72450067SU8C745IAV11 hasLegalName Achmea Schadeverzekeringen N.V. 
L-72450067SU8C745IAV11 hasRegisterIdentifier IURI-De-Nederlandsche-Bank-W1686 
L-72450067SU8C745IAV11 hasEntityStatus EntityStatusActive 
L-72450067SU8C745IAV11 22-rdf-syntax-ns#type LegalEntity 


In [25]:
query = """SELECT DISTINCT ?lf ?p ?o
           WHERE {?s gleif-L1:hasLegalName "Achmea Schadeverzekeringen N.V." . 
                  ?s gleif-L1:hasLegalForm ?lf .
                  ?lf ?p ?o .}"""
results = g.query(query)

for row in results:
    for item in row:
        print(get_name(item) + " ", end = '')
    print("")

ELF-B5PM hasAbbreviationLocal nv 
ELF-B5PM 22-rdf-syntax-ns#type EntityLegalForm 
ELF-B5PM hasCoverageArea NL 
ELF-B5PM hasAbbreviationLocal n.v. 
ELF-B5PM hasNameLocal naamloze vennootschap 
ELF-B5PM hasAbbreviationLocal NV 
ELF-B5PM hasNameTransliterated naamloze vennootschap 
ELF-B5PM 22-rdf-syntax-ns#type EntityLegalFormIdentifier 
ELF-B5PM identifies ELF-B5PM 
ELF-B5PM tag B5PM 
ELF-B5PM hasAbbreviationLocal N.V. 


In [29]:
query = """SELECT DISTINCT ?p ?o
           WHERE {?s gleif-L1:hasLegalName "Achmea Schadeverzekeringen N.V." . 
                  ?s eiopa-Base:hasRegisterIdentifier ?lf .
                  ?lf ?p ?o .
}"""
results = g.query(query)

for row in results:
    for item in row:
        print(get_name(item) + " ", end = '')
    print("")

hasRegistrationStartDate 23/12/1991 01:00:00 
hasNCA De Nederlandsche Bank 
identifies L-72450067SU8C745IAV11 
owl#a InsuranceUndertakingRegisterIdentifier 
hasCrossBorderStatus Domestic undertaking 
hasEUCountryWhereEntityOperates NL 
hasInsuranceUndertakingID W1686 
hasOperationStartDate 23/12/1991 01:00:00 
hasOperationEndDate nan 
hasRegistrationEndDate nan 


## Extracting geographical location of entities

The following query extracts the geographical location of all insurance undertakings within NL

In [30]:
query = """SELECT DISTINCT ?name ?lat ?long
           WHERE {?s gleif-base:hasLegalJurisdiction CountryCodes:NL ; 
                     gleif-L1:hasLegalName ?name ;
                     gleif-L1:hasLegalAddress/gleif-base:hasCity ?city .
                  ?geo gleif-base:hasCity ?city ; 
                       geo:lat ?lat ; 
                       geo:long ?long .}"""
results = g.query(query)

points = list()
for row in results:
    new = (row[0].value, float(row[1].value), float(row[2].value))
    if new not in points:
        points.append(new)

In [31]:
len(results)

392

## Plotting geographical location

In [32]:
import pandas as pd
import folium

In [33]:
ave_lat = sum(p[1] for p in points)/len(points)
ave_lon = sum(p[2] for p in points)/len(points)

# Load map centred on average coordinates
my_map = folium.Map(location=[ave_lat, ave_lon], zoom_start=8, tiles='Stamen Terrain')

#add a markers
for each in points:  
    folium.Marker((each[1], each[2]), popup=each[0]).add_to(my_map)

my_map.save("eiopa_register_nl.html")