In [1]:
from rdflib import Graph as RDFGraph
from rdflib.extras.external_graph_libs import rdflib_to_networkx_graph
import networkx as nx
from networkx import Graph as NXGraph
import matplotlib.pyplot as plt
import statistics
import collections
import pandas as pd
from six.moves.urllib.parse import urldefrag

In [2]:
# Parse the Turtle file at `path` into an in-memory rdflib graph `g`.
path = "../data/rdf/DNB-NR_FTK-2019-06_2019-12-31_MOD_FTK-DG.ttl"

g = RDFGraph()
g.parse(path, format='turtle')

# len() of the graph is reported as its number of triples.
triple_count = len(g)
print("rdflib Graph loaded successfully with {} triples".format(triple_count))

rdflib Graph loaded successfully with 242910 triples


In [3]:
# Select every resource ?a that carries `xl:type table:table`.
# NOTE(review): the xl:/table: prefixes are not declared in the query text;
# presumably they are resolved from the namespace bindings of the parsed
# graph -- confirm.
q = """
  SELECT ?a
  WHERE {
    ?a xl:type table:table .
  }"""
# Sorted list of the table URIs (first column of each result row).
tables = sorted(row[0] for row in g.query(q))
tables[:5]

[rdflib.term.URIRef('http://www.dnb.nl/xbrl/fws/dnb-nr/ftk-2019-06/2019-12-31/tab/K000#dnb_tK000'),
 rdflib.term.URIRef('http://www.dnb.nl/xbrl/fws/dnb-nr/ftk-2019-06/2019-12-31/tab/M101#dnb_tM101')]

## Concept labels

In [4]:
# Build a lookup from concept to label: ?t is the xl:from end of a
# concept-label arc and ?label is the rdf:value found at its xl:to end.
# If the query yields several rows for one concept, the last one wins.
q = """
  SELECT ?t ?label
  WHERE {
    ?x1 arcrole1:concept-label [xl:from ?t ;
                                xl:to [rdf:value ?label]] .
  }"""
concept_labels = {concept: label for concept, label in g.query(q)}

## DPM explicit domains

In [5]:
# A domain is a set of elements/values sharing a specified semantic nature.
# A domain can be one of two kinds: explicit or typed.
# An explicit domain has its elements enumerated in the model, while the values of a typed
# domain are assigned in the reports based on a specified format (data type).

In [6]:
# One row per explicit domain, with its period type, creation date and the
# nillable / abstract flags.
q = """
  SELECT DISTINCT ?t ?x1 ?x2 ?x4 ?x5
  WHERE {
    ?t rdf:type model:explicitDomainType .
    ?t xbrli:periodType ?x1 .
    ?t model:creationDate ?x2 .
    ?t xbrli:nillable ?x4 .
    ?t xbrli:abstract ?x5 .
  }"""

columns = [
    'Domain uri',
    'Domain label',
    'period Type',
    'creation Date',
    'nillable',
    'abstract',
]
# Insert the label looked up in concept_labels right after the domain URI;
# a domain without an entry in concept_labels raises KeyError here.
data = [[uri, concept_labels[uri], *attributes]
        for uri, *attributes in g.query(q)]
df_domains = pd.DataFrame(data=data, columns=columns)
df_domains.head()

Unnamed: 0,Domain uri,Domain label,period Type,creation Date,nillable,abstract
0,http://www.dnb.nl/xbrl/dict/exp#ftk_dom001,Contact,instant,2018-03-31,True,True
1,http://www.dnb.nl/xbrl/dict/exp#ftk_dom019,Commissies en organen,instant,2018-03-31,True,True
2,http://www.dnb.nl/xbrl/dict/exp#ftk_dom031,Toeslagverlening,instant,2018-03-31,True,True
3,http://www.dnb.nl/xbrl/dict/exp#ftk_dom015,Liquiditeiten,instant,2018-03-31,True,True
4,http://www.dnb.nl/xbrl/dict/exp#ftk_dom018,Dienstverlener,instant,2018-03-31,True,True


In [7]:
# Number of rows in df_domains (one per row returned by the explicit-domain query).
len(df_domains)

40

## DPM Dimensions

In [8]:
# In order to be used in the description of information requirements, a domain member
# or a typed domain value requires a dimension that provides a context for its application.
# In other words, dimensions contextualise domain members when applied to a data point,
# i.e. they contribute to the semantics of a member which, without a dimension,
# may be insufficient to represent the full meaning of a property.

In [9]:
# Pair every dimension (?t, the xl:from end of a dimension-domain arc) with
# the domain found at the xl:to end of that arc.
q = """
  SELECT DISTINCT ?t ?domain
  WHERE {
    ?x2 arcrole7:dimension-domain [ xl:from ?t ;
                                    xl:to ?domain ] .
  }"""
columns = [
    'Dimension uri',
    'Dimension label',
    'Applicable domain code',
]
# The label comes from concept_labels; a dimension missing there raises KeyError.
data = [[dimension, concept_labels[dimension], applicable_domain]
        for dimension, applicable_domain in g.query(q)]
df_dimensions = pd.DataFrame(data=data, columns=columns)
df_dimensions

Unnamed: 0,Dimension uri,Dimension label,Applicable domain code
0,http://www.dnb.nl/xbrl/dict/dim#ftk_dim061,Kostenplaats,http://www.dnb.nl/xbrl/dict/exp#ftk_dom023
1,http://www.dnb.nl/xbrl/dict/dim#ftk_dim091,FTK_DIM091,http://www.dnb.nl/xbrl/dict/exp#ftk_dom034
2,http://www.dnb.nl/xbrl/dict/dim#ftk_dim002,Aandachtsgebied,http://www.dnb.nl/xbrl/dict/exp#ftk_dom002
3,http://www.dnb.nl/xbrl/dict/dim#ftk_dim022,FTK_DIM022,http://www.dnb.nl/xbrl/dict/exp#ftk_dom015
4,http://www.dnb.nl/xbrl/dict/dim#ftk_dim006,Solvabiliteitspost,http://www.dnb.nl/xbrl/dict/exp#ftk_dom005
...,...,...,...
69,http://www.dnb.nl/xbrl/dict/dim#ftk_dim101,Herstelplanpost,http://www.dnb.nl/xbrl/dict/exp#ftk_dom036
70,http://www.dnb.nl/xbrl/dict/dim#ftk_dim062,FTK_DIM062,http://www.dnb.nl/xbrl/dict/exp#ftk_dom023
71,http://www.dnb.nl/xbrl/dict/dim#ftk_dim064,Kostensoort,http://www.dnb.nl/xbrl/dict/exp#ftk_dom023
72,http://www.dnb.nl/xbrl/dict/dim#ftk_dim021,Liquiditeitsoort,http://www.dnb.nl/xbrl/dict/exp#ftk_dom015


## All domain members of all domains

In [10]:
# Elements of an explicit domain are called domain members.
# A domain member (or simply a member) is an enumerated element of an explicit domain.
# All members of a domain share a certain common nature, defined subjectively
# but applied consistently by the model’s author.

In [11]:
def members(domain):
    """Query the labelled members of a single domain.

    Parameters
    ----------
    domain
        The domain URI. Its ``str()`` form is spliced between ``<`` and ``>``
        in the query text, so it must be a valid IRI.

    Returns
    -------
    The result of ``g.query``: distinct ``(?t, ?label)`` rows, where ``?t`` is
    the ``xl:to`` end of a domain-member arc starting at ``domain``, is typed
    ``nonnum:domainItemType`` and has a concept label ``?label``.
    """
    domain_uri = str(domain)
    # The query is assembled from two literal halves around the domain URI.
    query_head = """
      SELECT DISTINCT ?t ?label
      WHERE {
        ?l arcrole7:domain-member [ xl:from <"""
    query_tail = """> ;
                                    xl:to ?t ] .
        ?t rdf:type nonnum:domainItemType .
        ?x arcrole1:concept-label [ xl:from ?t ;
                                    xl:to [rdf:value ?label ] ] .
        }"""
    return g.query(query_head + domain_uri + query_tail)

# Collect one [domain, member, member label] record per domain member and
# build the frame once. The previous per-domain DataFrame.append was removed
# in pandas 2.0 and copied the whole frame on every iteration.
# Only the URI fragment (the part after '#') is kept for domain and member.
# NOTE: the resulting frame has a unique RangeIndex; the append-based version
# repeated 0..k-1 for every domain.
columns = ['Domain',
           'Member',
           'Member label']
member_rows = []
for domain in df_domains.iloc[:, 0]:
    domain_code = urldefrag(domain)[1]
    for row in members(domain):
        member_rows.append([domain_code, urldefrag(row[0])[1]] + list(row[1:]))
df_members = pd.DataFrame(data=member_rows, columns=columns)

In [12]:
# Total number of (domain, member) rows collected in df_members.
len(df_members)

9384

## Template structures

In [13]:
def get_children(parent):
    """Return the child nodes reachable from ``parent``.

    A node ``?t`` qualifies when some resource has ``xl:from <parent>`` and
    ``xl:to ?t``, and ``?t`` is itself the ``xl:from`` end of some resource.

    Parameters
    ----------
    parent
        Node URI; its ``str()`` form is spliced into the query text.

    Returns
    -------
    list
        The distinct result rows of the query (one-element rows holding ``?t``).
    """
    sparql = "".join(['''
      SELECT DISTINCT ?t
      WHERE {
        ?s xl:from <''', str(parent), '''> .
        ?s xl:to ?t .
        ?l xl:from ?t .
      }'''])
    rows = g.query(sparql)
    return list(rows)

def get_label(element, role):
    """Look up the label of ``element`` for a given label role.

    Parameters
    ----------
    element
        Node URI; its ``str()`` form is spliced into the query text.
    role
        The ``xlink:role`` term, inserted verbatim into the query
        (e.g. ``'role3:label'``), so it must already be valid SPARQL.

    Returns
    -------
    The ``?label`` value of the first result row, or ``""`` when the query
    returns no rows.
    """
    sparql = (
        '''
      SELECT DISTINCT ?label
      WHERE {
        ?l arcrole3:element-label ?s .
        ?s xl:from <''' + str(element) + '''> .
        ?s xl:to [xlink:role ''' + role + ''' ;
                  rdf:value ?label] .
      }'''
    )
    matches = list(g.query(sparql))
    # Several labels may match; only the first row is used.
    return matches[0][0] if matches else ""

def search_tree(c, depth, axis, data):
    """Walk the children of ``c`` depth-first, recording one row per node.

    For every child returned by ``get_children(c)`` a row
    ``[str(axis), depth, str(rc-code), str(label)]`` is appended to ``data``
    before that child's own subtree is visited with ``depth + 1``.

    Parameters
    ----------
    c
        The node whose children are visited; an empty list is a no-op.
    depth
        Depth value written into the rows of the direct children of ``c``.
    axis
        Axis value copied (as ``str``) into every row.
    data
        List that is extended in place.

    Returns
    -------
    The same ``data`` list that was passed in.
    """
    if c == []:
        return data
    for child_row in get_children(c):
        node = child_row[0]
        rc_code = get_label(node, 'eurofiling:rc-code')
        text = get_label(node, 'role3:label')
        data.append([str(axis), depth, str(rc_code), str(text)])
        # Descend before moving on to the next sibling (pre-order traversal).
        search_tree(node, depth + 1, axis, data)
    return data

# Build the structure of a single table: tables[1] selects the second entry
# of the sorted table list.
url = str(tables[1])

# Every (?t, ?axis) pair hanging off the table: ?t is the xl:to end of a
# resource that starts at the table and carries an xl:axis value.
qres = g.query(
"""SELECT DISTINCT ?t ?axis
   WHERE {
   ?s xl:from <"""+url+"""> .
   ?s xl:axis ?axis .
   ?s xl:to ?t .
   }""")

data = []
for root, axis in qres:
    # search_tree appends the rows of this subtree and returns the same list.
    data = search_tree(root, 0, axis, data)

columns = ['axis', 'depth', 'rc-code', 'label']
# rc-code is stored as a string by search_tree, so this ordering is
# lexicographic rather than numeric.
df_tables = (
    pd.DataFrame(data=data, columns=columns)
    .sort_values(['axis', 'rc-code'])
    .reset_index(drop=True)
)

In [14]:
# Display the table structure (axis, depth, rc-code, label) built in the previous cell.
df_tables

Unnamed: 0,axis,depth,rc-code,label
0,x,0,,
1,x,1,10.0,Kolom
2,y,0,,
3,y,1,10.0,Technische voorzieningen voor risico fonds
4,y,1,20.0,Technische voorzieningen voor risico deelnemer
5,y,1,30.0,Technische voorzieningen afgedekt door garanti...
6,y,1,40.0,Aanwezige solvabiliteit
7,y,1,50.0,Dekkingsgraad (%)
8,y,1,60.0,Beleidsdekkingsgraad (%)
9,y,1,70.0,Algemene toelichting op de dekkingsgraadrappor...
