In [2]:
import rdflib
from rdflib import Graph as RDFGraph
from rdflib.extras.external_graph_libs import rdflib_to_networkx_graph
import networkx as nx
from networkx import Graph as NXGraph
import matplotlib.pyplot as plt
import statistics
import collections
import pandas as pd
from six.moves.urllib.parse import urldefrag
from pathlib import Path

from os import listdir 
from os.path import isfile, join, abspath, basename
import json

import xbrl2rdf

In [3]:
DATA_URL = join("..", "data", "instances", "DNB-NR_FTK-2020-06_2020-12-31_MOD_FTK-BEL.XBRL")
OUTPUT_DIR = join("..", "data", "rdf")

# setup output directories
Path(join(OUTPUT_DIR, "data")).mkdir(parents=True, exist_ok=True)

xbrl2rdf.MainProcessor(DATA_URL, OUTPUT_DIR, 1)

0

In [4]:
g = RDFGraph()

# RDF graph loading

# instance data
data = "../data/rdf/data/DNB-NR_FTK-2020-06_2020-12-31_MOD_FTK-BEL.ttl"
g.parse(data, format='turtle')

# taxonomy data reporting framework
taxo = "../data/rdf/taxonomies/dnb-ftk-reporting-framework-2-3-0-2020-12-31/"
taxo_ttls = [join(taxo, f) for f in listdir(taxo) if isfile(join(taxo, f))]
for ttl in taxo_ttls:
    try:
        g.parse(ttl, format='turtle')
    except:
        print("Parse error: " + ttl)

# taxonomy data data dictionary
taxo = "../data/rdf/taxonomies/dnb-ftk-reporting-data-dictionary-2-3-0/"
taxo_ttls = [join(taxo, f) for f in listdir(taxo) if isfile(join(taxo, f))]
for ttl in taxo_ttls:
    try:
        g.parse(ttl, format='turtle')
    except:
        print("Parse error: " + ttl)
        
print("rdflib Graph loaded successfully with {} triples".format(len(g)))

Parse error: ../data/rdf/taxonomies/dnb-ftk-reporting-framework-2-3-0-2020-12-31/assertions_ftk_TG-DG1-10ad9b6907d79f04eb62f34ed6560e19.ttl
rdflib Graph loaded successfully with 3051764 triples


In [5]:
q = """
  SELECT ?a
  WHERE {
    ?a xl:type table:table .
  }"""
tables = [row[0] for row in g.query(q)]
tables.sort()
tables[0:5]

[rdflib.term.URIRef('http://www.dnb.nl/xbrl/fws/dnb-nr/ftk-2020-06/2020-12-31/tab/APF001-1#dnb_tAPF001-1'),
 rdflib.term.URIRef('http://www.dnb.nl/xbrl/fws/dnb-nr/ftk-2020-06/2020-12-31/tab/APF001-2#dnb_tAPF001-2'),
 rdflib.term.URIRef('http://www.dnb.nl/xbrl/fws/dnb-nr/ftk-2020-06/2020-12-31/tab/APF002#dnb_tAPF002'),
 rdflib.term.URIRef('http://www.dnb.nl/xbrl/fws/dnb-nr/ftk-2020-06/2020-12-31/tab/APF003#dnb_tAPF003'),
 rdflib.term.URIRef('http://www.dnb.nl/xbrl/fws/dnb-nr/ftk-2020-06/2020-12-31/tab/APF004#dnb_tAPF004')]

## Concept labels

In [6]:
q = """
  SELECT ?t ?label
  WHERE {
    ?x1 <http://www.xbrl.org/2003/arcrole#concept-label>
        [xl:from ?t ;
         xl:to [rdf:value ?label]] .
  }"""
concept_labels = {row[0]: row[1] for row in g.query(q)}

In [13]:
len(concept_labels)

3732

## DPM explicit domains

In [9]:
# A domain is a set of elements/values sharing a specified semantic nature. 
# Domain can be of one of two kinds: explicit and typed. 
# An explicit domain has its elements enumerated in the model while a typed domain values 
# are assigned in the reports based on a specified format (data type).

In [9]:
q = """
  SELECT DISTINCT ?t ?x1 ?x2 ?x4 ?x5
  WHERE {
    ?t rdf:type model:explicitDomainType .  
    ?t xbrli:periodType ?x1 .
    ?t model:creationDate ?x2 .
    ?t xbrli:nillable ?x4 .
    ?t xbrli:abstract ?x5 .
  }"""

data = [[row[0], concept_labels[row[0]]]+list(row[1:]) for row in g.query(q)]
columns = ['Domain uri', 
           'Domain label', 
           'period Type', 
           'creation Date', 
           'nillable', 
           'abstract']
df_domains = pd.DataFrame(data=data,
                          columns=columns)
df_domains.iloc[0:5]

Unnamed: 0,Domain uri,Domain label,period Type,creation Date,nillable,abstract
0,http://www.dnb.nl/xbrl/dict/exp#ftk_dom010,Ratingklasse,instant,2018-03-31,True,True
1,http://www.dnb.nl/xbrl/dict/exp#ftk_dom017,Organisatie,instant,2018-03-31,True,True
2,http://www.dnb.nl/xbrl/dict/exp#ftk_dom024,Risicodrager,instant,2018-03-31,True,True
3,http://www.dnb.nl/xbrl/dict/exp#ftk_dom008,Waarderingstype,instant,2018-03-31,True,True
4,http://www.dnb.nl/xbrl/dict/exp#ftk_dom029,FTK_DOM029,instant,2018-03-31,True,True


In [10]:
len(df_domains)

40

## DPM Dimensions

In [11]:
# In order to be used in description of information requirements a domain member 
# or a typed domain value requires a dimensionthat provides a context of its application. 
# In other words dimensions contextualise domain members when applied to a data point
# i.e. they contribute to the semantics of a member which, without a dimension,
# may be insufficient to represent the full meaning of a property.

In [12]:
q = """
  SELECT DISTINCT ?t ?domain
  WHERE {
    ?x2 <http://xbrl.org/int/dim/arcrole#dimension-domain> 
        [ xl:from ?t ;
          xl:to ?domain ] .
  }"""
data = [[row[0], concept_labels[row[0]]]+list(row[1:]) for row in g.query(q)]
columns=['Dimension uri', 
         'Dimension label', 
         'Applicable domain code']
df_dimensions = pd.DataFrame(data=data, 
                             columns=columns)
df_dimensions

KeyError: rdflib.term.URIRef('http://www.dnb.nl/xbrl/dict/dim#dnb_ftk_dim003')

In [16]:
list(g.query(q))

[(rdflib.term.URIRef('http://www.dnb.nl/xbrl/dict/dim#dnb_ftk_dim003'),
  rdflib.term.URIRef('http://www.dnb.nl/xbrl/fws/dnb-nr/ftk-2020-06/2020-12-31/tab/K602#dnb_ftk_dom003')),
 (rdflib.term.URIRef('http://www.dnb.nl/xbrl/dict/dim#dnb_ftk_dim100'),
  rdflib.term.URIRef('http://www.dnb.nl/xbrl/fws/dnb-nr/ftk-2020-06/2020-12-31/tab/K602#dnb_ftk_dom035')),
 (rdflib.term.URIRef('http://www.dnb.nl/xbrl/dict/dim#dnb_ftk_dim008'),
  rdflib.term.URIRef('http://www.dnb.nl/xbrl/fws/dnb-nr/ftk-2020-06/2020-12-31/tab/J301-1#dnb_ftk_dom002')),
 (rdflib.term.URIRef('http://www.dnb.nl/xbrl/dict/dim#dnb_ftk_dim002'),
  rdflib.term.URIRef('http://www.dnb.nl/xbrl/fws/dnb-nr/ftk-2020-06/2020-12-31/tab/J605-1#dnb_ftk_dom002')),
 (rdflib.term.URIRef('http://www.dnb.nl/xbrl/dict/dim#dnb_ftk_dim002'),
  rdflib.term.URIRef('http://www.dnb.nl/xbrl/fws/dnb-nr/ftk-2020-06/2020-12-31/tab/J601-3#dnb_ftk_dom002')),
 (rdflib.term.URIRef('http://www.dnb.nl/xbrl/dict/dim#dnb_ftk_dim005'),
  rdflib.term.URIRef('http:

## All domain members of all domains

In [None]:
# Elements of an explicit domain are called domain members. 
# A domain member (or simply a member) is enumerated element of an explicit domain.
# All members from a domain share a certain common nature defined subjectively
# but applied consistently by the model’s author.

In [17]:
def members(domain):
    q = """
      SELECT DISTINCT ?t ?label
      WHERE {
        ?l <http://xbrl.org/int/dim/arcrole#domain-member>
            [ xl:from <"""+str(domain)+"""> ;
              xl:to ?t ] .
        ?t rdf:type nonnum:domainItemType .
        ?x <http://www.xbrl.org/2003/arcrole#concept-label>
            [ xl:from ?t ;
              xl:to [rdf:value ?label ] ] .
        }"""
    return g.query(q)

df_members = pd.DataFrame()
for domain in df_domains.iloc[:, 0]:
    data = [[urldefrag(domain)[1]]+[urldefrag(row[0])[1]]+list(row[1:]) for row in members(domain)]
    columns = ['Domain',
               'Member',
               'Member label']
    df_members = df_members.append(pd.DataFrame(data=data,
                                                columns=columns))

In [18]:
len(df_members)

9196

In [20]:
namespaces = {"xl": 'http://www.xbrl.org/2003/XLink#',
              "xlink": "http://www.w3.org/1999/xlink#",
              "formula": "http://xbrl.org/2008/formula#",
              "rdf": 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'}

q_label = rdflib.plugins.sparql.prepareQuery('''
      SELECT DISTINCT ?label
      WHERE {
        ?l <http://xbrl.org/arcrole/2008#element-label> ?s .
        ?s xl:from ?element .
        ?s xl:to [xlink:role ?role ; rdf:value ?label] .
        }''', 
      initNs = namespaces)
q_children = rdflib.plugins.sparql.prepareQuery('''
      SELECT DISTINCT ?t
      WHERE {
        ?s xl:from ?parent .
        ?s xl:to ?t .
      }''',
      initNs = namespaces)
q_concept = rdflib.plugins.sparql.prepareQuery('''
      SELECT DISTINCT ?c
      WHERE {
        OPTIONAL { ?item formula:concept ?c } .
      }''',
      initNs = namespaces)
q_dim = rdflib.plugins.sparql.prepareQuery('''
      SELECT DISTINCT ?d_name ?m_name
      WHERE {
        OPTIONAL { ?item formula:explicitDimension [
                         formula:dimension ?d_name ; 
                         formula:member ?m_name ] }
      }''',
      initNs = namespaces)
q_axis_points = rdflib.plugins.sparql.prepareQuery('''
      SELECT DISTINCT ?t ?axis
      WHERE {
        ?s xl:from ?table_url .
        ?s xl:axis ?axis .
        ?s xl:to ?t .
      }''', initNs = namespaces)

def search_tree(c, depth, axis, data):
    if c != []:
        for r in g.query(q_children, initBindings={'parent': c}):
            rccode = list(g.query(q_label, initBindings={'element': r[0], 'role': role_rc}))
            label = list(g.query(q_label, initBindings={'element': r[0], 'role': role_label}))
            if rccode != []:
                rccode = str(rccode[0][0])
            else:
                rccode = None
            if label != []:
                label = str(label[0][0])
            else:
                label = None
            concept_data = list(g.query(q_concept, initBindings={'item': r[0]}))
            if concept_data == []:
                concept_data = [None]
            dim_data = list(g.query(q_dim, initBindings={'item': r[0]}))
            if dim_data == []:
                dim_data = [(None, None)]
            data.append([str(axis), depth, str(rccode), str(label), concept_data[0], dim_data])
            search_tree(r[0], depth+1, axis, data)
    return data

In [21]:
role_rc = rdflib.URIRef("http://www.eurofiling.info/xbrl/role#rc-code")
role_label = rdflib.URIRef("http://www.xbrl.org/2008/role#label")

table_url = rdflib.term.URIRef('http://www.dnb.nl/xbrl/fws/dnb-nr/ftk-2020-06/2020-12-31/tab/APF001-1#dnb_tAPF001-1')

axis_points = list()
for axis in ['x', 'y', 'z']:
    axis_url = rdflib.term.Literal(axis, datatype=rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral'))
    axis_points.append(g.query(q_axis_points, initBindings={'table_url': table_url, 'axis': axis_url}))

data_x = list()
for row in axis_points[0]:
    data_x = search_tree(row[0], 0, row[1], data_x)

data_y = list()
for row in axis_points[1]:
    data_y = search_tree(row[0], 0, row[1], data_y)

data_z = list()
for row in axis_points[2]:
    data_z = search_tree(row[0], 0, row[1], data_z)
    
columns = ['axis', 'depth', 'rc-code', 'label', 'concept', 'dimension-member']
df_tables = pd.DataFrame(data=data_x+data_y,
                         columns=columns)
df_tables = df_tables.sort_values(['axis', 'rc-code']).reset_index(drop=True)

In [22]:
df_tables

Unnamed: 0,axis,depth,rc-code,label,concept,dimension-member
0,x,1,10.0,Alfanumeriek,,"[(http://www.dnb.nl/xbrl/dict/dim#ftk_dim008, ..."
1,x,0,,,,"[(None, None)]"
2,x,0,,,,"[(None, None)]"
3,x,0,,,,"[(None, None)]"
4,x,2,,,,"[(None, None)]"
5,x,2,,,,"[(None, None)]"
6,x,2,,,,"[(None, None)]"
7,x,2,,,,"[(None, None)]"
8,y,1,10.0,Aantal medewerkers (in fte's) in dienst van he...,"(http://www.dnb.nl/xbrl/dict/met#ii_ftk_064,)","[(http://www.dnb.nl/xbrl/dict/dim#ftk_dim030, ..."
9,y,0,,,,"[(None, None)]"
