In [None]:
from rdflib import Graph as RDFGraph
from rdflib.extras.external_graph_libs import rdflib_to_networkx_graph
import networkx as nx
from networkx import Graph as NXGraph
import matplotlib.pyplot as plt
import statistics
import collections
import pandas as pd
from six.moves.urllib.parse import urldefrag
from pathlib import Path

from os import listdir 
from os.path import isfile, join, abspath, basename
import json

import xbrl2rdf

# create ttl file of qrs_260_instance

In [None]:
# Input instance, taxonomy archive and RDF output root (all relative to the notebook).
DATA_URL = join("..", "data", "instances", "qrs_260_instance.xbrl")
TAXO_URL = join("..", "data", "taxonomies", "EIOPA_SolvencyII_XBRL_Taxonomy_2.6.0_PWD_with_External_Files.zip")
OUTPUT_DIR = join("..", "data", "rdf")

# The taxonomy output folder is named after the archive file without its final extension.
taxo_dir = Path(TAXO_URL).stem

# Create the output directories up front; already existing directories are left alone.
for out_dir in (Path(OUTPUT_DIR, "data"), Path(OUTPUT_DIR, "taxonomies", taxo_dir)):
    out_dir.mkdir(parents=True, exist_ok=True)

# Run the xbrl2rdf conversion of the instance file into OUTPUT_DIR.
# NOTE(review): the meaning of the third argument (1) is not visible here — confirm against xbrl2rdf.
xbrl2rdf.MainProcessor(DATA_URL, OUTPUT_DIR, 1)

# Read ttl

In [None]:
# Build one rdflib graph holding the instance data plus every taxonomy file.
g = RDFGraph()

# RDF graph loading

# instance data
data = "../data/rdf/data/qrs_260_instance.ttl"
g.parse(data, format='turtle')

# taxonomy data
taxo = "../data/rdf/taxonomies/EIOPA_SolvencyII_XBRL_Taxonomy_2.6.0_PWD_with_External_Files/"
# listdir() returns entries in arbitrary order; sort so every run parses
# (and reports failures) in the same order.
taxo_ttls = sorted(join(taxo, f) for f in listdir(taxo) if isfile(join(taxo, f)))
for ttl in taxo_ttls:
    try:
        g.parse(ttl, format='turtle')
    except Exception as exc:
        # Best effort: a file that cannot be parsed is reported and skipped.
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate, and the reason is shown instead of hidden.
        print("Parse error: " + ttl)
        print("  reason: {}: {}".format(type(exc).__name__, exc))

print("rdflib Graph loaded successfully with {} triples".format(len(g)))

In [None]:
# Print the object (?c) of every triple in the graph, de-duplicated and sorted.
q = """
  SELECT ?c
  WHERE {
    ?a ?b ?c .
  }"""
a = sorted(set(g.query(q)))
for item in a:
    print(item[0])

In [None]:
# Print the xl:from value (?c) of every (?a, ?c) pair, de-duplicated and sorted by row.
q = """
  SELECT ?a ?c
  WHERE {
    ?a xl:from ?c .
  }"""
a = sorted(set(g.query(q)))
for item in a:
    print(item[1])

In [None]:
# Collect every subject whose xl:type is table:table, as sorted strings,
# and show the first five.
q = """
  SELECT ?a
  WHERE {
    ?a xl:type table:table .
  }"""
tables = sorted(str(row[0]) for row in g.query(q))
tables[:5]

## Concept labels

In [None]:
# Lookup table: concept (?t) -> label value, taken from the concept-label arcs.
# When a concept has several labels, the last row returned by the query wins.
q = """
  SELECT ?t ?label
  WHERE {
    ?x1 <http://www.xbrl.org/2003/arcrole#concept-label> [xl:from ?t ;
                                xl:to [rdf:value ?label]] .
  }"""
concept_labels = {concept: label for concept, label in g.query(q)}

## DPM explicit domains

In [None]:
# A domain is a set of elements/values sharing a specified semantic nature. 
# A domain can be one of two kinds: explicit or typed.
# An explicit domain has its elements enumerated in the model while a typed domain values 
# are assigned in the reports based on a specified format (data type).

In [None]:
# Select every explicit domain together with its period type, creation date,
# nillable flag and abstract flag.
q = """
  SELECT DISTINCT ?t ?x1 ?x2 ?x4 ?x5
  WHERE {
    ?t rdf:type model:explicitDomainType .
    ?t xbrli:periodType ?x1 .
    ?t model:creationDate ?x2 .
    ?t xbrli:nillable ?x4 .
    ?t xbrli:abstract ?x5 .
  }"""

columns = ['Domain uri',
           'Domain label',
           'period Type',
           'creation Date',
           'nillable',
           'abstract']
# Each record is the domain URI, its label from concept_labels (a domain
# without a label raises KeyError), then the remaining query columns.
data = [[rec[0], concept_labels[rec[0]], *rec[1:]] for rec in g.query(q)]
df_domains = pd.DataFrame(data=data, columns=columns)
df_domains.head(5)

In [None]:
# Number of rows in df_domains.
len(df_domains.index)

## DPM Dimensions

In [None]:
# In order to be used in description of information requirements a domain member 
# or a typed domain value requires a dimension that provides a context of its application. 
# In other words dimensions contextualise domain members when applied to a data point
# i.e. they contribute to the semantics of a member which, without a dimension,
# may be insufficient to represent the full meaning of a property.

In [None]:
# Pair every dimension (?t) with the domain it points to through a
# dimension-domain arc.
q = """
  SELECT DISTINCT ?t ?domain
  WHERE {
    ?x2 <http://xbrl.org/int/dim/arcrole#dimension-domain> 
        [ xl:from ?t ;
          xl:to ?domain ] .
  }"""
columns = ['Dimension uri',
           'Dimension label',
           'Applicable domain code']
# Each record is the dimension URI, its label from concept_labels (a dimension
# without a label raises KeyError), then the remaining query columns.
data = [[rec[0], concept_labels[rec[0]], *rec[1:]] for rec in g.query(q)]
df_dimensions = pd.DataFrame(data=data, columns=columns)
df_dimensions

## All domain members of all domains

In [None]:
# Elements of an explicit domain are called domain members. 
# A domain member (or simply a member) is an enumerated element of an explicit domain.
# All members from a domain share a certain common nature defined subjectively
# but applied consistently by the model’s author.

In [None]:
def members(domain):
    """Query the labelled members of one domain.

    Parameters
    ----------
    domain
        URI of the domain; ``str(domain)`` is spliced into the SPARQL text
        between ``<`` and ``>``.

    Returns
    -------
    The result of ``g.query`` with the variables ``?t`` (a member of type
    ``nonnum:domainItemType`` reached through a domain-member arc from
    ``domain``) and ``?label`` (its concept label).
    """
    head = """
      SELECT DISTINCT ?t ?label
      WHERE {
        ?l <http://xbrl.org/int/dim/arcrole#domain-member>
            [ xl:from <"""
    tail = """> ;
              xl:to ?t ] .
        ?t rdf:type nonnum:domainItemType .
        ?x <http://www.xbrl.org/2003/arcrole#concept-label>
            [ xl:from ?t ;
              xl:to [rdf:value ?label ] ] .
        }"""
    return g.query(head + str(domain) + tail)

# Build one table with every member of every explicit domain.
#
# The rows are collected in a plain list and the DataFrame is created once:
# DataFrame.append was removed in pandas 2.0 (AttributeError on current
# pandas), and appending inside a loop copies the whole frame every time.
# A side benefit is a clean 0..n-1 index instead of labels that restart
# at 0 for each domain.
columns = ['Domain',
           'Member',
           'Member label']
data = []
for domain in df_domains.iloc[:, 0]:
    # Only the fragment part of each URI (the text after '#') is kept.
    domain_code = urldefrag(domain)[1]
    data.extend([domain_code, urldefrag(row[0])[1], *row[1:]]
                for row in members(domain))
df_members = pd.DataFrame(data=data, columns=columns)

In [None]:
# Number of rows in df_members.
len(df_members.index)

## Template structures

In [None]:
def get_children(parent):
    """Return the result rows for the children of ``parent``.

    A child ``?t`` is the ``xl:to`` target of an arc whose ``xl:from`` is
    ``parent``, and it must itself be the ``xl:from`` of at least one arc.
    ``str(parent)`` is spliced into the SPARQL text between ``<`` and ``>``.
    Each returned row holds the child in position 0.
    """
    head = '''
      SELECT DISTINCT ?t
      WHERE {
        ?s xl:from <'''
    tail = '''> .
        ?s xl:to ?t .
        ?l xl:from ?t .
      }'''
    return [row for row in g.query(head + str(parent) + tail)]

def get_label(element, role):
    """Look up one label of ``element`` for the given label role.

    Parameters
    ----------
    element
        URI of the element; ``str(element)`` is wrapped in ``<`` ``>`` here.
    role
        SPARQL term for the ``xlink:role`` of the label. It is inserted
        verbatim, so the caller must already include the angle brackets.

    Returns
    -------
    The ``?label`` value of the first matching row, or ``""`` when the
    query yields no rows.
    """
    before_element = '''
      SELECT DISTINCT ?label
      WHERE {
        ?l <http://xbrl.org/arcrole/2008#element-label> ?s .
        ?s xl:from <'''
    before_role = '''> .
        ?s xl:to [xlink:role '''
    after_role = ''';
                  rdf:value ?label] .
      }'''
    sparql = before_element + str(element) + before_role + role + after_role
    rows = list(g.query(sparql))
    return rows[0][0] if rows else ""

def search_tree(c, depth, axis, data):
    """Walk the children of ``c`` recursively and record one row per node.

    For every child returned by ``get_children(c)`` a row
    ``[str(axis), depth, rc-code, label]`` is appended to ``data`` before
    that child's own children are visited one level deeper.

    Parameters
    ----------
    c
        Node whose children are visited; an empty list means "nothing to do".
    depth
        Depth value stored for the direct children of ``c``.
    axis
        Axis value stored (as a string) on every row.
    data
        List that is extended in place.

    Returns
    -------
    The same ``data`` list that was passed in.

    NOTE(review): there is no guard against cycles; this presumably relies on
    the structure being a tree — confirm.
    """
    if c == []:
        return data
    for child in get_children(c):
        node = child[0]
        rccode = get_label(node, '<http://www.eurofiling.info/xbrl/role#rc-code>')
        label = get_label(node, '<http://www.xbrl.org/2008/role#label>')
        data.append([str(axis), depth, str(rccode), str(label)])
        search_tree(node, depth + 1, axis, data)
    return data

# Table whose structure is extracted (already wrapped in < > for SPARQL).
url = "<http://eiopa.europa.eu/xbrl/s2md/fws/solvency/solvency2/2021-07-15/tab/S.02.01.02.01#s2md_tS.02.01.02.01>"

# Root nodes of the table: every arc leaving the table gives a target and its axis.
qres = g.query(
    """SELECT DISTINCT ?t ?axis
       WHERE {
       ?s xl:from """ + url + """ .
       ?s xl:axis ?axis .
       ?s xl:to ?t .
       }""")

# search_tree extends `data` in place, one row per node found below each root.
data = []
for row in qres:
    search_tree(row[0], 0, row[1], data)

columns = ['axis', 'depth', 'rc-code', 'label']
df_tables = (
    pd.DataFrame(data=data, columns=columns)
    .sort_values(['axis', 'rc-code'])
    .reset_index(drop=True)
)

In [None]:
# Show the first 25 rows of the template structure.
df_tables.head(25)