In [1]:
'''
@wcrd
This script processes a ttl file and builds a ag-grid formatted tree view data source
'''

import rdflib
from typing import Tuple, List
import json

In [2]:

## INPUT FILES
SWITCH_PATH = "../data/ontology/switch.ttl"
BRICK_PATH = "../data/ontology/brick.ttl"

### LOAD INTO GRAPH

In [3]:

## LOAD FILES INTO GRAPH
ds = rdflib.Dataset(default_union=True)
# Generic namespace I am going to use to name the graphs
ds_ns = rdflib.Namespace("https://_graph_.com#")
# lets load it with data from brick and switch.
# this will add graph if it doesn't exist, or return the existing one.
# could also do this independently using add_graph() method.
ds.graph(ds_ns['brick']).parse(BRICK_PATH, format='turtle');
ds.graph(ds_ns['switch']).parse(SWITCH_PATH, format="turtle");


In [4]:
# generate namespaces
ds_g_ns = dict(ds.namespaces())
# check that graphs created.
contexts = ds.graphs()
print(f"Total DS entries: {len(ds)}")
print("Graphs created:")
for c in contexts:
    print(f"{c} ---- Entries: {len(ds.graph(c))}")

Total DS entries: 36502
Graphs created:
<https://_graph_.com#brick> a rdfg:Graph;rdflib:storage [a rdflib:Store;rdfs:label 'IOMemory']. ---- Entries: 34243
<https://_graph_.com#switch> a rdfg:Graph;rdflib:storage [a rdflib:Store;rdfs:label 'IOMemory']. ---- Entries: 2389
<urn:x-rdflib:default> a rdfg:Graph;rdflib:storage [a rdflib:Store;rdfs:label 'IOMemory']. ---- Entries: 0


### GENERATE TREE

In [5]:
# URI PROCESSOR
def uri_spliter(uri: rdflib.term.URIRef, ns: dict = {}) -> Tuple[str, str, str]:
    '''
    Given a URI, split into namespace and term.
    If a ns dict is given, return the prefix for that namespace as well
    (ns, term, prefix)
    '''
    try:
        chunks = uri.toPython().split("#")
        namespace = chunks[0]
        term = chunks[1]
    except IndexError:
        print(f"URI did not resolve into parts: {uri}")
        return (None,None,None)
    except:
        raise ValueError(f"Error parsing URI: {uri}")
    
    if ns:
        try:
            prefix = next(key for key, value in ns.items() if value.toPython() == f"{namespace}#")
            return (namespace, term, prefix)
        except StopIteration:
            pass
    
    return (namespace, term, None)


In [49]:
# PARSE GRAPHS TO BUILD OUTPUT DATA
subjects = ds.subjects()
# filter out BNodes
subjects = set(filter(lambda x: not isinstance(x, rdflib.term.BNode), subjects))
# filter out all 'Brick Tags'
subjects = list(filter(lambda x: f"{uri_spliter(x)[0]}#" != ds_g_ns['tag'].toPython(), subjects))
print(len(subjects))

URI did not resolve into parts: http://qudt.org/vocab/unit/KiloTONNE
URI did not resolve into parts: http://qudt.org/vocab/unit/N-M
URI did not resolve into parts: http://qudt.org/vocab/unit/TON_LONG
URI did not resolve into parts: http://qudt.org/vocab/unit/OZ_F-IN
URI did not resolve into parts: http://qudt.org/vocab/unit/PERCENT-PER-WK
URI did not resolve into parts: http://qudt.org/vocab/unit/MicroBAR
URI did not resolve into parts: http://qudt.org/vocab/unit/MilliGM
URI did not resolve into parts: http://qudt.org/vocab/unit/TON_FG
URI did not resolve into parts: http://qudt.org/vocab/unit/MicroPA
URI did not resolve into parts: http://qudt.org/vocab/unit/U
URI did not resolve into parts: http://qudt.org/vocab/unit/FT-LB_F-PER-SEC
URI did not resolve into parts: http://qudt.org/vocab/unit/TeraW-HR
URI did not resolve into parts: http://qudt.org/vocab/unit/FT3-PER-DAY
URI did not resolve into parts: http://qudt.org/vocab/unit/BTU_TH
URI did not resolve into parts: http://qudt.org/vo

In [50]:
#### Path Generating Functions

def get_class_heirarchy(class_uri, graph, base_level = 1, depth_limit=15, initial_path=[]):
    '''
    :base_level: how far down the class heirarchy to return as 'base'. base_level=0 is the root (for brick, this is simply 'Class')
    '''
    depth = 0
    # # get type of entity
    # entity_class = list(graph.objects(entity, rdflib.RDF.type))
    # if len(entity_class)==0:
    #     raise ValueError(f"No class found in graph for entity {entity}.")

    # Check Class exists
    if (class_uri, None, None) not in graph:
        raise ValueError(f"No class found in graph for class {class_uri}.")

    path = _climb_class_heirarchy(class_uri, graph, depth_limit, depth, initial_path)

    try:
        # return path[base_level]
        return path
    except IndexError:
        print(f"Requested base level {base_level} does not exist. Returning level 0 instead")
        return path[0]


def _climb_class_heirarchy(entity_class, graph, depth_limit, depth, path=[]) -> list:
    '''
    Path is used for storing the journey to the root class. Users can request base class at level {n}. Base of all classes is 'Class' which is level 0.
    '''
    # guard
    if depth >= depth_limit:
        raise RecursionError(f"Max depth of {depth_limit} reached. Increase depth_limit parameter if required.")

    # add current class to path
    path = [entity_class] + path

    # get parent
    parents = list(graph.objects(entity_class, rdflib.RDFS.subClassOf))
    # filter out BNodes - they are not useful to us here. We want explicit class definitions only.
    parents = list(filter(lambda x: not isinstance(x, rdflib.term.BNode), parents))
    
    # take each path and climb
    if len(parents)==0:
        # print("I'm done with this path. Returning: ")
        # print([path])
        return [path]
    else:
        paths = []
        for p in parents:
            new_paths = _climb_class_heirarchy(p, graph, depth_limit, depth+1, path)
            for new_path in new_paths:
                paths.append(new_path)
    return paths

In [51]:
# PATH FORMATTER (SIMPLIFIER)
def path_2_termString(path: List[rdflib.term.URIRef]) -> List[str]:
    '''
    Given a list of URIRefs, resolve to a list of term strings (no namespace)
    '''
    output = []
    for uri in path:
        _, term, _ = uri_spliter(uri)
        output.append(term)
    
    return output


In [61]:
# generate path data and data row
data = []
for idx, s in enumerate(subjects):
    namespace, term, prefix = uri_spliter(s, ds_g_ns)


    ### GOING TO SKIP THIS FOR NOW AND JUST PROCESS ENTITIES IF THEY ARE A CLASS DEFINTION OR OBJECT PROPERTY (relationships).
    ### TODO: Come back and update this to handle shapes, entityProperties, etc.

    # # Check type -> If Class, then get class path. If instance, get class, then get class path
    # # need to filter self out
    # s_class = list(filter(lambda x: x != s, ds.objects(s, rdflib.RDF.type)))
    # if len(s_class)==0:
    #     raise ValueError(f"No class found in graph for entity {s}.")
    # elif len(s_class) > 1:
    #     # raise ValueError(f"Too many types exist for the class. Please review: {s}.")
    #     print(f"Too many types exist for the class. SKIPPING. Please review: {s}.")
    #     continue
    # else:
    #     if s_class[0] != rdflib.OWL.Class:
    #         # Set this as first path item
    #         initial_path = s_class
    #     else: 
    #         initial_path = []

    ### BASIC VERSION
    # Get subject type(s) [filter self out]
    s_class = list(filter(lambda x: x != s, ds.objects(s, rdflib.RDF.type)))
    # filter down to only owl.class and owl.objectproperty and shacl shapes -> these are what we will process
    s_class = list(filter(lambda x: x in [rdflib.OWL.Class, rdflib.OWL.ObjectProperty, rdflib.SH.NodeShape], s_class))
    if len(s_class)==0:
        print(f"No class found in graph for entity {s}. \nSKIPPING")
        continue
    elif rdflib.SH.NodeShape in s_class:
        # fixed path
        paths = [[rdflib.SH.NodeShape, s]]
    elif s_class[0] != rdflib.OWL.Class:
        # If not a class then 
        paths = [[*s_class, s]]
    else:
        paths = get_class_heirarchy(s, ds)

    #if multiple paths, make multiple rows.
    for p_idx, path in enumerate(paths):
        data.append({
            "uri": s,
            "namespace": namespace,
            "prefix": prefix,
            "term": term,
            "path": {
                "full": path,
                "agGridPath": path_2_termString(path)
            },
            "idx": f"{idx}.{p_idx}"
        })


URI did not resolve into parts: http://www.w3.org/ns/sosa/FeatureOfInterest
No class found in graph for entity https://brickschema.org/schema/Brick#ratedMinimumVoltageInput. 
SKIPPING
URI did not resolve into parts: http://qudt.org/vocab/unit/KiloTONNE
No class found in graph for entity http://qudt.org/vocab/unit/KiloTONNE. 
SKIPPING
URI did not resolve into parts: http://qudt.org/vocab/unit/N-M
No class found in graph for entity http://qudt.org/vocab/unit/N-M. 
SKIPPING
URI did not resolve into parts: http://www.w3.org/ns/sosa/FeatureOfInterest
URI did not resolve into parts: http://www.w3.org/ns/sosa/FeatureOfInterest
URI did not resolve into parts: http://qudt.org/vocab/unit/TON_LONG
No class found in graph for entity http://qudt.org/vocab/unit/TON_LONG. 
SKIPPING
URI did not resolve into parts: http://qudt.org/vocab/unit/OZ_F-IN
No class found in graph for entity http://qudt.org/vocab/unit/OZ_F-IN. 
SKIPPING
URI did not resolve into parts: http://qudt.org/vocab/unit/PERCENT-PER-WK


In [62]:
with open("data.json", "w") as f:
    json.dump(data, f, indent=2)

In [63]:
rdflib.SH.NodeShape

rdflib.term.URIRef('http://www.w3.org/ns/shacl#NodeShape')

In [89]:
data[1174]

{'uri': rdflib.term.URIRef('https://brickschema.org/schema/Brick#Power'),
 'namespace': 'https://brickschema.org/schema/Brick',
 'prefix': 'brick',
 'term': 'Power',
 'path': {'full': [rdflib.term.URIRef('https://brickschema.org/schema/Brick#Power')],
  'agGridPath': ['Power']},
 'idx': '943.0'}

In [39]:
uri_spliter(rdflib.URIRef("test#1"), ds_g_ns)

('test', '1')

In [26]:
ds_g_ns['brick'].toPython()

'https://brickschema.org/schema/Brick#'

In [42]:
x1 = subjects[1862]
x1

rdflib.term.URIRef('https://brickschema.org/schema/Brick#isPointOf')

In [43]:
o = list(filter(lambda x: x != x1, list(ds.objects(x1, rdflib.RDF.type)))) 
#== rdflib.OWL.Class
o

[rdflib.term.URIRef('http://www.w3.org/2002/07/owl#IrreflexiveProperty'),
 rdflib.term.URIRef('http://www.w3.org/2002/07/owl#ObjectProperty'),
 rdflib.term.URIRef('http://www.w3.org/2002/07/owl#AsymmetricProperty')]

In [44]:
rdflib.OWL.Class

rdflib.term.URIRef('http://www.w3.org/2002/07/owl#Class')

In [45]:
# filter down to only owl.class and owl.objectproperty
list(filter(lambda x: x in [rdflib.OWL.Class, rdflib.OWL.ObjectProperty], o))

[rdflib.term.URIRef('http://www.w3.org/2002/07/owl#ObjectProperty')]

In [54]:
get_class_heirarchy(o[1], ds)

ValueError: No class found in graph for class http://www.w3.org/2002/07/owl#ObjectProperty.

In [30]:
x.toPython().split("#")[0] 

'https://brickschema.org/schema/Brick'

In [67]:
path_2_termString(data[0]['path'])

['Class', 'Location', 'Space', 'Room', 'Food_Service_Room', 'Concession']

In [64]:
data[0]['path']

[rdflib.term.URIRef('https://brickschema.org/schema/Brick#Class'),
 rdflib.term.URIRef('https://brickschema.org/schema/Brick#Location'),
 rdflib.term.URIRef('https://brickschema.org/schema/Brick#Space'),
 rdflib.term.URIRef('https://brickschema.org/schema/Brick#Room'),
 rdflib.term.URIRef('https://brickschema.org/schema/Brick#Food_Service_Room'),
 rdflib.term.URIRef('https://brickschema.org/schema/Brick#Concession')]

In [90]:
ds_g_ns

{'xml': rdflib.term.URIRef('http://www.w3.org/XML/1998/namespace'),
 'rdf': rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#'),
 'rdfs': rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#'),
 'xsd': rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#'),
 'brick': rdflib.term.URIRef('https://brickschema.org/schema/Brick#'),
 'bsh': rdflib.term.URIRef('https://brickschema.org/schema/BrickShape#'),
 'dcterms1': rdflib.term.URIRef('http://purl.org/dc/terms#'),
 'owl': rdflib.term.URIRef('http://www.w3.org/2002/07/owl#'),
 'qudt': rdflib.term.URIRef('http://qudt.org/schema/qudt/'),
 'qudtqk': rdflib.term.URIRef('http://qudt.org/vocab/quantitykind/'),
 'sdo': rdflib.term.URIRef('http://schema.org/'),
 'sh': rdflib.term.URIRef('http://www.w3.org/ns/shacl#'),
 'skos': rdflib.term.URIRef('http://www.w3.org/2004/02/skos/core#'),
 'sosa': rdflib.term.URIRef('http://www.w3.org/ns/sosa/'),
 'tag': rdflib.term.URIRef('https://brickschema.org/schema/BrickTag#'),
 'unit': rdfli

AttributeError: 'URIRef' object has no attribute 'namespace'