<div class="alert alert-block alert-success">
    <h1>
        Example notebook - Integrate RDF file
    </h1>
    <p>
        Link to dataset : <a href="TR/owl-guide/wine.rdf">Link to Wine RDF file download</a>
    </p>
</div>

# Import modules and functions

In [1]:
from rdflib import Graph as RDFGraph
import re
import time

In [2]:
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

True

# Check data files are available

In [3]:
example_name = "wine_ontology"
path_data = f"{os.getcwd()}/data/{example_name}"
if not os.path.exists(path_data):
    print(f"{path_data} does not exist")
    os.makedirs(path_data)
    print(f"{path_data} folder created âœ”")

list_files = sorted(os.listdir(path_data))
rdf_filename = "wine_ontology.rdf"
if not rdf_filename in list_files:
    print(f"Wine ontology RDF file is not available in {path_data}. \n")
    url_rdf = "https://www.w3.org/TR/owl-guide/wine.rdf"
    print(
        f"Downloading from: {url_rdf}"
        "\n...\n"
    )
    os.system(f"wget {url_rdf} -O {path_data}/{rdf_filename}")

# Load dataset

In [4]:
# RDF graph loading
path_rdf = f"{path_data}/{rdf_filename}"

rg = RDFGraph()
rg.parse(path_rdf)
print(f"RDF file successfully loaded with {len(rg)} triples")

RDF file successfully loaded with 1839 triples


In [5]:
for obj in list(rg.objects())[:10]:
    print(f"Object: {obj}")
    print(f"Type:   {type(obj)}")
    print()

Object: http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine#Dry
Type:   <class 'rdflib.term.URIRef'>

Object: N4049a69e82b14a21b8080f190f324b9b
Type:   <class 'rdflib.term.BNode'>

Object: http://www.w3.org/2002/07/owl#Class
Type:   <class 'rdflib.term.URIRef'>

Object: http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine#PinotNoir
Type:   <class 'rdflib.term.URIRef'>

Object: N6b0a9c9ea8434e15bbf37ebe217a6704
Type:   <class 'rdflib.term.BNode'>

Object: http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine#Dry
Type:   <class 'rdflib.term.URIRef'>

Object: N215cc0b118f04f1c94a95296d4e3d572
Type:   <class 'rdflib.term.BNode'>

Object: http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine#Delicate
Type:   <class 'rdflib.term.URIRef'>

Object: http://www.w3.org/2002/07/owl#Restriction
Type:   <class 'rdflib.term.URIRef'>

Object: http://www.w3.org/2002/07/owl#Class
Type:   <class 'rdflib.term.URIRef'>



In [6]:
print(obj)

http://www.w3.org/2002/07/owl#Class


# Graph Creation in TuringDB

## Build Cypher CREATE Commands

### Sanitize Cypher query

In [7]:
def sanitize_cypher_query(query):
    query = query.replace("#", "_")
    query = query.replace("-", "_")

    return query

### TuringDB v2 (not implemented yet)

In [8]:
def rdf_to_cypher_v2(rdf_file, batch_size=1000):
    from rdflib import Graph
    
    g = Graph()
    g.parse(rdf_file)

    #for s, p, o in g:
    #    print(f"""s: {s}\np: {p}\no: {o}""")
    #    print()
    #
    #return
    
    nodes = {str(s) for s, _, _ in g} | {str(o) for _, _, o in g if not isinstance(o, str) or o.startswith('http')}
    
    for i in range(0, len(nodes), batch_size):
        batch = list(nodes)[i:i+batch_size]
        yield "CREATE " + ",\n".join(
            f"(:{node.split('/')[-1]} {{uri: '{node}'}})" 
            for node in batch
        )
    
    for s, p, o in g:
        if not isinstance(o, str) or o.startswith('http'):
            yield f"MATCH (a {{uri: '{s}'}}), (b {{uri: '{o}'}}) CREATE (a)-[:{str(p).split('/')[-1]}]->(b)"

In [9]:
%%time

# Create Cypher CREATE command for TuringDB v2
graph_CREATE_command_v2 = '\n'.join(rdf_to_cypher_v2(path_rdf))
graph_CREATE_command_v2 = sanitize_cypher_query(graph_CREATE_command_v2)
print(graph_CREATE_command_v2)

CREATE (:wine_GaryFarrellMerlot {uri: 'http://www.w3.org/TR/2003/PR_owl_guide_20031209/wine_GaryFarrellMerlot'}),
(:N6f3a10c292c446e98e694645c3d288e7 {uri: 'N6f3a10c292c446e98e694645c3d288e7'}),
(:N0f34ecc800844be2bd19996591433ffe {uri: 'N0f34ecc800844be2bd19996591433ffe'}),
(:N643c237a054c4c0785d74d87f9a28147 {uri: 'N643c237a054c4c0785d74d87f9a28147'}),
(:wine_GamayGrape {uri: 'http://www.w3.org/TR/2003/PR_owl_guide_20031209/wine_GamayGrape'}),
(:N2835ab8aabfb4315ae199f38fb1e3c78 {uri: 'N2835ab8aabfb4315ae199f38fb1e3c78'}),
(:N51497b936600455c9f1d1a175e35c566 {uri: 'N51497b936600455c9f1d1a175e35c566'}),
(:wine_FrenchWine {uri: 'http://www.w3.org/TR/2003/PR_owl_guide_20031209/wine_FrenchWine'}),
(:Nfde46439f9ec4842b33eaeb88a0965d1 {uri: 'Nfde46439f9ec4842b33eaeb88a0965d1'}),
(:wine_Rose {uri: 'http://www.w3.org/TR/2003/PR_owl_guide_20031209/wine_Rose'}),
(:Nc615edae9a4d45db995660f963b0852d {uri: 'Nc615edae9a4d45db995660f963b0852d'}),
(:wine_Margaux {uri: 'http://www.w3.org/TR/2003/PR_o

### TuringDB v1

In [10]:
def rdf_to_cypher_v1(rdf_file):
    from rdflib import Graph
    
    g = Graph()
    g.parse(rdf_file)
    
    # Collect nodes
    nodes = {str(s) for s, _, _ in g} | {str(o) for _, _, o in g if not isinstance(o, str) or o.startswith('http')}
    node_to_var = {node: f"n{i}" for i, node in enumerate(nodes)}
    
    parts = []
    
    # Add all nodes
    for node in nodes:
        var = node_to_var[node]
        label = node.split('/')[-1]
        # Prefix label with underscore if it starts with a digit
        if label and label[0].isdigit():
            label = 'ID_' + label
        parts.append(f"({var}:{label} {{uri: '{node}'}})")
    
    # Add all edges
    for s, p, o in g:
        if not isinstance(o, str) or o.startswith('http'):
            s_var = node_to_var[str(s)]
            o_var = node_to_var[str(o)]
            pred = str(p).split('/')[-1]
            # Prefix predicate with underscore if it starts with a digit
            if pred and pred[0].isdigit():
                pred = 'ID_' + pred
            parts.append(f"({s_var})-[:{pred}]->({o_var})")
    
    return "CREATE " + ",\n".join(parts)

In [11]:
def sanitize_identifier(s):
    """Sanitize identifiers (labels, relationship types, property names)"""
    if not s:
        return "ID_empty"
    
    # Replace invalid characters with underscores
    s = s.replace('#', '_').replace('-', '_').replace('/', '_').replace('.', '_')
    
    # Prefix with ID_ if starts with digit or underscore
    if s[0].isdigit() or s[0] == '_':
        s = 'ID_' + s
    
    # Remove any other non-alphanumeric characters except underscores
    s = ''.join(c if c.isalnum() or c == '_' else '_' for c in s)
    
    return s

def sanitize_value(s):
    """Sanitize property values (escape quotes)"""
    return s.replace("'", "\\'").replace('"', '\\"')

def rdf_to_cypher_v1(rdf_file):
    from rdflib import Graph
    
    g = Graph()
    g.parse(rdf_file)
    
    # Collect nodes
    nodes = {str(s) for s, _, _ in g} | {str(o) for _, _, o in g if not isinstance(o, str) or o.startswith('http')}
    node_to_var = {node: f"n{i}" for i, node in enumerate(nodes)}
    
    parts = []
    
    # Add all nodes
    for node in nodes:
        var = node_to_var[node]
        label = sanitize_identifier(node.split('/')[-1])
        uri_value = sanitize_value(node)
        parts.append(f"({var}:{label} {{uri: '{uri_value}'}})")
    
    # Add all edges
    for s, p, o in g:
        if not isinstance(o, str) or o.startswith('http'):
            s_var = node_to_var[str(s)]
            o_var = node_to_var[str(o)]
            pred = sanitize_identifier(str(p).split('/')[-1])
            parts.append(f"({s_var})-[:{pred}]-({o_var})")
    
    return "CREATE " + ",\n".join(parts)

In [12]:
def rdf_to_cypher_v1(rdf_file, include_blank_nodes=False):
    from rdflib import Graph, RDF, RDFS, OWL, Literal
    
    g = Graph()
    g.parse(rdf_file)
    
    # Helper function to detect blank nodes
    def is_blank_node(uri):
        # Blank nodes in RDF are just the hash without http://
        return not uri.startswith('http://') and not uri.startswith('https://')
    
    # Collect all nodes (exclude literal values)
    nodes = {str(s) for s, _, _ in g} | {str(o) for _, _, o in g if not isinstance(o, Literal)}
    
    # Filter out blank nodes if requested
    if not include_blank_nodes:
        nodes = {n for n in nodes if not is_blank_node(n)}
    
    node_to_var = {node: f"n{i}" for i, node in enumerate(nodes)}
    
    # Map nodes to their types (classes)
    node_types = {}
    for s, p, o in g:
        if p == RDF.type and not isinstance(o, Literal):
            node_types[str(s)] = str(o)
    
    parts = []
    
    # Add all nodes
    for node in nodes:
        var = node_to_var[node]
        
        # Use the node's own name as label
        label = sanitize_identifier(node.split('/')[-1].split('#')[-1])
        
        # Extract meaningful name (without sanitization for readability)
        name = node.split('/')[-1].split('#')[-1]
        
        # Use generic label for blank nodes
        if is_blank_node(node):
            label = "BlankNode"
            name = "BlankNode"
        
        uri_value = sanitize_value(node)
        name_value = sanitize_value(name)
        
        # Add type as a property if available
        if node in node_types:
            type_value = sanitize_value(node_types[node])
            parts.append(f"({var}:{label} {{uri: '{uri_value}', name: '{name_value}', type: '{type_value}'}})")
        else:
            parts.append(f"({var}:{label} {{uri: '{uri_value}', name: '{name_value}'}})")
    
    # Add all edges (exclude literals and blank nodes if requested)
    for s, p, o in g:
        if not isinstance(o, Literal):
            # Skip edges involving blank nodes if not included
            if str(s) not in node_to_var or str(o) not in node_to_var:
                continue
            
            s_var = node_to_var[str(s)]
            o_var = node_to_var[str(o)]
            pred = sanitize_identifier(str(p).split('/')[-1].split('#')[-1])
            parts.append(f"({s_var})-[:{pred}]->({o_var})")

    return "CREATE " + ",\n".join(parts)

In [13]:
%%time

# Create Cypher CREATE command for TuringDB v1
graph_CREATE_command_v1 = rdf_to_cypher_v1(path_rdf, include_blank_nodes=False)
#print(graph_CREATE_command_v1)

# Save Cypher query to file
with open(f"{path_data}/{example_name}.cypher", "w") as f:
    f.write(graph_CREATE_command_v1)

CPU times: user 89 ms, sys: 6.89 ms, total: 95.9 ms
Wall time: 94.4 ms


In [14]:
%%time

# Create Cypher CREATE command for TuringDB v1
graph_CREATE_command_v1_whole_ontology = rdf_to_cypher_v1(path_rdf, include_blank_nodes=True)
#print(graph_CREATE_command_v1_whole_ontology)

# Save Cypher query to file
with open(f"{path_data}/{example_name}_whole_ontology.cypher", "w") as f:
    f.write(graph_CREATE_command_v1_whole_ontology)

CPU times: user 95 ms, sys: 13.7 ms, total: 109 ms
Wall time: 98.3 ms


## tree - New version 

In [41]:
def rdf_to_cypher(rdf_file, include_blank_nodes=False):
    from rdflib import Graph, RDF, RDFS, OWL, Literal
    
    g = Graph()
    g.parse(rdf_file)
    
    # Helper function to detect blank nodes
    def is_blank_node(uri):
        return not uri.startswith('http://') and not uri.startswith('https://')
    
    # Build class hierarchy map
    def build_class_hierarchy():
        """Build a map of class -> parent class(es) from subClassOf and intersectionOf"""
        hierarchy = {}
        
        # Handle rdfs:subClassOf
        for s, p, o in g:
            if p == RDFS.subClassOf and not isinstance(o, Literal):
                if not is_blank_node(str(o)):
                    child = str(s)
                    parent = str(o)
                    if child not in hierarchy:
                        hierarchy[child] = []
                    hierarchy[child].append(parent)
        
        # Handle owl:intersectionOf - find Wine in the intersection
        for s, p, o in g:
            if p == OWL.intersectionOf and not isinstance(o, Literal):
                child = str(s)
                # Follow the RDF list to find classes in the intersection
                current = o
                while current and str(current) != 'http://www.w3.org/1999/02/22-rdf-syntax-ns#nil':
                    # Get rdf:first (the class in the list)
                    for _, first_p, first_o in g.triples((current, RDF.first, None)):
                        if not isinstance(first_o, Literal) and not is_blank_node(str(first_o)):
                            if child not in hierarchy:
                                hierarchy[child] = []
                            hierarchy[child].append(str(first_o))
                    
                    # Get rdf:rest (next item in list)
                    next_item = None
                    for _, rest_p, rest_o in g.triples((current, RDF.rest, None)):
                        next_item = rest_o
                        break
                    current = next_item
        
        return hierarchy
    
    def get_root_class(class_uri, hierarchy, max_depth=20):
        """Get the most generic (root) class by following parent chain"""
        visited = set()
        current = class_uri
        
        for _ in range(max_depth):
            if current in visited:
                break
            visited.add(current)
            
            # Check if this class has parents
            if current in hierarchy:
                # Pick the first non-OWL/RDF parent
                parents = [p for p in hierarchy[current] 
                          if not p.startswith('http://www.w3.org/2002/07/owl#')
                          and not p.startswith('http://www.w3.org/2000/01/rdf-schema#')
                          and not p.startswith('http://www.w3.org/1999/02/')]
                if parents:
                    current = parents[0]  # Follow first parent
                else:
                    break
            else:
                break
        
        return current
    
    hierarchy = build_class_hierarchy()
    
    # Collect all nodes
    all_nodes = {str(s) for s, _, _ in g} | {str(o) for _, _, o in g if not isinstance(o, Literal)}
    
    if not include_blank_nodes:
        nodes = {n for n in all_nodes if not is_blank_node(n)}
    else:
        nodes = all_nodes
    
    node_to_var = {node: f"n{i}" for i, node in enumerate(nodes)}
    
    parts = []
    
    # Add all nodes
    for node in nodes:
        var = node_to_var[node]
        instance_name = node.split('/')[-1].split('#')[-1]
        
        if is_blank_node(node):
            label = "BlankNode"
            name = "BlankNode"
            uri_value = sanitize_value(node)
            name_value = sanitize_value(name)
            parts.append(f"({var}:{label} {{uri: '{uri_value}', name: '{name_value}'}})")
        else:
            # Check if this node is an instance (has rdf:type)
            instance_type = None
            for s, p, o in g:
                if str(s) == node and p == RDF.type and not isinstance(o, Literal):
                    o_str = str(o)
                    if o_str != 'http://www.w3.org/2002/07/owl#Class' and not is_blank_node(o_str):
                        instance_type = o_str
                        break
            
            if instance_type:
                # This is an instance - find its root class
                root_class = get_root_class(instance_type, hierarchy)
                label = sanitize_identifier(root_class.split('/')[-1].split('#')[-1])
                name = instance_name
                
                uri_value = sanitize_value(node)
                name_value = sanitize_value(name)
                type_value = sanitize_value(instance_type)
                parts.append(f"({var}:{label} {{uri: '{uri_value}', name: '{name_value}', type: '{type_value}'}})")
            else:
                # This is a class definition - find its root
                if node in hierarchy or any(node in parents for parents in hierarchy.values()):
                    root_class = get_root_class(node, hierarchy)
                    label = sanitize_identifier(root_class.split('/')[-1].split('#')[-1])
                else:
                    label = sanitize_identifier(instance_name)
                
                name = instance_name
                uri_value = sanitize_value(node)
                name_value = sanitize_value(name)
                parts.append(f"({var}:{label} {{uri: '{uri_value}', name: '{name_value}'}})")
    
    # Add all edges
    for s, p, o in g:
        if not isinstance(o, Literal):
            if str(s) not in node_to_var or str(o) not in node_to_var:
                continue
            
            s_var = node_to_var[str(s)]
            o_var = node_to_var[str(o)]
            pred = sanitize_identifier(str(p).split('/')[-1].split('#')[-1])
            parts.append(f"({s_var})-[:{pred}]->({o_var})")
    
    return "CREATE " + ",\n".join(parts)

In [42]:
%%time

# Create Cypher CREATE command for TuringDB v1
graph_CREATE_command = rdf_to_cypher(path_rdf, include_blank_nodes=False)
print(graph_CREATE_command)

# Save Cypher query to file
with open(f"{path_data}/{example_name}_tree.cypher", "w") as f:
    f.write(graph_CREATE_command)

CREATE (n0:Winery {uri: 'http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine#McGuinnesso', name: 'McGuinnesso', type: 'http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine#Winery'}),
(n1:PotableLiquid {uri: 'http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine#GaryFarrellMerlot', name: 'GaryFarrellMerlot', type: 'http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine#Merlot'}),
(n2:Thing {uri: 'http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine#SauvignonBlancGrape', name: 'SauvignonBlancGrape', type: 'http://www.w3.org/2002/07/owl#Thing'}),
(n3:Winery {uri: 'http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine#GaryFarrell', name: 'GaryFarrell', type: 'http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine#Winery'}),
(n4:PotableLiquid {uri: 'http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine#SchlossRothermelTrochenbierenausleseRiesling', name: 'SchlossRothermelTrochenbierenausleseRiesling', type: 'http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine#SweetRiesling'}),
(n5:ObjectProperty

## Using networkx

In [71]:
def rdf_to_cypher_nx(rdf_file, include_blank_nodes=False):
    from rdflib import Graph #, RDF, RDFS, OWL, Literal
    from rdflib.extras.external_graph_libs import rdflib_to_networkx_multidigraph
    
    g = Graph()
    result = g.parse(rdf_file)

    G = rdflib_to_networkx_multidigraph(result)

    return G

In [73]:
G = rdf_to_cypher_nx(path_rdf)
print(G)

MultiDiGraph with 733 nodes and 1839 edges


In [74]:
for nodes in G.nodes(data=True):
    print(nodes)

(rdflib.term.URIRef('http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine#MariettaZinfandel'), {})
(rdflib.term.URIRef('http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine#Dry'), {})
(rdflib.term.URIRef('http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine#MountadamPinotNoir'), {})
(rdflib.term.URIRef('http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine#PinotNoir'), {})
(rdflib.term.URIRef('http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine#CabernetFranc'), {})
(rdflib.term.URIRef('http://www.w3.org/2002/07/owl#Class'), {})
(rdflib.term.BNode('Na69323a4dbbb48bb9ca8268064fa894e'), {})
(rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#nil'), {})
(rdflib.term.BNode('N7820cafd23d146c196faf920bbd885fb'), {})
(rdflib.term.BNode('N422ad33260b245189cba1ce599e2dbfe'), {})
(rdflib.term.URIRef('http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine#GaryFarrellMerlot'), {})
(rdflib.term.BNode('Nc4dda7f5f7054bffa3e4cff73d2dfa74'), {})
(rdflib.term.BNode('N0c07547f2e614dffae10b174223

In [75]:
for edges in G.edges(data=True):
    print(edges)

(rdflib.term.URIRef('http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine#MariettaZinfandel'), rdflib.term.URIRef('http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine#Dry'), {})
(rdflib.term.URIRef('http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine#MariettaZinfandel'), rdflib.term.URIRef('http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine#Marietta'), {})
(rdflib.term.URIRef('http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine#MariettaZinfandel'), rdflib.term.URIRef('http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine#Zinfandel'), {})
(rdflib.term.URIRef('http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine#MariettaZinfandel'), rdflib.term.URIRef('http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine#Medium'), {})
(rdflib.term.URIRef('http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine#MariettaZinfandel'), rdflib.term.URIRef('http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine#SonomaRegion'), {})
(rdflib.term.URIRef('http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine#Marietta

## Test Owlready2 package

In [79]:
#pip install owlready2

from owlready2 import *

onto = get_ontology(path_rdf).load()

# Get all individuals (instances)
print("\n=== INDIVIDUALS ===")
for ind in onto.individuals():
    print(f"Individual: {ind.name}")
    print(f"Classes: {[c.name for c in ind.is_a]}")
    print(f"Properties: {[(p.name, getattr(ind, p.name, None)) for p in ind.get_properties()]}")
    print("---")

# Get all classes with their hierarchy
print("\n=== CLASS HIERARCHY ===")
for cls in onto.classes():
    # Get direct parent (not all ancestors)
    parents = [c.name for c in cls.is_a if isinstance(c, ThingClass)]
    print(f"Class: {cls.name}, Direct Parents: {parents}")


=== INDIVIDUALS ===

=== CLASS HIERARCHY ===
Class: Wine, Direct Parents: ['PotableLiquid']
Class: Winery, Direct Parents: ['Thing']
Class: Vintage, Direct Parents: ['Thing']
Class: WineGrape, Direct Parents: ['Grape']
Class: WhiteWine, Direct Parents: ['Thing']
Class: WhiteTableWine, Direct Parents: ['Thing']
Class: TableWine, Direct Parents: ['Thing']
Class: WhiteNonSweetWine, Direct Parents: ['Thing']
Class: WhiteLoire, Direct Parents: ['Thing']
Class: Loire, Direct Parents: ['Thing']
Class: WhiteBurgundy, Direct Parents: ['Thing']
Class: Burgundy, Direct Parents: ['Thing']
Class: WhiteBordeaux, Direct Parents: ['Thing']
Class: Bordeaux, Direct Parents: ['Thing']
Class: Region, Direct Parents: ['Thing']
Class: VintageYear, Direct Parents: ['Thing']
Class: WineDescriptor, Direct Parents: ['Thing']
Class: WineSugar, Direct Parents: ['WineTaste']
Class: WineBody, Direct Parents: ['WineTaste']
Class: WineFlavor, Direct Parents: ['WineTaste']
Class: WineColor, Direct Parents: ['WineDesc

In [146]:
def owlready_to_cypher(rdf_file, include_blank_nodes=False):
    from owlready2 import get_ontology, ThingClass, Thing, ObjectProperty, DataProperty, sync_reasoner_pellet
    
    onto = get_ontology(f"file://{rdf_file}").load()
    
    # Run reasoner
    try:
        with onto:
            sync_reasoner_pellet(infer_property_values=True, infer_data_property_values=True)
        print("Reasoner completed successfully")
    except Exception as e:
        print(f"Reasoner failed: {e}, continuing without reasoning")
    
    node_parts = []
    edge_parts = []
    node_counter = 0
    node_to_var = {}
    
    def get_var(uri):
        nonlocal node_counter
        if uri not in node_to_var:
            node_to_var[uri] = f"n{node_counter}"
            node_counter += 1
        return node_to_var[uri]
    
    def get_most_generic_parent(cls):
        """Get the most generic non-Thing parent"""
        ancestors = [a for a in cls.ancestors() 
                    if isinstance(a, ThingClass) 
                    and a.name != 'Thing'
                    and a != cls]
        return ancestors[-1] if ancestors else cls
    
    # First pass: collect all nodes that will be created
    valid_nodes = set()
    
    # Add all classes
    for cls in onto.classes():
        if cls.name != 'Thing':
            valid_nodes.add(cls.iri)
    
    # Add all properties
    for prop in onto.properties():
        valid_nodes.add(prop.iri)
    
    # Add all individuals
    for ind in onto.individuals():
        valid_nodes.add(ind.iri)
    
    # Create nodes for all classes
    for cls in onto.classes():
        if cls.name == 'Thing':
            continue
        
        var = get_var(cls.iri)
        root = get_most_generic_parent(cls)
        label = sanitize_identifier(root.name)
        name = cls.name
        
        # Build properties
        props = [
            f"uri: '{sanitize_value(cls.iri)}'",
            f"name: '{sanitize_value(name)}'",
            f"type: 'Class'"
        ]
        
        # Add direct parent names (only those in valid_nodes)
        direct_parents = [p.name for p in cls.is_a 
                         if isinstance(p, ThingClass) 
                         and p.name != 'Thing'
                         and p.iri in valid_nodes]
        if direct_parents:
            props.append(f"parents: '{sanitize_value(", ".join(direct_parents))}'")
        
        # Add comment if exists
        if hasattr(cls, 'comment') and cls.comment:
            comment = cls.comment[0] if isinstance(cls.comment, list) else cls.comment
            props.append(f"comment: '{sanitize_value(str(comment))}'")
        
        # Add label if exists
        if hasattr(cls, 'label') and cls.label:
            lbl = cls.label[0] if isinstance(cls.label, list) else cls.label
            props.append(f"rdfs_label: '{sanitize_value(str(lbl))}'")
        
        node_parts.append(f"({var}:{label} {{{', '.join(props)}}})")
    
    # Track added edges to avoid duplicates
    added_edges = set()
    
    # Create subClassOf relationships (only for valid nodes)
    for cls in onto.classes():
        if cls.name == 'Thing':
            continue
        
        s_var = get_var(cls.iri)
        
        for parent in cls.is_a:
            if isinstance(parent, ThingClass) and parent.name != 'Thing' and parent.iri in valid_nodes:
                o_var = get_var(parent.iri)
                edge = f"({s_var})-[:subClassOf]->({o_var})"
                if edge not in added_edges:
                    edge_parts.append(edge)
                    added_edges.add(edge)
    
    # Create nodes for properties
    for prop in onto.properties():
        prop_var = get_var(prop.iri)
        prop_type = "ObjectProperty" if isinstance(prop, ObjectProperty) else "DataProperty"
        
        props_list = [
            f"uri: '{sanitize_value(prop.iri)}'",
            f"name: '{sanitize_value(prop.name)}'",
            f"type: 'Property'",
            f"property_type: '{prop_type}'"
        ]
        
        node_parts.append(f"({prop_var}:Property {{{', '.join(props_list)}}})")
        
        # Add domain relationships (only for valid nodes)
        if hasattr(prop, 'domain') and prop.domain:
            domains = prop.domain if isinstance(prop.domain, list) else [prop.domain]
            for domain in domains:
                if isinstance(domain, ThingClass) and domain.name != 'Thing' and domain.iri in valid_nodes:
                    domain_var = get_var(domain.iri)
                    edge = f"({domain_var})-[:hasDomain]->({prop_var})"
                    if edge not in added_edges:
                        edge_parts.append(edge)
                        added_edges.add(edge)
        
        # Add range relationships (only for valid nodes)
        if hasattr(prop, 'range') and prop.range:
            ranges = prop.range if isinstance(prop.range, list) else [prop.range]
            for rng in ranges:
                if isinstance(rng, ThingClass) and rng.name != 'Thing' and rng.iri in valid_nodes:
                    range_var = get_var(rng.iri)
                    edge = f"({prop_var})-[:hasRange]->({range_var})"
                    if edge not in added_edges:
                        edge_parts.append(edge)
                        added_edges.add(edge)
    
    # Handle individuals (only for valid nodes)
    for ind in onto.individuals():
        s_var = get_var(ind.iri)
        
        for prop in ind.get_properties():
            values = getattr(ind, prop.name, [])
            if not isinstance(values, list):
                values = [values]
            
            for value in values:
                if isinstance(value, Thing) and value.iri in valid_nodes:
                    o_var = get_var(value.iri)
                    pred = sanitize_identifier(prop.name)
                    edge = f"({s_var})-[:{pred}]->({o_var})"
                    if edge not in added_edges:
                        edge_parts.append(edge)
                        added_edges.add(edge)
    
    # Combine nodes first, then edges
    return "CREATE " + ",\n".join(node_parts + edge_parts)

In [147]:
%%time

graph_CREATE_command_owlready = owlready_to_cypher(path_rdf, include_blank_nodes=True)
print(graph_CREATE_command_owlready)

# Save Cypher query to file
with open(f"{path_data}/{example_name}_owlready.cypher", "w") as f:
    f.write(graph_CREATE_command_owlready)

* Owlready2 * Running Pellet...
    java -Xmx2000M -cp /home/dev/turingdb-examples/.venv/lib/python3.13/site-packages/owlready2/pellet/antlr-3.2.jar:/home/dev/turingdb-examples/.venv/lib/python3.13/site-packages/owlready2/pellet/jena-core-2.10.0.jar:/home/dev/turingdb-examples/.venv/lib/python3.13/site-packages/owlready2/pellet/owlapi-distribution-3.4.3-bin.jar:/home/dev/turingdb-examples/.venv/lib/python3.13/site-packages/owlready2/pellet/xercesImpl-2.10.0.jar:/home/dev/turingdb-examples/.venv/lib/python3.13/site-packages/owlready2/pellet/xml-apis-1.4.01.jar:/home/dev/turingdb-examples/.venv/lib/python3.13/site-packages/owlready2/pellet/jena-arq-2.10.0.jar:/home/dev/turingdb-examples/.venv/lib/python3.13/site-packages/owlready2/pellet/slf4j-log4j12-1.6.4.jar:/home/dev/turingdb-examples/.venv/lib/python3.13/site-packages/owlready2/pellet/jgrapht-jdk1.5.jar:/home/dev/turingdb-examples/.venv/lib/python3.13/site-packages/owlready2/pellet/log4j-core-2.19.0.jar:/home/dev/turingdb-examples/.

Reasoner completed successfully
CREATE (n0:PotableLiquid {uri: 'http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine##Wine', name: 'Wine', type: 'Class', rdfs_label: 'wine'}),
(n1:Winery {uri: 'http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine##Winery', name: 'Winery', type: 'Class'}),
(n2:Vintage {uri: 'http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine##Vintage', name: 'Vintage', type: 'Class'}),
(n3:Grape {uri: 'http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine##WineGrape', name: 'WineGrape', type: 'Class'}),
(n4:PotableLiquid {uri: 'http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine##WhiteWine', name: 'WhiteWine', type: 'Class', parents: 'Wine'}),
(n5:WhiteWine {uri: 'http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine##WhiteTableWine', name: 'WhiteTableWine', type: 'Class', parents: 'TableWine, WhiteNonSweetWine, DryWine'}),
(n6:PotableLiquid {uri: 'http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine##TableWine', name: 'TableWine', type: 'Class', parents: 'Wine'}),
(n7

* Owlready2 * Pellet took 1.1736650466918945 seconds
* Owlready * Equivalenting: wine#.DryRedWine wine#.RedTableWine
* Owlready * Equivalenting: wine#.DryRedWine wine#.RedTableWine
* Owlready * Equivalenting: wine#.RedTableWine wine#.DryRedWine
* Owlready * Equivalenting: wine#.RedTableWine wine#.DryRedWine
* Owlready * Equivalenting: wine#.DryWine wine#.TableWine
* Owlready * Equivalenting: wine#.TableWine wine#.DryWine
* Owlready * Equivalenting: wine#.DryWhiteWine wine#.WhiteTableWine
* Owlready * Equivalenting: wine#.DryWhiteWine wine#.WhiteTableWine
* Owlready * Equivalenting: wine#.WhiteTableWine wine#.DryWhiteWine
* Owlready * Equivalenting: wine#.WhiteTableWine wine#.DryWhiteWine
* Owlready * (NB: only changes on entities loaded in Python are shown, other changes are done but not listed)


# Create graph using `turingdb` python package

<div class="alert alert-block alert-info">
    <h2>
        See <a href="https://docs.turingdb.ai/quickstart">TuringDB Get started documentation</a> for the important steps to follow :
    </h2>
    <h3>
        <ul>
            <li>Create your TuringDB account</li>
            <li>Create your instance in the <a href="https://console.turingdb.ai/auth">TuringDB Cloud UI</a></li>
            <li>Copy your Instance ID from the Database Instances management page</li>
            <li>Get API Key from the Settings in UI</li>
        </ul>
        Remember to have your instance active while working in this notebook !
    </h3>
</div>

In [148]:
from turingdb import TuringDB

# Create TuringDB client
client = TuringDB(
    host="http://localhost:6666"  # Remove this parameter and set the two parameters below
    # instance_id=os.getenv("INSTANCE_ID"),
    # auth_token=os.getenv("AUTH_TOKEN"),
)

In [149]:
%%time

client.s3_connect(
    bucket_name="turing-internal",
    region="eu-west-2",
    access_key=os.getenv("AWS_ACCESS_KEY"),
    secret_key=os.getenv("AWS_SECRET_KEY"),
)

CPU times: user 86.3 ms, sys: 41.4 ms, total: 128 ms
Wall time: 199 ms


In [150]:
# Get list of loaded graphs
list_graphs = client.list_loaded_graphs()
list_graphs

['wine_ontology5',
 'wine_ontology3',
 'wine_ontology2',
 'wine_ontology1',
 'wine_ontology4',
 'people',
 'default']

In [151]:
# Set graph name
graph_name_prefix = example_name
graph_name_nb_suffix = str(
    max(
        [
            int(re.sub(graph_name_prefix, "", g))
            for g in list_graphs
            if g.startswith(graph_name_prefix)
            and re.sub(graph_name_prefix, "", g).isdigit()
        ]
        + [0]
    )
    + 1
)
graph_name = graph_name_prefix + graph_name_nb_suffix
graph_name = re.sub("-", "_", graph_name)
print(f"graph_name: {graph_name}")

graph_name: wine_ontology6


In [152]:
from turingdb.exceptions import TuringDBException

In [153]:
%%time

# Set graph
try:
    client.create_graph(graph_name)
except TuringDBException as e:
    print(e)

# Set working graph
client.set_graph(graph_name)

CPU times: user 3.07 ms, sys: 1.02 ms, total: 4.08 ms
Wall time: 19.3 ms


In [154]:
%%time

# Create a new change on the graph
client.checkout()
change = client.new_change()
print(f"Current change {change}")

# Checkout into the change
client.checkout(change=change)

Current change 0
CPU times: user 2.3 ms, sys: 1.05 ms, total: 3.34 ms
Wall time: 2.69 ms


In [155]:
# Sanitise query for v1
def sanitise_for_v1(query):
    query = query.replace("->", "-")
    
    return query

graph_CREATE_command_owlready = sanitise_for_v1(graph_CREATE_command_owlready)
#print(graph_CREATE_command_owlready)

In [156]:
%%time

# Run CREATE command
print("\nExecuting query on TuringDB...")
start_time = time.time()
result = client.query(graph_CREATE_command_owlready)
execution_time = time.time() - start_time
print(f"âœ“ Graph created successfully in {execution_time:.2f} seconds")

# Commit the change
client.query("COMMIT")
client.query("CHANGE SUBMIT")

# Checkout into main
client.checkout()


Executing query on TuringDB...
âœ“ Graph created successfully in 0.01 seconds
CPU times: user 6.98 ms, sys: 0 ns, total: 6.98 ms
Wall time: 103 ms


# Query TuringDB

## Use metaqueries to have insight on graph overall structure

<h3>
    To learn more about ðŸ“® Metaqueries, please check TuringDB documentation on this <a href="https://turingdb.mintlify.app/query/cypher_subset#%F0%9F%93%AE-metaqueries">link</a>
</h3>

In [165]:
%%time

# CALL PROPERTIES() - returns a column of all the different node and edge properties and their types in the database
command = """
CALL PROPERTIES()
"""
df_PROPERTIES = client.query(command)
if df_PROPERTIES.empty:
    print("No result found")
else:
    df_PROPERTIES.columns = ["Property_ID", "Property_name", "Property_type"]
    display(df_PROPERTIES)

Unnamed: 0,Property_ID,Property_name,Property_type
0,0,uri,String
1,1,name,String
2,2,type,String
3,3,rdfs_label,String
4,4,parents,String
5,5,comment,String
6,6,property_type,String


CPU times: user 7.81 ms, sys: 3.13 ms, total: 10.9 ms
Wall time: 9.09 ms


In [166]:
# Get node properties
nodes_properties = df_PROPERTIES["Property_name"].values.tolist()
print(f"Node properties: {nodes_properties}")

Node properties: ['uri', 'name', 'type', 'rdfs_label', 'parents', 'comment', 'property_type']


In [167]:
%%time

# CALL LABELS () - returns a column of all the different node labels
command = """
CALL LABELS()
"""
df_LABELS = client.query(command)
if df_LABELS.empty:
    print("No result found")
else:
    df_LABELS.columns = ["Node_type_ID", "Node_label"]
    display(df_LABELS)

Unnamed: 0,Node_type_ID,Node_label
0,0,PotableLiquid
1,1,Winery
2,2,Vintage
3,3,Grape
4,4,WhiteWine
5,5,Loire
6,6,DryWhiteWine
7,7,DryWine
8,8,SemillonOrSauvignonBlanc
9,9,Region


CPU times: user 7.1 ms, sys: 1.91 ms, total: 9.01 ms
Wall time: 7.73 ms


In [168]:
%%time

# CALL EDGETYPES() - returns a column of all the different edge types (edge equivalent of node labels)
command = """
CALL EDGETYPES()
"""
df_EDGETYPES = client.query(command)
if df_EDGETYPES.empty:
    print("No result found")
else:
    df_EDGETYPES.columns = ["Edge_type_ID", "Edge_label"]
    display(df_EDGETYPES)

Unnamed: 0,Edge_type_ID,Edge_label
0,0,subClassOf
1,1,hasDomain
2,2,hasRange


CPU times: user 7.25 ms, sys: 1.03 ms, total: 8.28 ms
Wall time: 6.92 ms


In [169]:
%%time

# CALL LABELSETS() - returns a two columns describing combinations of node labels
command = """
CALL LABELSETS()
"""
df_LABELSETS = client.query(command)
if df_LABELSETS.empty:
    print("No result found")
else:
    df_LABELSETS.columns = ["Node_type_ID", "Node_label"]
    display(df_LABELSETS)

Unnamed: 0,Node_type_ID,Node_label
0,0,PotableLiquid
1,1,Winery
2,2,Vintage
3,3,Grape
4,4,WhiteWine
5,5,Loire
6,6,DryWhiteWine
7,7,DryWine
8,8,SemillonOrSauvignonBlanc
9,9,Region


CPU times: user 6.8 ms, sys: 1.87 ms, total: 8.67 ms
Wall time: 7.59 ms


In [170]:
%%time

# Find number of nodes and number of edges in the graph
n_nodes = len(client.query("MATCH (n) RETURN n"))
n_edges = len(client.query("MATCH (n)--(m) RETURN n, m"))
print(f"Graph: {n_nodes:,} nodes and {n_edges:,} edges")

Graph: 87 nodes and 133 edges
CPU times: user 5.33 ms, sys: 29 Î¼s, total: 5.36 ms
Wall time: 4.17 ms


## Simple queries

In [171]:
from turingdb_examples.utils import get_return_statements

In [173]:
%%time

# Match all edges and return them
command = """
MATCH (n)-[e]-(m)
RETURN n.name, n.type, e, m.name, m.type
"""
df_all_edges = client.query(command)
if df_all_edges.empty:
    print("No result found")
else:
    df_all_edges.columns = get_return_statements(command)
    display(df_all_edges)

Unnamed: 0,n.name,n.type,e,m.name,m.type
0,DryWine,Class,0,Wine,Class
1,SweetWine,Class,1,Wine,Class
2,DessertWine,Class,2,Wine,Class
3,FullBodiedWine,Class,3,Wine,Class
4,Gamay,Class,4,Wine,Class
...,...,...,...,...,...
128,hasColor,Property,128,WineColor,Class
129,hasBody,Property,129,WineBody,Class
130,hasFlavor,Property,130,WineFlavor,Class
131,hasSugar,Property,131,WineSugar,Class


CPU times: user 13 ms, sys: 1.15 ms, total: 14.2 ms
Wall time: 12.4 ms


In [175]:
# Get all nodes by label type
for label in df_LABELS["Node_label"]:
    print(100 * "#")
    print(f"label: {label}")
    display(client.query(f"MATCH (n:{label}) RETURN n.name"))

print(100 * "#")

####################################################################################################
label: PotableLiquid


Unnamed: 0,0
0,DryWine
1,SweetWine
2,DessertWine
3,FullBodiedWine
4,Gamay
5,CaliforniaWine
6,TexasWine
7,GermanWine
8,ItalianWine
9,LateHarvest


####################################################################################################
label: Winery


Unnamed: 0,0
0,Winery


####################################################################################################
label: Vintage


Unnamed: 0,0
0,Vintage


####################################################################################################
label: Grape


Unnamed: 0,0
0,WineGrape


####################################################################################################
label: WhiteWine


Unnamed: 0,0
0,PinotBlanc
1,SemillonOrSauvignonBlanc
2,Riesling
3,WhiteTableWine
4,WhiteNonSweetWine
5,Chardonnay
6,DryWhiteWine


####################################################################################################
label: Loire


Unnamed: 0,0
0,Sancerre
1,WhiteLoire
2,Muscadet


####################################################################################################
label: DryWhiteWine


Unnamed: 0,0
0,WhiteBurgundy
1,DryRiesling
2,Meursault


####################################################################################################
label: DryWine


Unnamed: 0,0
0,Chianti
1,DryRedWine
2,CabernetSauvignon
3,CabernetFranc
4,Beaujolais
5,Merlot
6,PetiteSyrah
7,Burgundy
8,Zinfandel
9,RedTableWine


####################################################################################################
label: SemillonOrSauvignonBlanc


Unnamed: 0,0
0,Semillon
1,SauvignonBlanc
2,WhiteBordeaux


####################################################################################################
label: Region


Unnamed: 0,0
0,Region


####################################################################################################
label: VintageYear


Unnamed: 0,0
0,VintageYear


####################################################################################################
label: WineDescriptor


Unnamed: 0,0
0,WineBody
1,WineFlavor
2,WineColor
3,WineSugar
4,WineTaste
5,WineDescriptor


####################################################################################################
label: WhiteNonSweetWine


Unnamed: 0,0
0,Tours
1,CheninBlanc


####################################################################################################
label: DessertWine


Unnamed: 0,0
0,SweetRiesling


####################################################################################################
label: RedBordeaux


Unnamed: 0,0
0,StEmilion
1,Pauillac
2,Medoc
3,Margaux


####################################################################################################
label: LateHarvest


Unnamed: 0,0
0,IceWine
1,Sauternes


####################################################################################################
label: RedWine


Unnamed: 0,0
0,Meritage
1,RedBordeaux
2,Port
3,PinotNoir


####################################################################################################
label: Wine


Unnamed: 0,0
0,FrenchWine


####################################################################################################
label: RedBurgundy


Unnamed: 0,0
0,CotesDOr


####################################################################################################
label: RoseWine


Unnamed: 0,0
0,Anjou


####################################################################################################
label: Property


Unnamed: 0,0
0,producesWine
1,hasWineDescriptor
2,madeIntoWine
3,adjacentRegion
4,hasVintageYear
5,locatedIn
6,hasColor
7,hasBody
8,hasFlavor
9,hasSugar


####################################################################################################


## More complex queries

In [193]:
%%time

# 1. Get all wine types (classes that are subclasses of Wine/PotableLiquid)
#command = """
#MATCH (wine:PotableLiquid)
#WHERE wine.type = 'Class'
#RETURN wine.name, wine.parents
#LIMIT 20
#"""
command = """
MATCH (wine:PotableLiquid {type: 'Class'})
RETURN wine.name, wine.parents
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

Unnamed: 0,wine.name,wine.parents
0,DryWine,Wine
1,SweetWine,Wine
2,DessertWine,Wine
3,FullBodiedWine,Wine
4,Gamay,Wine
5,CaliforniaWine,Wine
6,TexasWine,Wine
7,GermanWine,Wine
8,ItalianWine,Wine
9,LateHarvest,SweetWine


CPU times: user 8.74 ms, sys: 2.11 ms, total: 10.8 ms
Wall time: 9.07 ms


In [194]:
%%time

# 2. Find all properties and their domains/ranges
#command = """
#MATCH (domain)-[:hasDomain]->(prop:Property)-[:hasRange]->(range)
#RETURN domain.name, prop.name, prop.property_type, range.name
#"""
command = """
MATCH (domain)-[:hasDomain]-(prop:Property)-[:hasRange]-(range)
RETURN domain.name, prop.name, prop.property_type, range.name
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

Unnamed: 0,domain.name,prop.name,prop.property_type,range.name
0,Wine,hasColor,DataProperty,WineColor
1,Wine,madeFromGrape,DataProperty,WineGrape
2,Wine,hasWineDescriptor,DataProperty,WineDescriptor
3,Vintage,hasVintageYear,DataProperty,VintageYear
4,Region,adjacentRegion,DataProperty,Region


CPU times: user 8.09 ms, sys: 2.03 ms, total: 10.1 ms
Wall time: 8.72 ms


In [195]:
%%time

# 3. Get the complete class hierarchy for a specific wine (e.g., Chardonnay)
#command = """
#MATCH path = (leaf)-[:subClassOf*]->(root)
#WHERE leaf.name = 'Chardonnay'
#RETURN leaf.name, root.name, LENGTH(path) AS depth
#ORDER BY depth DESC
#"""
command = """
MATCH (leaf {name: 'Chardonnay'})-[:subClassOf]-(root)
RETURN leaf.name, root.name
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

Unnamed: 0,leaf.name,root.name
0,Chardonnay,WhiteWine


CPU times: user 7.8 ms, sys: 2 ms, total: 9.8 ms
Wall time: 7.95 ms


In [196]:
%%time

# 4. Find all red wines (wines that are subclass of RedWine)
#command = """
#MATCH (wine)-[:subClassOf*]->(parent)
#WHERE parent.name = 'RedWine'
#RETURN wine.name, wine.parents
#"""
command = """
MATCH (wine)-[:subClassOf]-(parent {name: 'RedWine'})
RETURN wine.name, wine.parents
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

Unnamed: 0,wine.name,wine.parents
0,DryRedWine,"TableWine, RedWine, DryWine"
1,RedTableWine,"TableWine, RedWine, DryWine"
2,Meritage,RedWine
3,RedBordeaux,"Bordeaux, RedWine"
4,Port,"RedWine, FullBodiedWine, SweetWine"
5,PinotNoir,RedWine


CPU times: user 8.1 ms, sys: 2.04 ms, total: 10.1 ms
Wall time: 8.41 ms


In [197]:
%%time

# 5. Find all direct children of Wine class
#command = """
#MATCH (child)-[:subClassOf]->(parent)
#WHERE parent.name = 'Wine'
#RETURN child.name, child.type
#"""
command = """
MATCH (child)-[:subClassOf]-(parent {name: 'Wine'})
RETURN child.name, child.type
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

Unnamed: 0,child.name,child.type
0,DryWine,Class
1,SweetWine,Class
2,DessertWine,Class
3,FullBodiedWine,Class
4,Gamay,Class
5,CaliforniaWine,Class
6,TexasWine,Class
7,GermanWine,Class
8,ItalianWine,Class
9,Bordeaux,Class


CPU times: user 9.61 ms, sys: 1.07 ms, total: 10.7 ms
Wall time: 9.08 ms


In [198]:
%%time

# 6. Get all properties that Wine class can have
#command = """
#MATCH (wine)-[:hasDomain]->(prop:Property)
#WHERE wine.name = 'Wine'
#RETURN prop.name, prop.property_type
#"""
command = """
MATCH (wine {name: 'Wine'})-[:hasDomain]-(prop:Property)
RETURN prop.name, prop.property_type
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

Unnamed: 0,prop.name,prop.property_type
0,hasColor,DataProperty
1,madeFromGrape,DataProperty
2,hasWineDescriptor,DataProperty


CPU times: user 7.28 ms, sys: 2.84 ms, total: 10.1 ms
Wall time: 8.4 ms


In [200]:
%%time

# 7. Find wine classes that have the most parent classes (complex definitions)
#command = """
#MATCH (wine)-[:subClassOf]->(parent)
#WHERE wine.type = 'Class'
#WITH wine.name AS wine_name, COUNT(parent) AS parent_count
#RETURN wine_name, parent_count
#ORDER BY parent_count DESC
#LIMIT 10
#"""
command = """
MATCH (wine {type: 'Class'})-[:subClassOf]-(parent)
RETURN wine.name, parent.name
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

Unnamed: 0,wine.name,parent.name
0,DryWine,Wine
1,SweetWine,Wine
2,DessertWine,Wine
3,FullBodiedWine,Wine
4,Gamay,Wine
...,...,...
113,PinotNoir,RedWine
114,FrenchWine,Wine
115,CotesDOr,RedBurgundy
116,Anjou,RoseWine


CPU times: user 7.39 ms, sys: 1.05 ms, total: 8.44 ms
Wall time: 7.18 ms


In [202]:
%%time

# 8. Find all French wine types
#command = """
#MATCH (wine)-[:subClassOf*]->(parent)
#WHERE parent.name = 'FrenchWine'
#RETURN wine.name
#"""
command = """
MATCH (wine)-[:subClassOf]-(parent {name: 'FrenchWine'})
RETURN wine.name
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

No result found
CPU times: user 1.69 ms, sys: 980 Î¼s, total: 2.67 ms
Wall time: 1.66 ms


In [208]:
%%time

# 9. Find properties that point to WineDescriptor classes
#command = """
#MATCH (prop:Property)-[:hasRange]->(desc)
#WHERE desc.name CONTAINS 'Wine' AND desc.type = 'Class'
#RETURN prop.name, desc.name
#"""
command = """
MATCH (prop:Property)-[:hasRange]-(desc {name ~= 'Wine'})
RETURN prop.name, desc.name
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

Unnamed: 0,prop.name,desc.name
0,hasWineDescriptor,WineDescriptor
1,hasColor,WineColor
2,hasBody,WineBody
3,hasFlavor,WineFlavor
4,hasSugar,WineSugar
5,madeFromGrape,WineGrape


CPU times: user 7.84 ms, sys: 2 ms, total: 9.85 ms
Wall time: 8.08 ms


In [209]:
%%time

# 10. Get the full subclass tree depth for all wine types
#command = """
#MATCH path = (wine)-[:subClassOf*]->(root:PotableLiquid)
#WHERE wine.type = 'Class' AND root.name = 'Wine'
#RETURN wine.name, LENGTH(path) AS hierarchy_depth
#ORDER BY hierarchy_depth DESC
#LIMIT 15
#"""
command = """
MATCH (wine {type: 'Class'})-[:subClassOf]-(root:PotableLiquid {name: 'Wine'})
RETURN wine.name, root.name
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    df.columns = get_return_statements(command)
    display(df)

Unnamed: 0,wine.name,root.name
0,DryWine,Wine
1,SweetWine,Wine
2,DessertWine,Wine
3,FullBodiedWine,Wine
4,Gamay,Wine
5,CaliforniaWine,Wine
6,TexasWine,Wine
7,GermanWine,Wine
8,ItalianWine,Wine
9,Bordeaux,Wine


CPU times: user 10.3 ms, sys: 911 Î¼s, total: 11.2 ms
Wall time: 9.39 ms


In [210]:
print("Notebook finished !")

Notebook finished !
