<div class="alert alert-block alert-success">
    <h1>
        Example notebook - Integrate RDF file
    </h1>
    <p>
        Link to dataset : <a href="TR/owl-guide/wine.rdf">Link to Wine RDF file download</a>
    </p>
</div>

# Import modules and functions

In [1]:
import os
import re
import time
from tqdm.auto import tqdm

import json
import os

In [2]:
%load_ext autoreload
%autoreload 2

# Check data files are available

In [3]:
example_name = "wine_ontology"
path_data = f"{os.getcwd()}/data/{example_name}"
if not os.path.exists(path_data):
    print(f"{path_data} does not exist")
    os.makedirs(path_data)
    print(f"{path_data} folder created âœ”")

list_files = sorted(os.listdir(path_data))
rdf_filename = "wine_ontology.rdf"
if not rdf_filename in list_files:
    print(f"Wine ontology RDF file is not available in {path_data}. \n")
    url_rdf = "https://www.w3.org/TR/owl-guide/wine.rdf"
    print(
        f"Downloading from: {url_rdf}"
        "\n...\n"
    )
    os.system(f"wget {url_rdf} -O {path_data}/{rdf_filename}")

# Load dataset

In [4]:
path_rdf = f"{path_data}/{example_name}.rdf"
path_rdf

'/home/ubuntu/turingdb-examples/examples/notebooks/public_version/data/wine_ontology/wine_ontology.rdf'

# Create `turingdb` python client

<div class="alert alert-block alert-info">
    <h2>
        See <a href="https://docs.turingdb.ai/quickstart">TuringDB Get started documentation</a> for the important steps to follow :
    </h2>
    <h3>
        Remember to have your <code>turingdb</code> server running while working in this notebook !
    </h3>
</div>

In [7]:
from turingdb import TuringDB

# Create TuringDB client
# set host parameter to the URL (as string) on which TuringDB is running,
# default "http://localhost:6666"
client = TuringDB(host="http://localhost:6666")
try:
    client.warmup()
except Exception as e:
    print(f"TuringDB not started, please run `uv run turingdb` in your terminal")

In [8]:
# Get list of available graphs
list_graphs = client.list_available_graphs()

In [9]:
# Get list of loaded graphs
client.list_loaded_graphs()

['healthcare_dataset1_subgraph',
 'healthcare_dataset1',
 'crypto_orbitaal_fraud_detection1',
 'default']

# Set graph name

In [10]:
# Set graph name
graph_name_prefix = example_name
graph_name_nb_suffix = str(
    max(
        [
            int(re.sub(graph_name_prefix, "", g))
            for g in list_graphs
            if g.startswith(graph_name_prefix)
            and re.sub(graph_name_prefix, "", g).isdigit()
        ]
        + [0]
    )
    + 1
)
graph_name = graph_name_prefix + graph_name_nb_suffix
graph_name = re.sub("-", "_", graph_name)
graph_name

'wine_ontology1'

# Create JSONL file

In [11]:
def sanitize_identifier(s):
    """Sanitize identifiers (labels, relationship types, property names)"""
    if not s:
        return "ID_empty"
    
    # Replace invalid characters with underscores
    s = s.replace('#', '_').replace('-', '_').replace('/', '_').replace('.', '_')
    
    # Prefix with ID_ if starts with digit or underscore
    if s[0].isdigit() or s[0] == '_':
        s = 'ID_' + s
    
    # Remove any other non-alphanumeric characters except underscores
    s = ''.join(c if c.isalnum() or c == '_' else '_' for c in s)
    
    return s


def sanitize_value(s):
    """Sanitize property values (escape quotes)"""
    return s.replace("'", "\\'").replace('"', '\\"')

In [12]:
def owlready_to_jsonl(rdf_file, filepath, include_blank_nodes=False):
    """
    Convert an RDF/OWL ontology to JSONL format for TuringDB's LOAD JSONL.

    Args:
        rdf_file: Path to the RDF/OWL file.
        filepath: Output JSONL file path.
        include_blank_nodes: Whether to include blank nodes (currently unused).

    Returns:
        str: filepath written
    """
    import json
    from owlready2 import (
        get_ontology, ThingClass, Thing,
        ObjectProperty, DataProperty, sync_reasoner_pellet
    )

    onto = get_ontology(f"file://{rdf_file}").load()

    try:
        with onto:
            sync_reasoner_pellet(
                infer_property_values=True, infer_data_property_values=True
            )
        print("Reasoner completed successfully")
    except Exception as e:
        print(f"Reasoner failed: {e}, continuing without reasoning")

    def sanitize_identifier(s):
        if not s:
            return "ID_empty"
        s = s.replace('#', '_').replace('-', '_').replace('/', '_').replace('.', '_')
        if s[0].isdigit() or s[0] == '_':
            s = 'ID_' + s
        s = ''.join(c if c.isalnum() or c == '_' else '_' for c in s)
        return s

    def get_most_generic_parent(cls):
        ancestors = [
            a for a in cls.ancestors()
            if isinstance(a, ThingClass) and a.name != 'Thing' and a != cls
        ]
        return ancestors[-1] if ancestors else cls

    # Collect all valid node URIs
    valid_uris = set()
    for cls in onto.classes():
        if cls.name != 'Thing':
            valid_uris.add(cls.iri)
    for prop in onto.properties():
        valid_uris.add(prop.iri)
    for ind in onto.individuals():
        valid_uris.add(ind.iri)

    # Assign sequential integer IDs
    uri_to_id = {uri: i for i, uri in enumerate(sorted(valid_uris))}

    with open(filepath, "w", encoding="utf-8") as f:
        # Write class nodes
        for cls in onto.classes():
            if cls.name == 'Thing' or cls.iri not in uri_to_id:
                continue
            int_id = uri_to_id[cls.iri]
            root = get_most_generic_parent(cls)
            label = sanitize_identifier(root.name)
            props = {"id": cls.iri, "uri": cls.iri, "name": cls.name, "type": "Class"}
            parents = [
                p.name for p in cls.is_a
                if isinstance(p, ThingClass) and p.name != 'Thing' and p.iri in valid_uris
            ]
            if parents:
                props["parents"] = ", ".join(parents)
            if hasattr(cls, 'comment') and cls.comment:
                c = cls.comment[0] if isinstance(cls.comment, list) else cls.comment
                props["comment"] = str(c)
            if hasattr(cls, 'label') and cls.label:
                lbl = cls.label[0] if isinstance(cls.label, list) else cls.label
                props["rdfs_label"] = str(lbl)
            f.write(json.dumps({
                "type": "node", "id": str(int_id),
                "labels": [label], "properties": props,
            }) + "\n")

        # Write property nodes
        for prop in onto.properties():
            if prop.iri not in uri_to_id:
                continue
            int_id = uri_to_id[prop.iri]
            prop_type = "ObjectProperty" if isinstance(prop, ObjectProperty) else "DataProperty"
            props = {
                "id": prop.iri, "uri": prop.iri, "name": prop.name,
                "type": "Property", "property_type": prop_type,
            }
            f.write(json.dumps({
                "type": "node", "id": str(int_id),
                "labels": ["Property"], "properties": props,
            }) + "\n")

        # Write relationships
        rel_id = 0
        added_edges = set()

        for cls in onto.classes():
            if cls.name == 'Thing' or cls.iri not in uri_to_id:
                continue
            for parent in cls.is_a:
                if isinstance(parent, ThingClass) and parent.name != 'Thing' and parent.iri in valid_uris:
                    edge_key = (cls.iri, 'subClassOf', parent.iri)
                    if edge_key not in added_edges:
                        f.write(json.dumps({
                            "type": "relationship", "id": str(rel_id),
                            "label": "subClassOf",
                            "start": {"id": str(uri_to_id[cls.iri])},
                            "end": {"id": str(uri_to_id[parent.iri])},
                            "properties": {},
                        }) + "\n")
                        added_edges.add(edge_key)
                        rel_id += 1

        for prop in onto.properties():
            if prop.iri not in uri_to_id:
                continue
            if hasattr(prop, 'domain') and prop.domain:
                domains = prop.domain if isinstance(prop.domain, list) else [prop.domain]
                for domain in domains:
                    if isinstance(domain, ThingClass) and domain.name != 'Thing' and domain.iri in valid_uris:
                        edge_key = (domain.iri, 'hasDomain', prop.iri)
                        if edge_key not in added_edges:
                            f.write(json.dumps({
                                "type": "relationship", "id": str(rel_id),
                                "label": "hasDomain",
                                "start": {"id": str(uri_to_id[domain.iri])},
                                "end": {"id": str(uri_to_id[prop.iri])},
                                "properties": {},
                            }) + "\n")
                            added_edges.add(edge_key)
                            rel_id += 1
            if hasattr(prop, 'range') and prop.range:
                ranges = prop.range if isinstance(prop.range, list) else [prop.range]
                for rng in ranges:
                    if isinstance(rng, ThingClass) and rng.name != 'Thing' and rng.iri in valid_uris:
                        edge_key = (prop.iri, 'hasRange', rng.iri)
                        if edge_key not in added_edges:
                            f.write(json.dumps({
                                "type": "relationship", "id": str(rel_id),
                                "label": "hasRange",
                                "start": {"id": str(uri_to_id[prop.iri])},
                                "end": {"id": str(uri_to_id[rng.iri])},
                                "properties": {},
                            }) + "\n")
                            added_edges.add(edge_key)
                            rel_id += 1

        for ind in onto.individuals():
            if ind.iri not in uri_to_id:
                continue
            for prop in ind.get_properties():
                values = getattr(ind, prop.name, [])
                if not isinstance(values, list):
                    values = [values]
                for value in values:
                    if isinstance(value, Thing) and value.iri in valid_uris:
                        pred = sanitize_identifier(prop.name)
                        edge_key = (ind.iri, pred, value.iri)
                        if edge_key not in added_edges:
                            f.write(json.dumps({
                                "type": "relationship", "id": str(rel_id),
                                "label": pred,
                                "start": {"id": str(uri_to_id[ind.iri])},
                                "end": {"id": str(uri_to_id[value.iri])},
                                "properties": {},
                            }) + "\n")
                            added_edges.add(edge_key)
                            rel_id += 1

    print(f"JSONL file written to: {filepath}")
    print(f"Ontology: {len(uri_to_id)} nodes, {rel_id} relationships")
    return filepath

print("Function owlready_to_jsonl() defined successfully")

Function owlready_to_jsonl() defined successfully


In [13]:
turingdb_dir = os.path.expanduser("~/.turing/data")
if not os.path.exists(turingdb_dir):
    raise ValueError(f"""
    TuringDB directory {turingdb_dir} does not exist.
    Make sure the value you set here is the same you set when running turingdb.
    """)

In [14]:
%%time

# Write RDF ontology to JSONL format
print("Writing RDF ontology to JSONL...")

jsonl_dir = os.path.expanduser(turingdb_dir)
jsonl_filename = f"{graph_name}.jsonl"
jsonl_filepath = os.path.join(jsonl_dir, jsonl_filename)

owlready_to_jsonl(path_rdf, jsonl_filepath, include_blank_nodes=True)

Writing RDF ontology to JSONL...
Reasoner failed: [Errno 2] No such file or directory: 'java', continuing without reasoning
JSONL file written to: /home/ubuntu/.turing/data/wine_ontology1.jsonl
Ontology: 87 nodes, 28 relationships
CPU times: user 72.9 ms, sys: 1.92 ms, total: 74.9 ms
Wall time: 84.5 ms


* Owlready2 * Running Pellet...
    java -Xmx2000M -cp /home/ubuntu/turingdb-examples/.venv/lib/python3.13/site-packages/owlready2/pellet/httpcore-4.2.2.jar:/home/ubuntu/turingdb-examples/.venv/lib/python3.13/site-packages/owlready2/pellet/jena-core-2.10.0.jar:/home/ubuntu/turingdb-examples/.venv/lib/python3.13/site-packages/owlready2/pellet/log4j-core-2.19.0.jar:/home/ubuntu/turingdb-examples/.venv/lib/python3.13/site-packages/owlready2/pellet/slf4j-log4j12-1.6.4.jar:/home/ubuntu/turingdb-examples/.venv/lib/python3.13/site-packages/owlready2/pellet/jgrapht-jdk1.5.jar:/home/ubuntu/turingdb-examples/.venv/lib/python3.13/site-packages/owlready2/pellet/log4j-api-2.19.0.jar:/home/ubuntu/turingdb-examples/.venv/lib/python3.13/site-packages/owlready2/pellet/commons-codec-1.6.jar:/home/ubuntu/turingdb-examples/.venv/lib/python3.13/site-packages/owlready2/pellet/xml-apis-1.4.01.jar:/home/ubuntu/turingdb-examples/.venv/lib/python3.13/site-packages/owlready2/pellet/jena-arq-2.10.0.jar:/home/ubun

'/home/ubuntu/.turing/data/wine_ontology1.jsonl'

In [17]:
! head $jsonl_filepath

{"type": "node", "id": "64", "labels": ["PotableLiquid"], "properties": {"id": "http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine##Wine", "uri": "http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine##Wine", "name": "Wine", "type": "Class", "rdfs_label": "wine"}}
{"type": "node", "id": "72", "labels": ["Winery"], "properties": {"id": "http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine##Winery", "uri": "http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine##Winery", "name": "Winery", "type": "Class"}}
{"type": "node", "id": "56", "labels": ["Vintage"], "properties": {"id": "http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine##Vintage", "uri": "http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine##Vintage", "name": "Vintage", "type": "Class"}}
{"type": "node", "id": "69", "labels": ["Grape"], "properties": {"id": "http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine##WineGrape", "uri": "http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine##WineGrape", "name": "WineGrape", "type": "Cl

In [18]:
! tail $jsonl_filepath

{"type": "relationship", "id": "18", "label": "hasRange", "start": {"id": "75"}, "end": {"id": "65"}, "properties": {}}
{"type": "relationship", "id": "19", "label": "hasDomain", "start": {"id": "64"}, "end": {"id": "76"}, "properties": {}}
{"type": "relationship", "id": "20", "label": "hasRange", "start": {"id": "76"}, "end": {"id": "66"}, "properties": {}}
{"type": "relationship", "id": "21", "label": "hasRange", "start": {"id": "82"}, "end": {"id": "42"}, "properties": {}}
{"type": "relationship", "id": "22", "label": "hasDomain", "start": {"id": "56"}, "end": {"id": "80"}, "properties": {}}
{"type": "relationship", "id": "23", "label": "hasRange", "start": {"id": "80"}, "end": {"id": "57"}, "properties": {}}
{"type": "relationship", "id": "24", "label": "hasDomain", "start": {"id": "42"}, "end": {"id": "74"}, "properties": {}}
{"type": "relationship", "id": "25", "label": "hasRange", "start": {"id": "74"}, "end": {"id": "42"}, "properties": {}}
{"type": "relationship", "id": "26", 

# Create `turingdb` graph

In [19]:
print(f"Creating graph: {graph_name}")

Creating graph: wine_ontology1


In [20]:
%%time

start_time = time.time()

# Load JSONL into TuringDB
print(f"\nLoading JSONL into TuringDB as graph '{graph_name}' ...")
client.query(f"LOAD JSONL '{jsonl_filename}' AS {graph_name}")

execution_time = time.time() - start_time
print(f"\n\u2713 Graph '{graph_name}' created in {execution_time:.2f} seconds")

# Set active graph for querying
client.set_graph(graph_name)


Loading JSONL into TuringDB as graph 'wine_ontology1' ...

âœ“ Graph 'wine_ontology1' created in 0.05 seconds
CPU times: user 2.64 ms, sys: 28 Î¼s, total: 2.67 ms
Wall time: 50.1 ms


In [21]:
# Returns the commit history
client.query("CALL db.history()")

Unnamed: 0,commit,nodeCount,edgeCount,partCount
0,41701431b16f2289,0,0,0
1,3821eee812b44ab,87,28,1


# Query `turingdb` graph

## Use metaqueries to have insight on graph overall structure

<h3>
    To learn more about ðŸ“® Metaqueries, please check TuringDB documentation on this <a href="https://turingdb.mintlify.app/query/cypher_subset#%F0%9F%93%AE-metaqueries">link</a>
</h3>

In [23]:
%%time

# CALL propertyTypes() - returns a column of all the different node and edge properties and their types in the database
command = """
CALL db.propertyTypes()
"""
df_propertyTypes = client.query(command)
if df_propertyTypes.empty:
    print("No result found")
else:
    display(df_propertyTypes)

Unnamed: 0,id,propertyType,valueType
0,0,id,String
1,1,name,String
2,2,rdfs_label,String
3,3,type,String
4,4,uri,String
5,5,comment,String
6,6,parents,String
7,7,property_type,String


CPU times: user 3.4 ms, sys: 18 Î¼s, total: 3.42 ms
Wall time: 3.22 ms


In [24]:
# Get node properties
nodes_properties = df_propertyTypes["propertyType"].values.tolist()
print(f"Node properties: {nodes_properties}")

Node properties: ['id', 'name', 'rdfs_label', 'type', 'uri', 'comment', 'parents', 'property_type']


In [25]:
%%time

# CALL labels () - returns a column of all the different node labels
command = """
CALL db.labels()
"""
df_labels = client.query(command)
if df_labels.empty:
    print("No result found")
else:
    display(df_labels)

Unnamed: 0,id,label
0,0,PotableLiquid
1,1,Winery
2,2,Vintage
3,3,Grape
4,4,WhiteWine
...,...,...
60,60,Beaujolais
61,61,Anjou
62,62,AmericanWine
63,63,AlsatianWine


CPU times: user 3.31 ms, sys: 14 Î¼s, total: 3.32 ms
Wall time: 3.29 ms


In [26]:
%%time

# CALL edgeTypes() - returns a column of all the different edge types (edge equivalent of node labels)
command = """
CALL db.edgeTypes()
"""
df_edgeTypes = client.query(command)
if df_edgeTypes.empty:
    print("No result found")
else:
    display(df_edgeTypes)

Unnamed: 0,id,edgeType
0,0,subClassOf
1,1,hasDomain
2,2,hasRange


CPU times: user 2.7 ms, sys: 0 ns, total: 2.7 ms
Wall time: 2.64 ms


## Counts

In [27]:
%%time

# Find number of nodes and number of edges in the graph
n_nodes = len(client.query("MATCH (n) RETURN n"))
n_edges = len(client.query("MATCH (n)-->(m) RETURN n, m"))
print(f"Graph: {n_nodes:,} nodes and {n_edges:,} edges\n")

Graph: 87 nodes and 28 edges

CPU times: user 2.07 ms, sys: 3 Î¼s, total: 2.07 ms
Wall time: 1.99 ms


In [28]:
%%time

# Count all nodes
command = """
MATCH (n)
RETURN COUNT(n)
"""
df_count_nodes = client.query(command)
display(df_count_nodes)

# Count all edges
command = """
MATCH (n)-->()
RETURN COUNT(n)
"""
df_count_edges = client.query(command)
display(df_count_edges)

# Find number of nodes and number of edges in the graph
n_nodes = int(df_count_nodes.loc[0, "COUNT(n)"])
n_edges = int(df_count_edges.loc[0, "COUNT(n)"])
print(f"Graph: {n_nodes:,} nodes and {n_edges:,} edges\n")

Unnamed: 0,COUNT(n)
0,87


Unnamed: 0,COUNT(n)
0,28


Graph: 87 nodes and 28 edges

CPU times: user 4.56 ms, sys: 0 ns, total: 4.56 ms
Wall time: 4.37 ms


In [29]:
# Count number of nodes for each label
for label in df_labels["label"]:
    print(100 * '-')
    print(f"label: {label}")
    df_curr_label = client.query(f"""
    MATCH (n:{label})
    RETURN n.name
    """)
    df_curr_label_count = client.query(f"""
    MATCH (n:{label})
    RETURN count(n)
    """)
    display(df_curr_label)
    display(df_curr_label_count)
    
    print()
print(100 * '-')

----------------------------------------------------------------------------------------------------
label: PotableLiquid


Unnamed: 0,n.name
0,Wine


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: Winery


Unnamed: 0,n.name
0,Winery


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: Vintage


Unnamed: 0,n.name
0,Vintage


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: Grape


Unnamed: 0,n.name
0,WineGrape


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: WhiteWine


Unnamed: 0,n.name
0,WhiteWine


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: WhiteTableWine


Unnamed: 0,n.name
0,WhiteTableWine


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: TableWine


Unnamed: 0,n.name
0,TableWine


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: WhiteNonSweetWine


Unnamed: 0,n.name
0,WhiteNonSweetWine


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: WhiteLoire


Unnamed: 0,n.name
0,WhiteLoire


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: Loire


Unnamed: 0,n.name
0,Loire


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: WhiteBurgundy


Unnamed: 0,n.name
0,WhiteBurgundy


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: Burgundy


Unnamed: 0,n.name
0,Burgundy


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: WhiteBordeaux


Unnamed: 0,n.name
0,WhiteBordeaux


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: Bordeaux


Unnamed: 0,n.name
0,Bordeaux
1,Sauternes


Unnamed: 0,count(n)
0,2



----------------------------------------------------------------------------------------------------
label: Region


Unnamed: 0,n.name
0,Region


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: VintageYear


Unnamed: 0,n.name
0,VintageYear


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: WineDescriptor


Unnamed: 0,n.name
0,WineTaste
1,WineColor
2,WineFlavor
3,WineBody
4,WineSugar
5,WineDescriptor


Unnamed: 0,count(n)
0,6



----------------------------------------------------------------------------------------------------
label: Zinfandel


Unnamed: 0,n.name
0,Zinfandel


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: Tours


Unnamed: 0,n.name
0,Tours


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: SweetWine


Unnamed: 0,n.name
0,SweetWine


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: DessertWine


Unnamed: 0,n.name
0,SweetRiesling


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: Wine


Unnamed: 0,n.name
0,EarlyHarvest
1,LateHarvest
2,DessertWine


Unnamed: 0,count(n)
0,3



----------------------------------------------------------------------------------------------------
label: Riesling


Unnamed: 0,n.name
0,Riesling


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: StEmilion


Unnamed: 0,n.name
0,StEmilion


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: SemillonOrSauvignonBlanc


Unnamed: 0,n.name
0,SemillonOrSauvignonBlanc


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: Semillon


Unnamed: 0,n.name
0,Semillon


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: SauvignonBlanc


Unnamed: 0,n.name
0,SauvignonBlanc


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: Sancerre


Unnamed: 0,n.name
0,Sancerre


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: RoseWine


Unnamed: 0,n.name
0,RoseWine


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: RedWine


Unnamed: 0,n.name
0,RedWine
1,Port


Unnamed: 0,count(n)
0,2



----------------------------------------------------------------------------------------------------
label: RedTableWine


Unnamed: 0,n.name
0,RedTableWine


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: RedBurgundy


Unnamed: 0,n.name
0,RedBurgundy


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: RedBordeaux


Unnamed: 0,n.name
0,RedBordeaux


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: PinotNoir


Unnamed: 0,n.name
0,PinotNoir


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: PinotBlanc


Unnamed: 0,n.name
0,PinotBlanc


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: PetiteSyrah


Unnamed: 0,n.name
0,PetiteSyrah


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: Pauillac


Unnamed: 0,n.name
0,Pauillac


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: Medoc


Unnamed: 0,n.name
0,Medoc


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: Muscadet


Unnamed: 0,n.name
0,Muscadet


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: Meursault


Unnamed: 0,n.name
0,Meursault


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: Merlot


Unnamed: 0,n.name
0,Merlot


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: Meritage


Unnamed: 0,n.name
0,Meritage


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: Margaux


Unnamed: 0,n.name
0,Margaux


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: ItalianWine


Unnamed: 0,n.name
0,ItalianWine
1,Chianti


Unnamed: 0,count(n)
0,2



----------------------------------------------------------------------------------------------------
label: IceWine


Unnamed: 0,n.name
0,IceWine


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: GermanWine


Unnamed: 0,n.name
0,GermanWine


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: Gamay


Unnamed: 0,n.name
0,Gamay


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: FullBodiedWine


Unnamed: 0,n.name
0,FullBodiedWine


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: FrenchWine


Unnamed: 0,n.name
0,FrenchWine


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: DryWine


Unnamed: 0,n.name
0,DryWine


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: DryWhiteWine


Unnamed: 0,n.name
0,DryWhiteWine


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: DryRiesling


Unnamed: 0,n.name
0,DryRiesling


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: DryRedWine


Unnamed: 0,n.name
0,DryRedWine


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: CotesDOr


Unnamed: 0,n.name
0,CotesDOr


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: CheninBlanc


Unnamed: 0,n.name
0,CheninBlanc


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: Chardonnay


Unnamed: 0,n.name
0,Chardonnay


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: CaliforniaWine


Unnamed: 0,n.name
0,CaliforniaWine


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: TexasWine


Unnamed: 0,n.name
0,TexasWine


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: CabernetSauvignon


Unnamed: 0,n.name
0,CabernetSauvignon


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: CabernetFranc


Unnamed: 0,n.name
0,CabernetFranc


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: Beaujolais


Unnamed: 0,n.name
0,Beaujolais


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: Anjou


Unnamed: 0,n.name
0,Anjou


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: AmericanWine


Unnamed: 0,n.name
0,AmericanWine


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: AlsatianWine


Unnamed: 0,n.name
0,AlsatianWine


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: Property


Unnamed: 0,n.name
0,producesWine
1,hasWineDescriptor
2,madeIntoWine
3,adjacentRegion
4,hasVintageYear
5,locatedIn
6,hasColor
7,hasBody
8,hasFlavor
9,hasSugar


Unnamed: 0,count(n)
0,13



----------------------------------------------------------------------------------------------------


## Queries

In [30]:
%%time

# Match all edges and return them
command = """
MATCH (n)-[e]->(m)
RETURN n.name, n.type, e, m.name, m.type
"""
df_all_edges = client.query(command)
if df_all_edges.empty:
    print("No result found")
else:
    display(df_all_edges)

Unnamed: 0,n.name,n.type,e,m.name,m.type
0,Wine,Class,0,hasWineDescriptor,Property
1,Wine,Class,1,hasColor,Property
2,Wine,Class,2,madeFromGrape,Property
3,Vintage,Class,3,hasVintageYear,Property
4,Sauternes,Class,4,LateHarvest,Class
5,Sauternes,Class,5,Bordeaux,Class
6,Region,Class,6,adjacentRegion,Property
7,VintageYear,Class,7,yearValue,Property
8,WineTaste,Class,8,WineDescriptor,Class
9,WineColor,Class,9,WineDescriptor,Class


CPU times: user 4.79 ms, sys: 0 ns, total: 4.79 ms
Wall time: 4.75 ms


In [31]:
%%time

# 1. Get all wine types (classes that are subclasses of Wine/PotableLiquid)
command = """
MATCH (wine:PotableLiquid)
WHERE wine.type = 'Class'
RETURN wine.name, wine.parents
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,wine.name,wine.parents
0,Wine,


CPU times: user 2.78 ms, sys: 0 ns, total: 2.78 ms
Wall time: 2.74 ms


In [32]:
%%time

# 2. Find all properties and their domains/ranges
command = """
MATCH (domain)-[:hasDomain]->(prop:Property)-[:hasRange]->(range)
RETURN domain.name, prop.name, prop.property_type, range.name
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,domain.name,prop.name,prop.property_type,range.name
0,Wine,hasWineDescriptor,DataProperty,WineDescriptor
1,Wine,hasColor,DataProperty,WineColor
2,Wine,madeFromGrape,DataProperty,WineGrape
3,Vintage,hasVintageYear,DataProperty,VintageYear
4,Region,adjacentRegion,DataProperty,Region


CPU times: user 2.96 ms, sys: 0 ns, total: 2.96 ms
Wall time: 2.86 ms


In [33]:
%%time

# 3. Get the complete class hierarchy for a specific wine (e.g., Chardonnay)
command = """
MATCH (leaf)-[:subClassOf]->(root)
WHERE leaf.name = 'Chardonnay'
RETURN leaf.name, root.name
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

No result found
CPU times: user 1.13 ms, sys: 7 Î¼s, total: 1.14 ms
Wall time: 1.12 ms


In [34]:
%%time

# 4. Find all red wines (wines that are subclass of RedWine)
command = """
MATCH (wine)-[:subClassOf]->(parent)
WHERE parent.name = 'RedWine'
RETURN wine.name, wine.parents
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,wine.name,wine.parents
0,Port,RedWine


CPU times: user 1.69 ms, sys: 935 Î¼s, total: 2.63 ms
Wall time: 2.59 ms


In [35]:
%%time

# 5. Find all direct children of Wine class
command = """
MATCH (child)-[:subClassOf]->(parent)
WHERE parent.name = 'Wine'
RETURN child.name, child.type
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,child.name,child.type
0,EarlyHarvest,Class
1,LateHarvest,Class
2,DessertWine,Class


CPU times: user 2.71 ms, sys: 0 ns, total: 2.71 ms
Wall time: 2.54 ms


In [36]:
%%time

# 6. Get all properties that Wine class can have
command = """
MATCH (wine)-[:hasDomain]->(prop:Property)
WHERE wine.name = 'Wine'
RETURN prop.name, prop.property_type
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,prop.name,prop.property_type
0,hasWineDescriptor,DataProperty
1,hasColor,DataProperty
2,madeFromGrape,DataProperty


CPU times: user 2.58 ms, sys: 0 ns, total: 2.58 ms
Wall time: 2.41 ms


In [37]:
%%time

# 7. Find wine classes that have the most parent classes (complex definitions)
#command = """
#MATCH (wine)-[:subClassOf]->(parent)
#WHERE wine.type = 'Class'
#WITH wine.name AS wine_name, COUNT(parent) AS parent_count
#RETURN wine_name, parent_count
#ORDER BY parent_count DESC
#LIMIT 10
#"""
command = """
MATCH (wine)-[:subClassOf]->(parent)
WHERE wine.type = 'Class'
RETURN wine.name, parent.name
LIMIT 10
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,wine.name,parent.name
0,Sauternes,LateHarvest
1,Sauternes,Bordeaux
2,WineTaste,WineDescriptor
3,WineColor,WineDescriptor
4,WineFlavor,WineTaste
5,WineBody,WineTaste
6,WineSugar,WineTaste
7,SweetRiesling,DessertWine
8,EarlyHarvest,Wine
9,LateHarvest,Wine


CPU times: user 3.22 ms, sys: 963 Î¼s, total: 4.18 ms
Wall time: 4.15 ms


In [38]:
%%time

# 8. Find all French wine types
command = """
MATCH (wine)-[:subClassOf]->(parent)
WHERE parent.name = 'FrenchWine'
RETURN wine.name
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

No result found
CPU times: user 1.2 ms, sys: 0 ns, total: 1.2 ms
Wall time: 1.19 ms


In [39]:
print("Notebook finished !")

Notebook finished !
