<div class="alert alert-block alert-success">
    <h1>
        Example notebook - Integrate RDF file
    </h1>
    <p>
        Link to dataset : <a href="TR/owl-guide/wine.rdf">Link to Wine RDF file download</a>
    </p>
</div>

# Import modules and functions

In [1]:
import os
import re
import time
from tqdm.auto import tqdm

from turingdb_examples.graph import split_cypher_commands

In [2]:
%load_ext autoreload
%autoreload 2

# Check data files are available

In [3]:
example_name = "wine_ontology"
path_data = f"{os.getcwd()}/data/{example_name}"
if not os.path.exists(path_data):
    print(f"{path_data} does not exist")
    os.makedirs(path_data)
    print(f"{path_data} folder created âœ”")

list_files = sorted(os.listdir(path_data))
rdf_filename = "wine_ontology.rdf"
if not rdf_filename in list_files:
    print(f"Wine ontology RDF file is not available in {path_data}. \n")
    url_rdf = "https://www.w3.org/TR/owl-guide/wine.rdf"
    print(
        f"Downloading from: {url_rdf}"
        "\n...\n"
    )
    os.system(f"wget {url_rdf} -O {path_data}/{rdf_filename}")

# Load dataset

In [4]:
path_rdf = f"{path_data}/{example_name}.rdf"
path_rdf

'/home/dev/turingdb-examples/examples/notebooks/public_version/data/wine_ontology/wine_ontology.rdf'

# Graph Creation in TuringDB

## Build Cypher CREATE Commands

In [5]:
def sanitize_identifier(s):
    """Sanitize identifiers (labels, relationship types, property names)"""
    if not s:
        return "ID_empty"
    
    # Replace invalid characters with underscores
    s = s.replace('#', '_').replace('-', '_').replace('/', '_').replace('.', '_')
    
    # Prefix with ID_ if starts with digit or underscore
    if s[0].isdigit() or s[0] == '_':
        s = 'ID_' + s
    
    # Remove any other non-alphanumeric characters except underscores
    s = ''.join(c if c.isalnum() or c == '_' else '_' for c in s)
    
    return s


def sanitize_value(s):
    """Sanitize property values (escape quotes)"""
    return s.replace("'", "\\'").replace('"', '\\"')

In [6]:
def owlready_to_cypher(rdf_file, include_blank_nodes=False):
    from owlready2 import get_ontology, ThingClass, Thing, ObjectProperty, DataProperty, sync_reasoner_pellet
    
    onto = get_ontology(f"file://{rdf_file}").load()
    
    # Run reasoner
    try:
        with onto:
            sync_reasoner_pellet(infer_property_values=True, infer_data_property_values=True)
        print("Reasoner completed successfully")
    except Exception as e:
        print(f"Reasoner failed: {e}, continuing without reasoning")
    
    cypher_parts = []
    node_counter = 0
    node_to_var = {}
    
    def get_var(uri):
        nonlocal node_counter
        if uri not in node_to_var:
            node_to_var[uri] = f"n{node_counter}"
            node_counter += 1
        return node_to_var[uri]
    
    def get_most_generic_parent(cls):
        """Get the most generic non-Thing parent"""
        ancestors = [a for a in cls.ancestors() 
                    if isinstance(a, ThingClass) 
                    and a.name != 'Thing'
                    and a != cls]
        return ancestors[-1] if ancestors else cls
    
    # First pass: collect all nodes that will be created
    valid_nodes = set()
    node_parts = []
    
    # Add all classes
    for cls in onto.classes():
        if cls.name != 'Thing':
            valid_nodes.add(cls.iri)
    
    # Add all properties
    for prop in onto.properties():
        valid_nodes.add(prop.iri)
    
    # Add all individuals
    for ind in onto.individuals():
        valid_nodes.add(ind.iri)
    
    # Create nodes for all classes
    for cls in onto.classes():
        if cls.name == 'Thing':
            continue
        
        var = get_var(cls.iri)
        root = get_most_generic_parent(cls)
        label = sanitize_identifier(root.name)
        name = cls.name
        
        # Build properties
        props = [
            f"uri: '{sanitize_value(cls.iri)}'",
            f"name: '{sanitize_value(name)}'",
            f"type: 'Class'"
        ]
        
        # Add direct parent names (only those in valid_nodes)
        direct_parents = [p.name for p in cls.is_a 
                         if isinstance(p, ThingClass) 
                         and p.name != 'Thing'
                         and p.iri in valid_nodes]
        if direct_parents:
            props.append(f"parents: '{sanitize_value(', '.join(direct_parents))}'")
        
        # Add comment if exists
        if hasattr(cls, 'comment') and cls.comment:
            comment = cls.comment[0] if isinstance(cls.comment, list) else cls.comment
            props.append(f"comment: '{sanitize_value(str(comment))}'")
        
        # Add label if exists
        if hasattr(cls, 'label') and cls.label:
            lbl = cls.label[0] if isinstance(cls.label, list) else cls.label
            props.append(f"rdfs_label: '{sanitize_value(str(lbl))}'")
        
        node_parts.append(f"({var}:{label} {{{', '.join(props)}}})")
    
    # Create nodes for properties
    for prop in onto.properties():
        prop_var = get_var(prop.iri)
        prop_type = "ObjectProperty" if isinstance(prop, ObjectProperty) else "DataProperty"
        
        props_list = [
            f"uri: '{sanitize_value(prop.iri)}'",
            f"name: '{sanitize_value(prop.name)}'",
            f"type: 'Property'",
            f"property_type: '{prop_type}'"
        ]
        
        node_parts.append(f"({prop_var}:Property {{{', '.join(props_list)}}})")
    
    # Add all node creation as a single CREATE statement
    cypher_parts.append("CREATE " + ",\n".join(node_parts))
    
    # Track added edges to avoid duplicates
    added_edges = set()
    edge_parts = []
    
    # Create subClassOf relationships (only for valid nodes)
    for cls in onto.classes():
        if cls.name == 'Thing':
            continue
        
        s_uri = cls.iri
        
        for parent in cls.is_a:
            if isinstance(parent, ThingClass) and parent.name != 'Thing' and parent.iri in valid_nodes:
                o_uri = parent.iri
                edge_key = (s_uri, 'subClassOf', o_uri)
                
                if edge_key not in added_edges:
                    edge_parts.append(
                        f"MATCH (a {{uri: '{sanitize_value(s_uri)}'}}), (b {{uri: '{sanitize_value(o_uri)}'}}) "
                        f"CREATE (a)-[:subClassOf]->(b)"
                    )
                    added_edges.add(edge_key)
    
    # Add domain relationships (only for valid nodes)
    for prop in onto.properties():
        prop_uri = prop.iri
        
        if hasattr(prop, 'domain') and prop.domain:
            domains = prop.domain if isinstance(prop.domain, list) else [prop.domain]
            for domain in domains:
                if isinstance(domain, ThingClass) and domain.name != 'Thing' and domain.iri in valid_nodes:
                    domain_uri = domain.iri
                    edge_key = (domain_uri, 'hasDomain', prop_uri)
                    
                    if edge_key not in added_edges:
                        edge_parts.append(
                            f"MATCH (a {{uri: '{sanitize_value(domain_uri)}'}}), (b {{uri: '{sanitize_value(prop_uri)}'}}) "
                            f"CREATE (a)-[:hasDomain]->(b)"
                        )
                        added_edges.add(edge_key)
        
        # Add range relationships (only for valid nodes)
        if hasattr(prop, 'range') and prop.range:
            ranges = prop.range if isinstance(prop.range, list) else [prop.range]
            for rng in ranges:
                if isinstance(rng, ThingClass) and rng.name != 'Thing' and rng.iri in valid_nodes:
                    range_uri = rng.iri
                    edge_key = (prop_uri, 'hasRange', range_uri)
                    
                    if edge_key not in added_edges:
                        edge_parts.append(
                            f"MATCH (a {{uri: '{sanitize_value(prop_uri)}'}}), (b {{uri: '{sanitize_value(range_uri)}'}}) "
                            f"CREATE (a)-[:hasRange]->(b)"
                        )
                        added_edges.add(edge_key)
    
    # Handle individuals (only for valid nodes)
    for ind in onto.individuals():
        s_uri = ind.iri
        
        for prop in ind.get_properties():
            values = getattr(ind, prop.name, [])
            if not isinstance(values, list):
                values = [values]
            
            for value in values:
                if isinstance(value, Thing) and value.iri in valid_nodes:
                    o_uri = value.iri
                    pred = sanitize_identifier(prop.name)
                    edge_key = (s_uri, pred, o_uri)
                    
                    if edge_key not in added_edges:
                        edge_parts.append(
                            f"MATCH (a {{uri: '{sanitize_value(s_uri)}'}}), (b {{uri: '{sanitize_value(o_uri)}'}}) "
                            f"CREATE (a)-[:{pred}]->(b)"
                        )
                        added_edges.add(edge_key)
    
    # Add all edge creation statements
    cypher_parts.extend(edge_parts)
    
    return "\n".join(cypher_parts)

In [7]:
%%time

graph_CREATE_command_owlready = owlready_to_cypher(path_rdf, include_blank_nodes=True)

# Save Cypher query to file
with open(f"{path_data}/{example_name}_owlready.cypher", "w") as f:
    f.write(graph_CREATE_command_owlready)

graph_CREATE_command = graph_CREATE_command_owlready

print(f"""
Cypher CREATE command :
* size: {len(graph_CREATE_command.encode('utf-8'))/1024/1000:.4f} MB\n
{100 * '*'}
{graph_CREATE_command if len(graph_CREATE_command.split("\n")) < 10000 else "\n".join(graph_CREATE_command.split('\n')[:5]) + "\n...\n" + "\n".join(graph_CREATE_command.split('\n')[-5:])}
{100 * '*'}
""")

* Owlready2 * Running Pellet...
    java -Xmx2000M -cp /home/dev/turingdb-examples/.venv/lib/python3.13/site-packages/owlready2/pellet/antlr-3.2.jar:/home/dev/turingdb-examples/.venv/lib/python3.13/site-packages/owlready2/pellet/jena-core-2.10.0.jar:/home/dev/turingdb-examples/.venv/lib/python3.13/site-packages/owlready2/pellet/owlapi-distribution-3.4.3-bin.jar:/home/dev/turingdb-examples/.venv/lib/python3.13/site-packages/owlready2/pellet/xercesImpl-2.10.0.jar:/home/dev/turingdb-examples/.venv/lib/python3.13/site-packages/owlready2/pellet/xml-apis-1.4.01.jar:/home/dev/turingdb-examples/.venv/lib/python3.13/site-packages/owlready2/pellet/jena-arq-2.10.0.jar:/home/dev/turingdb-examples/.venv/lib/python3.13/site-packages/owlready2/pellet/slf4j-log4j12-1.6.4.jar:/home/dev/turingdb-examples/.venv/lib/python3.13/site-packages/owlready2/pellet/jgrapht-jdk1.5.jar:/home/dev/turingdb-examples/.venv/lib/python3.13/site-packages/owlready2/pellet/log4j-core-2.19.0.jar:/home/dev/turingdb-examples/.

Reasoner completed successfully

Cypher CREATE command :
* size: 0.0386 MB

****************************************************************************************************
CREATE (n0:PotableLiquid {uri: 'http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine##Wine', name: 'Wine', type: 'Class', rdfs_label: 'wine'}),
(n1:Winery {uri: 'http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine##Winery', name: 'Winery', type: 'Class'}),
(n2:Vintage {uri: 'http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine##Vintage', name: 'Vintage', type: 'Class'}),
(n3:Grape {uri: 'http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine##WineGrape', name: 'WineGrape', type: 'Class'}),
(n4:Wine {uri: 'http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine##WhiteWine', name: 'WhiteWine', type: 'Class', parents: 'Wine'}),
(n5:DryWhiteWine {uri: 'http://www.w3.org/TR/2003/PR-owl-guide-20031209/wine##WhiteTableWine', name: 'WhiteTableWine', type: 'Class', parents: 'DryWine, TableWine, WhiteNonSweetWine'}),
(n6:DryW

* Owlready2 * Pellet took 1.41410493850708 seconds
* Owlready * (NB: only changes on entities loaded in Python are shown, other changes are done but not listed)


## Split command into chunks

In [8]:
%%time

chunks = split_cypher_commands(graph_CREATE_command, max_size_mb=1)

print(f"âœ“ Split into {len(chunks['node_chunks'])} node chunk(s) and {len(chunks['edge_chunks'])} edge chunk(s)")

print("\nNode chunks:")
for i, chunk in enumerate(chunks['node_chunks']):
    print(f"  Node chunk {i+1}: {len(chunk.encode('utf-8'))/1024:.1f} KB")
    if i == 10:
        print("  ...")
        break

print("\nEdge chunks:")
for i, chunk in enumerate(chunks['edge_chunks']):
    print(f"  Edge chunk {i+1}: {len(chunk.encode('utf-8'))/1024:.1f} KB")
    if i == 10:
        print("  ...")
        break

âœ“ Split into 1 node chunk(s) and 135 edge chunk(s)

Node chunks:
  Node chunk 1: 13.3 KB

Edge chunks:
  Edge chunk 1: 0.2 KB
  Edge chunk 2: 0.2 KB
  Edge chunk 3: 0.2 KB
  Edge chunk 4: 0.2 KB
  Edge chunk 5: 0.2 KB
  Edge chunk 6: 0.2 KB
  Edge chunk 7: 0.2 KB
  Edge chunk 8: 0.2 KB
  Edge chunk 9: 0.2 KB
  Edge chunk 10: 0.2 KB
  Edge chunk 11: 0.2 KB
  ...
CPU times: user 1.03 ms, sys: 2 Î¼s, total: 1.03 ms
Wall time: 1 ms


# Create graph using `turingdb` python package

<div class="alert alert-block alert-info">
    <h2>
        See <a href="https://docs.turingdb.ai/quickstart">TuringDB Get started documentation</a> for the important steps to follow :
    </h2>
    <h3>
        <ul>
            <li>Create your TuringDB account</li>
            <li>Create your instance in the <a href="https://console.turingdb.ai/auth">TuringDB Cloud UI</a></li>
            <li>Copy your Instance ID from the Database Instances management page</li>
            <li>Get API Key from the Settings in UI</li>
        </ul>
        Remember to have your instance active while working in this notebook !
    </h3>
</div>

In [9]:
from turingdb import TuringDB

# Create TuringDB client
# set host parameter to the URL (as string) on which TuringDB is running,
# default "http://localhost:6666"
client = TuringDB(host="http://localhost:6666")
try:
    client.warmup()
except Exception as e:
    print(f"TuringDB not started, please run `uv run turingdb` in your terminal")

In [10]:
# Get list of available graphs
list_graphs = client.list_available_graphs()

In [11]:
client.list_loaded_graphs()

['healthcare_dataset1',
 'crypto_orbitaal_fraud_detection1',
 'citeab_antibody1',
 'default']

In [12]:
# Set graph name
graph_name_prefix = example_name
graph_name_nb_suffix = str(
    max(
        [
            int(re.sub(graph_name_prefix, "", g))
            for g in list_graphs
            if g.startswith(graph_name_prefix)
            and re.sub(graph_name_prefix, "", g).isdigit()
        ]
        + [0]
    )
    + 1
)
graph_name = graph_name_prefix + graph_name_nb_suffix
graph_name = re.sub("-", "_", graph_name)
print(f"graph_name: {graph_name}")

graph_name: wine_ontology1


In [13]:
from turingdb.exceptions import TuringDBException

In [14]:
%%time

# Set graph
try:
    client.create_graph(graph_name)
except TuringDBException as e:
    print(e)

# Set working graph
client.set_graph(graph_name)

CPU times: user 2.12 ms, sys: 98 Î¼s, total: 2.22 ms
Wall time: 8.5 ms


In [15]:
%%time

# Create a new change on the graph
client.checkout()
change = client.new_change()
print(f"Current change {change}")

# Checkout into the change
client.checkout(change=change)

Current change 0
CPU times: user 2.58 ms, sys: 0 ns, total: 2.58 ms
Wall time: 2.07 ms


In [16]:
%%time

# Run CREATE command
print("\nExecuting query on TuringDB...")
start_time = time.time()

print(f"âœ“ Split into {len(chunks['node_chunks'])} node chunk(s) and {len(chunks['edge_chunks'])} edge chunk(s)")

# CREATE nodes
print("\nNode chunks:")
for i, chunk in enumerate(tqdm(chunks['node_chunks'])):
    result = client.query(chunk)
# Commit the change
client.query("COMMIT")
print(f"âœ“ {len(chunks['node_chunks'])} node chunks done")

# CREATE edges
print("\nEdge chunks:")
for i, chunk in enumerate(tqdm(chunks['edge_chunks'])):
    result = client.query(chunk)
# Commit the change
client.query("COMMIT")
print(f"âœ“ {len(chunks['edge_chunks'])} edge chunks done")

execution_time = time.time() - start_time
print(f"\nâœ“ Graph created successfully in {execution_time:.2f} seconds")

# Submit changes
start_time = time.time()
client.query("CHANGE SUBMIT")
execution_time = time.time() - start_time
print(f"\nâœ“ Changes successfully submitted in {execution_time:.2f} seconds")

# Checkout into main
client.checkout()


Executing query on TuringDB...
âœ“ Split into 1 node chunk(s) and 135 edge chunk(s)

Node chunks:


  0%|          | 0/1 [00:00<?, ?it/s]

âœ“ 1 node chunks done

Edge chunks:


  0%|          | 0/135 [00:00<?, ?it/s]

âœ“ 135 edge chunks done

âœ“ Graph created successfully in 0.12 seconds

âœ“ Changes successfully submitted in 0.05 seconds
CPU times: user 107 ms, sys: 13 ms, total: 120 ms
Wall time: 172 ms


In [17]:
# Returns the commit history
client.query("CALL db.history()")

Unnamed: 0,commit,nodeCount,edgeCount,partCount
0,fa689f59a9e5b108,0,0,0
1,5333a944219b0fd4,87,0,1
2,3b2f73b3b4b7b7b0,0,135,1
3,7d43a641d8d05fbe,0,0,0


<div class="alert alert-block alert-info">
    <h2>
        Visualize your graph in TuringDB Graph Visualizer ! Now that your instance is running:
    </h2>
    <h3>
        <ul>
            <li>Go to <a href="https://console.turingdb.ai/databases">TuringDB Console - Database Instances</a></li>
            <li>In your current instance panel, click on "Open Visualizer" button</li>
            <li>Visualizer opens, now you can choose your graph in the dropdown menu at the top-right corner</li>
        </ul>
        You can then play with your graph and visualize the nodes you want !
    </h3>
</div>

# Query TuringDB

## Use metaqueries to have insight on graph overall structure

<h3>
    To learn more about ðŸ“® Metaqueries, please check TuringDB documentation on this <a href="https://turingdb.mintlify.app/query/cypher_subset#%F0%9F%93%AE-metaqueries">link</a>
</h3>

In [18]:
%%time

# CALL propertyTypes() - returns a column of all the different node and edge properties and their types in the database
command = """
CALL db.propertyTypes()
"""
df_propertyTypes = client.query(command)
if df_propertyTypes.empty:
    print("No result found")
else:
    display(df_propertyTypes)

Unnamed: 0,id,propertyType,valueType
0,0,rdfs_label,String
1,1,type,String
2,2,name,String
3,3,uri,String
4,4,parents,String
5,5,comment,String
6,6,property_type,String


CPU times: user 4.3 ms, sys: 28 Î¼s, total: 4.32 ms
Wall time: 3.88 ms


In [19]:
# Get node properties
nodes_properties = df_propertyTypes["propertyType"].values.tolist()
print(f"Node properties: {nodes_properties}")

Node properties: ['rdfs_label', 'type', 'name', 'uri', 'parents', 'comment', 'property_type']


In [20]:
%%time

# CALL labels () - returns a column of all the different node labels
command = """
CALL db.labels()
"""
df_labels = client.query(command)
if df_labels.empty:
    print("No result found")
else:
    display(df_labels)

Unnamed: 0,id,label
0,0,PotableLiquid
1,1,Winery
2,2,Vintage
3,3,Grape
4,4,Wine
5,5,DryWhiteWine
6,6,DryWine
7,7,Bordeaux
8,8,Region
9,9,VintageYear


CPU times: user 3.34 ms, sys: 934 Î¼s, total: 4.28 ms
Wall time: 3.79 ms


In [21]:
%%time

# CALL edgeTypes() - returns a column of all the different edge types (edge equivalent of node labels)
command = """
CALL db.edgeTypes()
"""
df_edgeTypes = client.query(command)
if df_edgeTypes.empty:
    print("No result found")
else:
    display(df_edgeTypes)

Unnamed: 0,id,edgeType
0,0,subClassOf
1,1,hasDomain
2,2,hasRange


CPU times: user 3.84 ms, sys: 87 Î¼s, total: 3.93 ms
Wall time: 3.41 ms


## Counts

In [22]:
%%time

# Find number of nodes and number of edges in the graph
n_nodes = len(client.query("MATCH (n) RETURN n"))
n_edges = len(client.query("MATCH (n)-->(m) RETURN n, m"))
print(f"Graph: {n_nodes:,} nodes and {n_edges:,} edges\n")

Graph: 87 nodes and 135 edges

CPU times: user 2.48 ms, sys: 1.05 ms, total: 3.53 ms
Wall time: 4.78 ms


In [23]:
%%time

# Count all nodes
command = """
MATCH (n)
RETURN COUNT(n)
"""
df_count_nodes = client.query(command)
display(df_count_nodes)

# Count all edges
command = """
MATCH (n)-->()
RETURN COUNT(n)
"""
df_count_edges = client.query(command)
display(df_count_edges)

# Find number of nodes and number of edges in the graph
n_nodes = int(df_count_nodes.loc[0, "COUNT(n)"])
n_edges = int(df_count_edges.loc[0, "COUNT(n)"])
print(f"Graph: {n_nodes:,} nodes and {n_edges:,} edges\n")

Unnamed: 0,COUNT(n)
0,87


Unnamed: 0,COUNT(n)
0,135


Graph: 87 nodes and 135 edges

CPU times: user 5.76 ms, sys: 998 Î¼s, total: 6.75 ms
Wall time: 6.29 ms


In [24]:
# Count number of nodes for each label
for label in df_labels["label"]:
    print(100 * '-')
    print(f"label: {label}")
    df_curr_label = client.query(f"""
    MATCH (n:{label})
    RETURN n.name
    """)
    df_curr_label_count = client.query(f"""
    MATCH (n:{label})
    RETURN count(n)
    """)
    display(df_curr_label)
    display(df_curr_label_count)
    
    print()
print(100 * '-')

----------------------------------------------------------------------------------------------------
label: PotableLiquid


Unnamed: 0,n.name
0,Meritage
1,CheninBlanc
2,Chardonnay
3,PinotBlanc
4,PinotNoir
5,Port
6,Sancerre
7,LateHarvest
8,SauvignonBlanc
9,Semillon


Unnamed: 0,count(n)
0,16



----------------------------------------------------------------------------------------------------
label: Winery


Unnamed: 0,n.name
0,Winery


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: Vintage


Unnamed: 0,n.name
0,Vintage


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: Grape


Unnamed: 0,n.name
0,WineGrape


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: Wine


Unnamed: 0,n.name
0,EarlyHarvest
1,ItalianWine
2,GermanWine
3,Gamay
4,CaliforniaWine
5,WhiteWine
6,TexasWine
7,RedWine
8,RoseWine
9,FullBodiedWine


Unnamed: 0,count(n)
0,17



----------------------------------------------------------------------------------------------------
label: DryWhiteWine


Unnamed: 0,n.name
0,DryRiesling
1,Muscadet
2,WhiteBurgundy
3,WhiteTableWine


Unnamed: 0,count(n)
0,4



----------------------------------------------------------------------------------------------------
label: DryWine


Unnamed: 0,n.name
0,DryWhiteWine
1,Burgundy
2,TableWine


Unnamed: 0,count(n)
0,3



----------------------------------------------------------------------------------------------------
label: Bordeaux


Unnamed: 0,n.name
0,RedBordeaux
1,WhiteBordeaux


Unnamed: 0,count(n)
0,2



----------------------------------------------------------------------------------------------------
label: Region


Unnamed: 0,n.name
0,Region


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: VintageYear


Unnamed: 0,n.name
0,VintageYear


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: WineDescriptor


Unnamed: 0,n.name
0,WineTaste
1,WineFlavor
2,WineDescriptor
3,WineBody
4,WineSugar


Unnamed: 0,count(n)
0,5



----------------------------------------------------------------------------------------------------
label: WineColor


Unnamed: 0,n.name
0,WineColor


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: RedTableWine


Unnamed: 0,n.name
0,CabernetSauvignon
1,DryRedWine
2,CotesDOr
3,Chianti
4,CabernetFranc
5,Beaujolais
6,Zinfandel
7,Margaux
8,Merlot
9,Medoc


Unnamed: 0,count(n)
0,14



----------------------------------------------------------------------------------------------------
label: CheninBlanc


Unnamed: 0,n.name
0,Tours


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: LateHarvest


Unnamed: 0,n.name
0,IceWine
1,Sauternes


Unnamed: 0,count(n)
0,2



----------------------------------------------------------------------------------------------------
label: TableWine


Unnamed: 0,n.name
0,RedTableWine
1,DryWine


Unnamed: 0,count(n)
0,2



----------------------------------------------------------------------------------------------------
label: Chardonnay


Unnamed: 0,n.name
0,Meursault


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: RoseWine


Unnamed: 0,n.name
0,Anjou


Unnamed: 0,count(n)
0,1



----------------------------------------------------------------------------------------------------
label: Property


Unnamed: 0,n.name
0,producesWine
1,hasWineDescriptor
2,madeIntoWine
3,adjacentRegion
4,hasVintageYear
5,locatedIn
6,hasColor
7,hasBody
8,hasFlavor
9,hasSugar


Unnamed: 0,count(n)
0,13



----------------------------------------------------------------------------------------------------


## Queries

In [25]:
%%time

# Match all edges and return them
command = """
MATCH (n)-[e]->(m)
RETURN n.name, n.type, e, m.name, m.type
"""
df_all_edges = client.query(command)
if df_all_edges.empty:
    print("No result found")
else:
    display(df_all_edges)

Unnamed: 0,n.name,n.type,e,m.name,m.type
0,Meritage,Class,0,RedWine,Class
1,CheninBlanc,Class,1,WhiteNonSweetWine,Class
2,Chardonnay,Class,2,WhiteWine,Class
3,PinotBlanc,Class,3,WhiteWine,Class
4,PinotNoir,Class,4,RedWine,Class
...,...,...,...,...,...
130,hasColor,Property,130,WineColor,Class
131,hasBody,Property,131,WineBody,Class
132,hasFlavor,Property,132,WineFlavor,Class
133,hasSugar,Property,133,WineSugar,Class


CPU times: user 4.67 ms, sys: 965 Î¼s, total: 5.64 ms
Wall time: 5.26 ms


In [26]:
%%time

# 1. Get all wine types (classes that are subclasses of Wine/PotableLiquid)
command = """
MATCH (wine:PotableLiquid)
WHERE wine.type = 'Class'
RETURN wine.name, wine.parents
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,wine.name,wine.parents
0,Meritage,RedWine
1,CheninBlanc,WhiteNonSweetWine
2,Chardonnay,WhiteWine
3,PinotBlanc,WhiteWine
4,PinotNoir,RedWine
5,Port,"RedWine, FullBodiedWine, SweetWine"
6,Sancerre,"WhiteLoire, SauvignonBlanc, WhiteNonSweetWine"
7,LateHarvest,"SweetWine, Wine"
8,SauvignonBlanc,SemillonOrSauvignonBlanc
9,Semillon,SemillonOrSauvignonBlanc


CPU times: user 3.48 ms, sys: 53 Î¼s, total: 3.53 ms
Wall time: 3.23 ms


In [27]:
%%time

# 2. Find all properties and their domains/ranges
command = """
MATCH (domain)-[:hasDomain]->(prop:Property)-[:hasRange]->(range)
RETURN domain.name, prop.name, prop.property_type, range.name
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,domain.name,prop.name,prop.property_type,range.name
0,Wine,hasColor,DataProperty,WineColor
1,Wine,hasWineDescriptor,DataProperty,WineDescriptor
2,Wine,madeFromGrape,DataProperty,WineGrape
3,Vintage,hasVintageYear,DataProperty,VintageYear
4,Region,adjacentRegion,DataProperty,Region


CPU times: user 4.05 ms, sys: 4 Î¼s, total: 4.05 ms
Wall time: 3.73 ms


In [28]:
%%time

# 3. Get the complete class hierarchy for a specific wine (e.g., Chardonnay)
command = """
MATCH (leaf)-[:subClassOf]->(root)
WHERE leaf.name = 'Chardonnay'
RETURN leaf.name, root.name
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,leaf.name,root.name
0,Chardonnay,WhiteWine


CPU times: user 3.25 ms, sys: 28 Î¼s, total: 3.28 ms
Wall time: 2.87 ms


In [29]:
%%time

# 4. Find all red wines (wines that are subclass of RedWine)
command = """
MATCH (wine)-[:subClassOf]->(parent)
WHERE parent.name = 'RedWine'
RETURN wine.name, wine.parents
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,wine.name,wine.parents
0,Meritage,RedWine
1,PinotNoir,RedWine
2,Port,"RedWine, FullBodiedWine, SweetWine"
3,RedBordeaux,"Bordeaux, RedWine"
4,DryRedWine,"RedWine, DryWine, TableWine"
5,RedTableWine,"RedWine, DryWine, TableWine"


CPU times: user 3.28 ms, sys: 32 Î¼s, total: 3.31 ms
Wall time: 2.99 ms


In [30]:
%%time

# 5. Find all direct children of Wine class
command = """
MATCH (child)-[:subClassOf]->(parent)
WHERE parent.name = 'Wine'
RETURN child.name, child.type
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,child.name,child.type
0,LateHarvest,Class
1,EarlyHarvest,Class
2,ItalianWine,Class
3,GermanWine,Class
4,Gamay,Class
5,CaliforniaWine,Class
6,WhiteWine,Class
7,TexasWine,Class
8,RedWine,Class
9,RoseWine,Class


CPU times: user 3.52 ms, sys: 60 Î¼s, total: 3.58 ms
Wall time: 3.27 ms


In [31]:
%%time

# 6. Get all properties that Wine class can have
command = """
MATCH (wine)-[:hasDomain]->(prop:Property)
WHERE wine.name = 'Wine'
RETURN prop.name, prop.property_type
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,prop.name,prop.property_type
0,hasColor,DataProperty
1,hasWineDescriptor,DataProperty
2,madeFromGrape,DataProperty


CPU times: user 3.2 ms, sys: 23 Î¼s, total: 3.23 ms
Wall time: 2.91 ms


In [32]:
%%time

# 7. Find wine classes that have the most parent classes (complex definitions)
#command = """
#MATCH (wine)-[:subClassOf]->(parent)
#WHERE wine.type = 'Class'
#WITH wine.name AS wine_name, COUNT(parent) AS parent_count
#RETURN wine_name, parent_count
#ORDER BY parent_count DESC
#LIMIT 10
#"""
command = """
MATCH (wine)-[:subClassOf]->(parent)
WHERE wine.type = 'Class'
RETURN wine.name, parent.name
LIMIT 10
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

Unnamed: 0,wine.name,parent.name
0,Meritage,RedWine
1,CheninBlanc,WhiteNonSweetWine
2,Chardonnay,WhiteWine
3,PinotBlanc,WhiteWine
4,PinotNoir,RedWine
5,Port,RedWine
6,Port,FullBodiedWine
7,Port,SweetWine
8,Sancerre,WhiteLoire
9,Sancerre,SauvignonBlanc


CPU times: user 2.48 ms, sys: 940 Î¼s, total: 3.42 ms
Wall time: 3.06 ms


In [33]:
%%time

# 8. Find all French wine types
command = """
MATCH (wine)-[:subClassOf]->(parent)
WHERE parent.name = 'FrenchWine'
RETURN wine.name
"""
df = client.query(command)
if df.empty:
    print("No result found")
else:
    display(df)

No result found
CPU times: user 1.37 ms, sys: 0 ns, total: 1.37 ms
Wall time: 1.07 ms


In [34]:
print("Notebook finished !")

Notebook finished !
