# On the pertinence of LLMs for ontology learning: results analysis

This notebook contains the functions we used to construct our results. 
We try to automate our analysis as much as possible, in particular by leveraging SPARQL queries.
However, humans are still needed to align the constructed ontologies with our reference one, i.e., the pizza ontology.

In particular, we needed to align the ontologies independently of the exact labels.
Hence, to help us in this endeavour, we used a basic textual distance on the labels, or on the end of the URI if no label was available.
In the end, the counting was done manually. 

## Notebook setup

In [None]:
import os

from rapidfuzz import process, fuzz
from rdflib import Graph, BNode
from rdflib.namespace import Namespace

import pandas as pd

In [None]:
# Define some namespaces to ease the URIs manipulation and visualisation

# Namespace whose string form is stripped from the URIs of the three constructed
# ontologies (Text to OWL, OLAF LLM, OLAF no LLM) before the textual comparisons.
olaf_nlp4kgc_ns = Namespace("https://github.com/wikit-ai/olaf-llm-nlp4kgc2024/o/example#")
# Namespace stripped from the URIs of the reference pizza ontology.
pizza_onto_ns = Namespace("http://www.co-ode.org/ontologies/pizza/pizza.owl#")

# Prefix -> namespace mapping handed to the SPARQL helper functions (their `ns` argument).
olaf_nlp4kgc_ns_bindings = {
    "olaf-nlp4kgc": olaf_nlp4kgc_ns,
    "pizza-onto": pizza_onto_ns
}

In [None]:
# Load the different RDF graphs

# Both root folders come from the environment; `os.getenv` yields None when a variable is unset.
_data_dir = os.getenv("DATA_PATH")
_results_dir = os.getenv("RESULTS_PATH")

# Reference pizza ontology (ground truth)
pizza_onto_graph = Graph()
pizza_onto_graph.parse(os.path.join(_data_dir, "pizza_onto_ground_truth.ttl"))

# "Text to OWL" graph (swap the commented line to analyse the OpenAI output instead)
llm_text2owl_graph = Graph()
# llm_text2owl_graph.parse(os.path.join(_results_dir, "llm_text_to_owl", "llm_owl_pizza_onto_openai.ttl"))
llm_text2owl_graph.parse(os.path.join(_results_dir, "llm_text_to_owl", "llm_owl_pizza_onto_mistral.ttl"))

# "OLAF LLM" pipeline graph (swap the commented line to analyse the OpenAI output instead)
olaf_llm_graph = Graph()
# olaf_llm_graph.parse(os.path.join(_results_dir, "llm_pipeline", "llm_pipeline_pizza_kr_rdf_graph_openai.ttl"))
olaf_llm_graph.parse(os.path.join(_results_dir, "llm_pipeline", "llm_pipeline_kr_pizza_rdf_graph_mistral.ttl"))

# "OLAF no LLM" pipeline graph
olaf_no_llm_graph = Graph()
olaf_no_llm_graph.parse(os.path.join(_results_dir, "no_llm_pipeline", "no_llm_pipeline_pizza_kr_rdf_graph.ttl"))

## SPARQL tools

In [None]:
# Some SPARQL queries we use for our analysis
# NOTE(review): the rdf:, rdfs: and owl: prefixes are not declared inside the queries;
# presumably they are resolved through rdflib's namespace bindings — confirm.

# Distinct resources typed owl:Class (blank nodes are not filtered out by the query).
owl_classes_sparql_q = """
            SELECT DISTINCT ?class WHERE {
                ?class rdf:type owl:Class .
            }
        """

# rdfs:label values of the resources typed owl:Class (no DISTINCT, no language filter).
owl_classes_labels_sparql_q = """
            SELECT ?label WHERE {
                ?class rdf:type owl:Class ;
                        rdfs:label ?label .
            }
        """

# Same as above, restricted to the labels whose language tag is "en".
owl_classes_en_labels_sparql_q = """
            SELECT ?label WHERE {
                ?class rdf:type owl:Class ;
                        rdfs:label ?label .
                FILTER(LANG(?label) = "en").
            }
        """

# Distinct resources typed owl:ObjectProperty.
owl_obj_props_sparql_q = """
            SELECT DISTINCT ?prop WHERE {
                ?prop rdf:type owl:ObjectProperty .
            }
        """

# Distinct resources explicitly typed owl:NamedIndividual.
owl_named_individuals_sparql_q = """
            SELECT DISTINCT ?ind WHERE {
                ?ind rdf:type owl:NamedIndividual .
            }
        """

# Distinct resources matching at least one of:
#   - subject of a triple whose predicate is typed owl:ObjectProperty,
#   - object of such a triple,
#   - typed with a resource that is itself typed owl:Class,
#   - typed owl:NamedIndividual.
owl_named_ind_from_obj_props_sparql_q = """
            SELECT DISTINCT ?ind WHERE {
                {
                    ?prop rdf:type owl:ObjectProperty .
                    ?ind ?prop ?o .
                }
                UNION
                {
                    ?prop rdf:type owl:ObjectProperty .
                    ?s ?prop ?ind .
                }
                UNION
                {
                    ?ind rdf:type ?class .
                    ?class rdf:type owl:Class .
                }
                UNION
                {
                    ?ind rdf:type owl:NamedIndividual .
                }
            }
        """

# Same four patterns as the previous query, plus the resources typed owl:Class
# (the result variable is still named ?ind).
owl_named_classes_ind_from_obj_props_sparql_q = """
            SELECT DISTINCT ?ind WHERE {
                {
                    ?prop rdf:type owl:ObjectProperty .
                    ?ind ?prop ?o .
                }
                UNION
                {
                    ?prop rdf:type owl:ObjectProperty .
                    ?s ?prop ?ind .
                }
                UNION
                {
                    ?ind rdf:type ?class .
                    ?class rdf:type owl:Class .
                }
                UNION
                {
                    ?ind rdf:type owl:NamedIndividual .
                }
                UNION
                {
                    ?ind rdf:type owl:Class .
                }
            }
        """

# Distinct (child, parent) pairs linked by rdfs:subClassOf (blank nodes are not filtered out by the query).
rdfs_subclassof_tuples_sparql_q = """
            SELECT DISTINCT ?child ?parent WHERE {
                ?child rdfs:subClassOf ?parent .
            }
        """

In [None]:
def get_sparql_q_res_fragments_skip_bnodes(sparql_q: str, graph: Graph, ns: dict[str, Namespace]) -> set[tuple]:
    """Run a SPARQL query over an RDF graph and return the URIs fragments skipping blank nodes.

    Each result row is turned into a tuple holding the `fragment` of every
    non-blank item of the row. Rows containing only blank nodes are dropped:
    they would otherwise all collapse into a single empty tuple and count as
    one spurious element when the size of the returned set is used as a count.

    Parameters
    ----------
    sparql_q : str
        The SPARQL query.
    graph : Graph
        The RDF graph.
    ns : dict[str, Namespace]
        Namespaces to use to extract URIs fragments (i.e., end).

    Returns
    -------
    set[tuple]
        One tuple of URI fragments per result row that has at least one
        non-blank item. A row mixing blank and named nodes yields a shorter
        tuple containing only the fragments of its named nodes.
    """
    q_res = graph.query(sparql_q, initNs=ns)

    fragments = set()
    for res in q_res:
        # Every non-blank item is expected to expose `.fragment` (i.e., to be a URI).
        named_fragments = tuple(item.fragment for item in res if not isinstance(item, BNode))
        # Skip rows made only of blank nodes instead of recording an empty tuple.
        if named_fragments:
            fragments.add(named_fragments)

    return fragments

In [None]:
def get_sparql_q_label_res(sparql_q: str, graph: Graph, ns: dict[str, Namespace]) -> set[str]:
    """Run a SPARQL query over an RDF graph and return the first column of the results as strings.

    Only the first selected variable of each result row is kept; it is
    converted with `str`, so the query may select labels as well as URIs.

    Parameters
    ----------
    sparql_q : str
        The SPARQL query.
    graph : Graph
        The RDF graph.
    ns : dict[str, Namespace]
        Namespaces bindings made available to the query.

    Returns
    -------
    set[str]
        The distinct string values of the first result column.
    """
    q_res = graph.query(sparql_q, initNs=ns)

    # A set is used, so duplicated values returned by the query are merged.
    labels = {str(res[0]) for res in q_res}

    return labels

In [None]:
def get_sparql_q_tuple_res(sparql_q: str, graph: Graph, ns: dict[str, Namespace]) -> set[tuple]:
    """Run a SPARQL query over an RDF graph and return pairs of strings.

    The SPARQL query must select at least two variables: the first two items
    of each result row are converted with `str` and kept as a pair.

    Parameters
    ----------
    sparql_q : str
        The SPARQL query.
    graph : Graph
        The RDF graph.
    ns : dict[str, Namespace]
        Namespaces bindings made available to the query.

    Returns
    -------
    set[tuple]
        The distinct (first, second) string pairs.
    """
    pairs = set()
    for row in graph.query(sparql_q, initNs=ns):
        first, second = str(row[0]), str(row[1])
        pairs.add((first, second))
    return pairs

## Ontologies overview

In [None]:
# Extract ontology components

def _fragments(sparql_q: str, graph: Graph) -> set[tuple]:
    """Run `sparql_q` on `graph` with the notebook namespace bindings and return the URI fragments."""
    return get_sparql_q_res_fragments_skip_bnodes(sparql_q=sparql_q, graph=graph, ns=olaf_nlp4kgc_ns_bindings)

# OWL classes
llm_text2owl_g_classes = _fragments(owl_classes_sparql_q, llm_text2owl_graph)
olaf_llm_g_classes = _fragments(owl_classes_sparql_q, olaf_llm_graph)
olaf_no_llm_g_classes = _fragments(owl_classes_sparql_q, olaf_no_llm_graph)
pizza_onto_g_classes = _fragments(owl_classes_sparql_q, pizza_onto_graph)

# OWL object properties
llm_text2owl_g_props = _fragments(owl_obj_props_sparql_q, llm_text2owl_graph)
olaf_llm_g_props = _fragments(owl_obj_props_sparql_q, olaf_llm_graph)
olaf_no_llm_g_props = _fragments(owl_obj_props_sparql_q, olaf_no_llm_graph)
pizza_onto_g_props = _fragments(owl_obj_props_sparql_q, pizza_onto_graph)

# OWL named individuals (explicitly typed)
llm_text2owl_g_individuals = _fragments(owl_named_individuals_sparql_q, llm_text2owl_graph)
olaf_llm_g_individuals = _fragments(owl_named_individuals_sparql_q, olaf_llm_graph)
olaf_no_llm_g_individuals = _fragments(owl_named_individuals_sparql_q, olaf_no_llm_graph)
pizza_onto_g_individuals = _fragments(owl_named_individuals_sparql_q, pizza_onto_graph)

# Individuals found through object properties, class membership or explicit typing
llm_text2owl_g_individuals_from_props = _fragments(owl_named_ind_from_obj_props_sparql_q, llm_text2owl_graph)
olaf_llm_g_individuals_from_props = _fragments(owl_named_ind_from_obj_props_sparql_q, olaf_llm_graph)
olaf_no_llm_g_individuals_from_props = _fragments(owl_named_ind_from_obj_props_sparql_q, olaf_no_llm_graph)
pizza_onto_g_individuals_from_props = _fragments(owl_named_ind_from_obj_props_sparql_q, pizza_onto_graph)

# rdfs:subClassOf (child, parent) tuples
llm_text2owl_g_subclassof_t = _fragments(rdfs_subclassof_tuples_sparql_q, llm_text2owl_graph)
olaf_llm_g_subclassof_t = _fragments(rdfs_subclassof_tuples_sparql_q, olaf_llm_graph)
olaf_no_llm_g_subclassof_t = _fragments(rdfs_subclassof_tuples_sparql_q, olaf_no_llm_graph)
pizza_onto_g_subclassof_t = _fragments(rdfs_subclassof_tuples_sparql_q, pizza_onto_graph)

In [None]:
# Construct the counts table for visualisation

# For each ontology, the extracted component sets listed in the same order as the "Count" labels.
_components_by_ontology = {
    "Pizza Ontology": (
        pizza_onto_g_classes,
        pizza_onto_g_props,
        pizza_onto_g_individuals,
        pizza_onto_g_individuals_from_props,
        pizza_onto_g_subclassof_t,
    ),
    "Text to OWL": (
        llm_text2owl_g_classes,
        llm_text2owl_g_props,
        llm_text2owl_g_individuals,
        llm_text2owl_g_individuals_from_props,
        llm_text2owl_g_subclassof_t,
    ),
    "OLAF LLM": (
        olaf_llm_g_classes,
        olaf_llm_g_props,
        olaf_llm_g_individuals,
        olaf_llm_g_individuals_from_props,
        olaf_llm_g_subclassof_t,
    ),
    "OLAF no LLM": (
        olaf_no_llm_g_classes,
        olaf_no_llm_g_props,
        olaf_no_llm_g_individuals,
        olaf_no_llm_g_individuals_from_props,
        olaf_no_llm_g_subclassof_t,
    ),
}

onto_overview = {
    "Count": [
        "OWL named classes",
        "OWL object properties",
        "OWL named individuals",
        "OWL named individuals from object properties",
        "RDFS subClassOf tuples",
    ],
    # One column per ontology, holding the size of each component set.
    **{
        ontology: [len(component) for component in components]
        for ontology, components in _components_by_ontology.items()
    },
}

df_overview = pd.DataFrame(onto_overview)

In [None]:
# Display the component counts of each ontology (rendered as the cell output).
df_overview

## Ontologies evaluation

Classes

In [None]:
# English class labels of the pizza ontology, lower-cased and with spaces removed.
pizza_onto_class_labels = {
    raw_label.lower().replace(" ", "")
    for raw_label in get_sparql_q_label_res(
        sparql_q=owl_classes_en_labels_sparql_q,
        graph=pizza_onto_graph,
        ns=olaf_nlp4kgc_ns_bindings,
    )
}

In [None]:
# Class labels of the Text-to-OWL graph, lower-cased and with spaces removed.
llm_text2owl_class_labels = get_sparql_q_label_res(sparql_q=owl_classes_labels_sparql_q, graph=llm_text2owl_graph, ns=olaf_nlp4kgc_ns_bindings)
llm_text2owl_class_labels = list({label.lower().replace(" ", "") for label in llm_text2owl_class_labels})

# Print, for each pizza ontology label, its closest candidates (at most 3) to support the manual alignment.
# Comment this loop out to silence the output.
for true_label in pizza_onto_class_labels:
    res = process.extract(true_label, llm_text2owl_class_labels, scorer=fuzz.WRatio, limit=3)
    # `limit` is only an upper bound: format whatever was returned rather than
    # indexing res[0..2], which fails when fewer than 3 candidates exist.
    candidates = ", ".join(f"{match[0]} ({match[1]})" for match in res)
    print(f"{true_label} :  {candidates}")
    

In [None]:
# Hard-coded number of classes shared by the pizza ontology and the Text-to-OWL graph;
# numerator of the classes conciseness/completeness ratios.
# NOTE(review): presumably counted by hand from the string alignments — re-count if the input graphs change.
common_classes_pizza_text2owl=24

In [None]:
# The class URIs (not the labels) are queried here; they are lower-cased and stripped of the OLAF namespace.
olaf_llm_class_labels = get_sparql_q_label_res(sparql_q=owl_classes_sparql_q, graph=olaf_llm_graph, ns=olaf_nlp4kgc_ns_bindings)
olaf_llm_class_labels = list({label.lower().replace(str(olaf_nlp4kgc_ns), "") for label in olaf_llm_class_labels})

# Print, for each pizza ontology label, its closest candidates (at most 3) to support the manual alignment.
# Comment this loop out to silence the output.
for true_label in pizza_onto_class_labels:
    res = process.extract(true_label, olaf_llm_class_labels, scorer=fuzz.WRatio, limit=3)
    # `limit` is only an upper bound: format whatever was returned rather than
    # indexing res[0..2], which fails when fewer than 3 candidates exist.
    candidates = ", ".join(f"{match[0]} ({match[1]})" for match in res)
    print(f"{true_label} :  {candidates}")
    

In [None]:
# Hard-coded number of classes shared by the pizza ontology and the OLAF LLM graph;
# numerator of the classes conciseness/completeness ratios.
# NOTE(review): presumably counted by hand from the string alignments — re-count if the input graphs change.
common_classes_pizza_olaf_llm=19

In [None]:
# Class labels of the OLAF no-LLM graph, lower-cased and with spaces removed.
olaf_no_llm_class_labels = list({
    raw_label.lower().replace(" ", "")
    for raw_label in get_sparql_q_label_res(
        sparql_q=owl_classes_labels_sparql_q,
        graph=olaf_no_llm_graph,
        ns=olaf_nlp4kgc_ns_bindings,
    )
})

# Uncomment this loop to print, for each pizza ontology label, its closest candidates (at most 3)
# for true_label in pizza_onto_class_labels:
#     res = process.extract(true_label, olaf_no_llm_class_labels, scorer=fuzz.WRatio, limit=3)
#     print(f"{true_label} :  " + ", ".join(f"{match[0]} ({match[1]})" for match in res))
    

In [None]:
# Hard-coded number of classes shared by the pizza ontology and the OLAF no-LLM graph;
# numerator of the classes conciseness/completeness ratios.
# NOTE(review): presumably counted by hand from the string alignments — re-count if the input graphs change.
common_classes_pizza_olaf_no_llm=43

Individuals

In [None]:
# Individuals of the pizza ontology, identified by their lower-cased URI stripped of the pizza namespace.
pizza_onto_ind_uri = list({
    raw_uri.lower().replace(str(pizza_onto_ns), "")
    for raw_uri in get_sparql_q_label_res(
        sparql_q=owl_named_ind_from_obj_props_sparql_q,
        graph=pizza_onto_graph,
        ns=olaf_nlp4kgc_ns_bindings,
    )
})

In [None]:
# Individuals of the Text-to-OWL graph, identified by their lower-cased URI stripped of the OLAF namespace.
llm_text2owl_ind_uri = list({
    raw_uri.lower().replace(str(olaf_nlp4kgc_ns), "")
    for raw_uri in get_sparql_q_label_res(
        sparql_q=owl_named_ind_from_obj_props_sparql_q,
        graph=llm_text2owl_graph,
        ns=olaf_nlp4kgc_ns_bindings,
    )
})

# Uncomment this loop to print, for each pizza ontology individual, its closest candidates (at most 3)
# for true_label in pizza_onto_ind_uri:
#     res = process.extract(true_label, llm_text2owl_ind_uri, scorer=fuzz.WRatio, limit=3)
#     print(f"{true_label} :  " + ", ".join(f"{match[0]} ({match[1]})" for match in res))
    

In [None]:
# Hard-coded number of individuals shared by the pizza ontology and the Text-to-OWL graph;
# numerator of the individuals conciseness/completeness ratios.
# NOTE(review): presumably counted by hand from the string alignments — re-count if the input graphs change.
common_ind_pizza_text2owl=0

In [None]:
# Individuals of the OLAF LLM graph, identified by their lower-cased URI stripped of the OLAF namespace.
olaf_llm_ind_uri = get_sparql_q_label_res(sparql_q=owl_named_ind_from_obj_props_sparql_q, graph=olaf_llm_graph, ns=olaf_nlp4kgc_ns_bindings)
olaf_llm_ind_uri = list({label.lower().replace(str(olaf_nlp4kgc_ns), "") for label in olaf_llm_ind_uri})

# Print, for each pizza ontology individual, its closest candidates (at most 3) to support the manual alignment.
# Comment this loop out to silence the output.
for true_label in pizza_onto_ind_uri:
    res = process.extract(true_label, olaf_llm_ind_uri, scorer=fuzz.WRatio, limit=3)
    # `limit` is only an upper bound: format whatever was returned rather than
    # indexing res[0..2], which fails when fewer than 3 candidates exist.
    candidates = ", ".join(f"{match[0]} ({match[1]})" for match in res)
    print(f"{true_label} :  {candidates}")

In [None]:
# Hard-coded number of individuals shared by the pizza ontology and the OLAF LLM graph;
# numerator of the individuals conciseness/completeness ratios.
# NOTE(review): presumably counted by hand from the string alignments — re-count if the input graphs change.
common_ind_pizza_olaf_llm = 0

In [None]:
# Individuals of the OLAF no-LLM graph, identified by their lower-cased URI stripped of the OLAF namespace.
olaf_no_llm_ind_uri = list({
    raw_uri.lower().replace(str(olaf_nlp4kgc_ns), "")
    for raw_uri in get_sparql_q_label_res(
        sparql_q=owl_named_ind_from_obj_props_sparql_q,
        graph=olaf_no_llm_graph,
        ns=olaf_nlp4kgc_ns_bindings,
    )
})

# Uncomment this loop to print, for each pizza ontology individual, its closest candidates (at most 3)
# for true_label in pizza_onto_ind_uri:
#     res = process.extract(true_label, olaf_no_llm_ind_uri, scorer=fuzz.WRatio, limit=3)
#     print(f"{true_label} :  " + ", ".join(f"{match[0]} ({match[1]})" for match in res))

In [None]:
# Hard-coded number of individuals shared by the pizza ontology and the OLAF no-LLM graph;
# numerator of the individuals conciseness/completeness ratios.
# NOTE(review): presumably counted by hand from the string alignments — re-count if the input graphs change.
common_ind_pizza_olaf_no_llm = 2

Classes + individuals

In [None]:
# Classes and individuals of the pizza ontology, identified by their lower-cased URI
# stripped of the pizza namespace.
# Blank nodes are excluded by type rather than by testing an "nc" prefix on their
# identifier: that identifier is generated by the parser, so a prefix test is not a
# reliable way to spot blank nodes and could also discard a genuine name starting with "nc".
pizza_onto_classes_ind_uri = list({
    str(row[0]).lower().replace(str(pizza_onto_ns), "")
    for row in pizza_onto_graph.query(owl_named_classes_ind_from_obj_props_sparql_q, initNs=olaf_nlp4kgc_ns_bindings)
    if not isinstance(row[0], BNode)
})

In [None]:
# Classes and individuals of the Text-to-OWL graph, identified by their lower-cased URI stripped of the OLAF namespace.
llm_text2owl_classes_ind_uri = get_sparql_q_label_res(sparql_q=owl_named_classes_ind_from_obj_props_sparql_q, graph=llm_text2owl_graph, ns=olaf_nlp4kgc_ns_bindings)
llm_text2owl_classes_ind_uri = list({label.lower().replace(str(olaf_nlp4kgc_ns), "") for label in llm_text2owl_classes_ind_uri})

# Print, for each pizza ontology element, its closest candidates (at most 3) to support the manual alignment.
# Comment this loop out to silence the output.
for true_label in pizza_onto_classes_ind_uri:
    res = process.extract(true_label, llm_text2owl_classes_ind_uri, scorer=fuzz.WRatio, limit=3)
    # `limit` is only an upper bound: format whatever was returned rather than
    # indexing res[0..2], which fails when fewer than 3 candidates exist.
    candidates = ", ".join(f"{match[0]} ({match[1]})" for match in res)
    print(f"{true_label} :  {candidates}")
    

In [None]:
# Hard-coded number of classes/individuals shared by the pizza ontology and the Text-to-OWL graph;
# numerator of the classes-and-individuals conciseness/completeness ratios.
# NOTE(review): presumably counted by hand from the string alignments — re-count if the input graphs change.
common_classes_ind_pizza_text2owl=20

In [None]:
# Classes and individuals of the OLAF LLM graph, identified by their lower-cased URI stripped of the OLAF namespace.
olaf_llm_classes_ind_uri = list({
    raw_uri.lower().replace(str(olaf_nlp4kgc_ns), "")
    for raw_uri in get_sparql_q_label_res(
        sparql_q=owl_named_classes_ind_from_obj_props_sparql_q,
        graph=olaf_llm_graph,
        ns=olaf_nlp4kgc_ns_bindings,
    )
})

# Uncomment this loop to print, for each pizza ontology element, its closest candidates (at most 3)
# for true_label in pizza_onto_classes_ind_uri:
#     res = process.extract(true_label, olaf_llm_classes_ind_uri, scorer=fuzz.WRatio, limit=3)
#     print(f"{true_label} :  " + ", ".join(f"{match[0]} ({match[1]})" for match in res))
    

In [None]:
# Hard-coded number of classes/individuals shared by the pizza ontology and the OLAF LLM graph;
# numerator of the classes-and-individuals conciseness/completeness ratios.
# NOTE(review): presumably counted by hand from the string alignments — re-count if the input graphs change.
common_classes_ind_pizza_olaf_llm=14

In [None]:
# Classes and individuals of the OLAF no-LLM graph, identified by their lower-cased URI stripped of the OLAF namespace.
olaf_no_llm_classes_ind_uri = list({
    raw_uri.lower().replace(str(olaf_nlp4kgc_ns), "")
    for raw_uri in get_sparql_q_label_res(
        sparql_q=owl_named_classes_ind_from_obj_props_sparql_q,
        graph=olaf_no_llm_graph,
        ns=olaf_nlp4kgc_ns_bindings,
    )
})

# Uncomment this loop to print, for each pizza ontology element, its closest candidates (at most 3)
# for true_label in pizza_onto_classes_ind_uri:
#     res = process.extract(true_label, olaf_no_llm_classes_ind_uri, scorer=fuzz.WRatio, limit=3)
#     print(f"{true_label} :  " + ", ".join(f"{match[0]} ({match[1]})" for match in res))

In [None]:
# Hard-coded number of classes/individuals shared by the pizza ontology and the OLAF no-LLM graph;
# numerator of the classes-and-individuals conciseness/completeness ratios.
# NOTE(review): presumably counted by hand from the string alignments — re-count if the input graphs change.
common_classes_ind_pizza_olaf_no_llm=59

Object properties

In [None]:
# Object properties of the pizza ontology, identified by their lower-cased URI stripped of the pizza namespace.
pizza_onto_obj_prop_uri = list({
    raw_uri.lower().replace(str(pizza_onto_ns), "")
    for raw_uri in get_sparql_q_label_res(
        sparql_q=owl_obj_props_sparql_q,
        graph=pizza_onto_graph,
        ns=olaf_nlp4kgc_ns_bindings,
    )
})

In [None]:
# Object properties of the Text-to-OWL graph, identified by their lower-cased URI stripped of the OLAF namespace.
llm_text2owl_obj_prop_uri = list({
    raw_uri.lower().replace(str(olaf_nlp4kgc_ns), "")
    for raw_uri in get_sparql_q_label_res(
        sparql_q=owl_obj_props_sparql_q,
        graph=llm_text2owl_graph,
        ns=olaf_nlp4kgc_ns_bindings,
    )
})

# Uncomment this loop to print, for each pizza ontology property, its closest candidates (at most 2)
# for true_label in pizza_onto_obj_prop_uri:
#     res = process.extract(true_label, llm_text2owl_obj_prop_uri, scorer=fuzz.WRatio, limit=2)
#     print(f"{true_label} :  " + ", ".join(f"{match[0]} ({match[1]})" for match in res))
    

In [None]:
# Hard-coded number of object properties shared by the pizza ontology and the Text-to-OWL graph;
# numerator of the object properties conciseness/completeness ratios.
# NOTE(review): presumably counted by hand from the string alignments — re-count if the input graphs change.
common_obj_prop_pizza_text2owl=0

In [None]:
# Object properties of the OLAF LLM graph, identified by their lower-cased URI stripped of the OLAF namespace.
olaf_llm_obj_prop_uri = get_sparql_q_label_res(sparql_q=owl_obj_props_sparql_q, graph=olaf_llm_graph, ns=olaf_nlp4kgc_ns_bindings)
olaf_llm_obj_prop_uri = list({label.lower().replace(str(olaf_nlp4kgc_ns), "") for label in olaf_llm_obj_prop_uri})

# Print, for each pizza ontology property, its closest candidates (at most 3) to support the manual alignment.
# Comment this loop out to silence the output.
for true_label in pizza_onto_obj_prop_uri:
    res = process.extract(true_label, olaf_llm_obj_prop_uri, scorer=fuzz.WRatio, limit=3)
    # `limit` is only an upper bound: format whatever was returned rather than
    # indexing res[0..2], which fails when the graph holds fewer than 3 object properties.
    candidates = ", ".join(f"{match[0]} ({match[1]})" for match in res)
    print(f"{true_label} :  {candidates}")

In [None]:
# Hard-coded number of object properties shared by the pizza ontology and the OLAF LLM graph;
# numerator of the object properties conciseness/completeness ratios.
# NOTE(review): presumably counted by hand from the string alignments — re-count if the input graphs change.
common_obj_prop_pizza_olaf_llm = 0

In [None]:
# Object properties of the OLAF no-LLM graph, identified by their lower-cased URI stripped of the OLAF namespace.
olaf_no_llm_obj_prop_uri = list({
    raw_uri.lower().replace(str(olaf_nlp4kgc_ns), "")
    for raw_uri in get_sparql_q_label_res(
        sparql_q=owl_obj_props_sparql_q,
        graph=olaf_no_llm_graph,
        ns=olaf_nlp4kgc_ns_bindings,
    )
})

# Uncomment this loop to print, for each pizza ontology property, its closest candidates (at most 3)
# for true_label in pizza_onto_obj_prop_uri:
#     res = process.extract(true_label, olaf_no_llm_obj_prop_uri, scorer=fuzz.WRatio, limit=3)
#     print(f"{true_label} :  " + ", ".join(f"{match[0]} ({match[1]})" for match in res))

In [None]:
# Hard-coded number of object properties shared by the pizza ontology and the OLAF no-LLM graph;
# numerator of the object properties conciseness/completeness ratios.
# NOTE(review): presumably counted by hand from the string alignments — re-count if the input graphs change.
common_obj_prop_pizza_olaf_no_llm = 3

SubClassOf tuples

In [None]:
# subClassOf pairs of the pizza ontology, encoded as "<child>#<parent>" with both ends
# lower-cased and stripped of the pizza namespace.
# Pairs involving a blank node (anonymous class expression) are excluded by type rather
# than by testing an "nc" prefix on the identifier: blank node identifiers are generated
# by the parser, so a prefix test is not a reliable way to spot them.
pizza_onto_sub_uri = list({
    f"{str(row[0]).lower().replace(str(pizza_onto_ns), '')}#{str(row[1]).lower().replace(str(pizza_onto_ns), '')}"
    for row in pizza_onto_graph.query(rdfs_subclassof_tuples_sparql_q, initNs=olaf_nlp4kgc_ns_bindings)
    if not (isinstance(row[0], BNode) or isinstance(row[1], BNode))
})

In [None]:
# subClassOf pairs of the Text-to-OWL graph, encoded as "<child>#<parent>" with both ends
# lower-cased and stripped of the OLAF namespace.
llm_text2owl_sub_uri = get_sparql_q_tuple_res(sparql_q=rdfs_subclassof_tuples_sparql_q, graph=llm_text2owl_graph, ns=olaf_nlp4kgc_ns_bindings)
llm_text2owl_sub_uri = list({f"{label[0].lower().replace(str(olaf_nlp4kgc_ns), '')}#{label[1].lower().replace(str(olaf_nlp4kgc_ns), '')}" for label in llm_text2owl_sub_uri})

# Print, for each pizza ontology pair, its closest candidates (at most 3) to support the manual alignment.
# Comment this loop out to silence the output.
for true_label in pizza_onto_sub_uri:
    res = process.extract(true_label, llm_text2owl_sub_uri, scorer=fuzz.WRatio, limit=3)
    # `limit` is only an upper bound: format whatever was returned rather than
    # indexing res[0..2], which fails when fewer than 3 candidates exist.
    candidates = ", ".join(f"{match[0]} ({match[1]})" for match in res)
    print(f"{true_label} :  {candidates}")
    

In [None]:
# Hard-coded number of subClassOf pairs shared by the pizza ontology and the Text-to-OWL graph;
# numerator of the subClassOf pairs conciseness/completeness ratios.
# NOTE(review): presumably counted by hand from the string alignments — re-count if the input graphs change.
common_sub_pizza_text2owl=13

In [None]:
# subClassOf pairs of the OLAF LLM graph, encoded as "<child>#<parent>" with both ends
# lower-cased and stripped of the OLAF namespace.
olaf_llm_sub_uri = get_sparql_q_tuple_res(sparql_q=rdfs_subclassof_tuples_sparql_q, graph=olaf_llm_graph, ns=olaf_nlp4kgc_ns_bindings)
olaf_llm_sub_uri = list({f"{label[0].lower().replace(str(olaf_nlp4kgc_ns), '')}#{label[1].lower().replace(str(olaf_nlp4kgc_ns), '')}" for label in olaf_llm_sub_uri})

# Print, for each pizza ontology pair, its closest candidates (at most 3) to support the manual alignment.
# Comment this loop out to silence the output.
for true_label in pizza_onto_sub_uri:
    res = process.extract(true_label, olaf_llm_sub_uri, scorer=fuzz.WRatio, limit=3)
    # `limit` is only an upper bound: format whatever was returned rather than
    # indexing res[0..2], which fails when fewer than 3 candidates exist.
    candidates = ", ".join(f"{match[0]} ({match[1]})" for match in res)
    print(f"{true_label} :  {candidates}")
    

In [None]:
# Hard-coded number of subClassOf pairs shared by the pizza ontology and the OLAF LLM graph;
# numerator of the subClassOf pairs conciseness/completeness ratios.
# NOTE(review): presumably counted by hand from the string alignments — re-count if the input graphs change.
common_sub_pizza_olaf_llm=4

In [None]:
# subClassOf pairs of the OLAF no-LLM graph, encoded as "<child>#<parent>" with both ends
# lower-cased and stripped of the OLAF namespace.
olaf_no_llm_sub_uri = list({
    f"{child.lower().replace(str(olaf_nlp4kgc_ns), '')}#{parent.lower().replace(str(olaf_nlp4kgc_ns), '')}"
    for child, parent in get_sparql_q_tuple_res(
        sparql_q=rdfs_subclassof_tuples_sparql_q,
        graph=olaf_no_llm_graph,
        ns=olaf_nlp4kgc_ns_bindings,
    )
})

# Uncomment this loop to print, for each pizza ontology pair, its closest candidates (at most 3)
# for true_label in pizza_onto_sub_uri:
#     res = process.extract(true_label, olaf_no_llm_sub_uri, scorer=fuzz.WRatio, limit=3)
#     print(f"{true_label} :  " + ", ".join(f"{match[0]} ({match[1]})" for match in res))
    

In [None]:
# Hard-coded number of subClassOf pairs shared by the pizza ontology and the OLAF no-LLM graph;
# numerator of the subClassOf pairs conciseness/completeness ratios.
# NOTE(review): presumably counted by hand from the string alignments — re-count if the input graphs change.
common_sub_pizza_olaf_no_llm=6

Results

In [None]:
# Conciseness  = shared elements / elements of the constructed ontology.
# Completeness = shared elements / elements of the reference pizza ontology.

def _ratio(common: int, total: int) -> float:
    """Return ``common / total``, or 0.0 when ``total`` is 0 (empty ontology component)."""
    return common / total if total else 0.0


# Classes
classes_conciseness_text2owl = _ratio(common_classes_pizza_text2owl, len(llm_text2owl_class_labels))
classes_conciseness_olafLLM = _ratio(common_classes_pizza_olaf_llm, len(olaf_llm_class_labels))
classes_conciseness_olafNoLLM = _ratio(common_classes_pizza_olaf_no_llm, len(olaf_no_llm_class_labels))

classes_completeness_text2owl = _ratio(common_classes_pizza_text2owl, len(pizza_onto_class_labels))
classes_completeness_olafLLM = _ratio(common_classes_pizza_olaf_llm, len(pizza_onto_class_labels))
classes_completeness_olafNoLLM = _ratio(common_classes_pizza_olaf_no_llm, len(pizza_onto_class_labels))

# Individuals
individuals_conciseness_text2owl = _ratio(common_ind_pizza_text2owl, len(llm_text2owl_ind_uri))
individuals_conciseness_olafLLM = _ratio(common_ind_pizza_olaf_llm, len(olaf_llm_ind_uri))
individuals_conciseness_olafNoLLM = _ratio(common_ind_pizza_olaf_no_llm, len(olaf_no_llm_ind_uri))

individuals_completeness_text2owl = _ratio(common_ind_pizza_text2owl, len(pizza_onto_ind_uri))
individuals_completeness_olafLLM = _ratio(common_ind_pizza_olaf_llm, len(pizza_onto_ind_uri))
individuals_completeness_olafNoLLM = _ratio(common_ind_pizza_olaf_no_llm, len(pizza_onto_ind_uri))

# Classes and individuals
classesAndIndividuals_conciseness_text2owl = _ratio(common_classes_ind_pizza_text2owl, len(llm_text2owl_classes_ind_uri))
classesAndIndividuals_conciseness_olafLLM = _ratio(common_classes_ind_pizza_olaf_llm, len(olaf_llm_classes_ind_uri))
classesAndIndividuals_conciseness_olafNoLLM = _ratio(common_classes_ind_pizza_olaf_no_llm, len(olaf_no_llm_classes_ind_uri))

classesAndIndividuals_completeness_text2owl = _ratio(common_classes_ind_pizza_text2owl, len(pizza_onto_classes_ind_uri))
classesAndIndividuals_completeness_olafLLM = _ratio(common_classes_ind_pizza_olaf_llm, len(pizza_onto_classes_ind_uri))
classesAndIndividuals_completeness_olafNoLLM = _ratio(common_classes_ind_pizza_olaf_no_llm, len(pizza_onto_classes_ind_uri))

# Object properties
# The Text-to-OWL conciseness is divided by the size of the Text-to-OWL property list and
# its completeness by the size of the pizza ontology one, like every other component
# (the two denominators were previously swapped).
objectProperties_conciseness_text2owl = _ratio(common_obj_prop_pizza_text2owl, len(llm_text2owl_obj_prop_uri))
objectProperties_conciseness_olafLLM = _ratio(common_obj_prop_pizza_olaf_llm, len(olaf_llm_obj_prop_uri))
objectProperties_conciseness_olafNoLLM = _ratio(common_obj_prop_pizza_olaf_no_llm, len(olaf_no_llm_obj_prop_uri))

objectProperties_completeness_text2owl = _ratio(common_obj_prop_pizza_text2owl, len(pizza_onto_obj_prop_uri))
objectProperties_completeness_olafLLM = _ratio(common_obj_prop_pizza_olaf_llm, len(pizza_onto_obj_prop_uri))
objectProperties_completeness_olafNoLLM = _ratio(common_obj_prop_pizza_olaf_no_llm, len(pizza_onto_obj_prop_uri))

# SubClassOf pairs
subClassOfPairs_conciseness_text2owl = _ratio(common_sub_pizza_text2owl, len(llm_text2owl_sub_uri))
subClassOfPairs_conciseness_olafLLM = _ratio(common_sub_pizza_olaf_llm, len(olaf_llm_sub_uri))
subClassOfPairs_conciseness_olafNoLLM = _ratio(common_sub_pizza_olaf_no_llm, len(olaf_no_llm_sub_uri))

subClassOfPairs_completeness_text2owl = _ratio(common_sub_pizza_text2owl, len(pizza_onto_sub_uri))
subClassOfPairs_completeness_olafLLM = _ratio(common_sub_pizza_olaf_llm, len(pizza_onto_sub_uri))
subClassOfPairs_completeness_olafNoLLM = _ratio(common_sub_pizza_olaf_no_llm, len(pizza_onto_sub_uri))

In [None]:
def correctness(conciseness, completeness) -> float:
    """Combine conciseness and completeness into a single score.

    The score is the harmonic mean of the two values:
    ``2 * conciseness * completeness / (conciseness + completeness)``.

    Parameters
    ----------
    conciseness : float
        The conciseness ratio.
    completeness : float
        The completeness ratio.

    Returns
    -------
    float
        The harmonic mean, or 0.0 when both values sum to 0 (the mean is
        undefined in that case).
    """
    total = conciseness + completeness
    # Guard against the division by zero that occurs when both ratios are null.
    if total == 0:
        return 0.0
    return 2 * (conciseness * completeness) / total

In [None]:
def _with_correctness(*conciseness_completeness_pairs):
    """Flatten (conciseness, completeness) pairs into [conciseness, completeness, correctness, ...]."""
    column = []
    for conciseness, completeness in conciseness_completeness_pairs:
        column += [conciseness, completeness, correctness(conciseness, completeness)]
    return column


# One row per metric; each ontology column lists, for every component,
# its conciseness, completeness and correctness, in the order of the "Metrics" labels.
onto_evaluation = {
    "Metrics": [
        "Classes conciseness",
        "Classes completeness",
        "Classes correctness",
        "Individuals conciseness",
        "Individuals completeness",
        "Individuals correctness",
        "Classes and individuals conciseness",
        "Classes and individuals completeness",
        "Classes and individuals correctness",
        "Object properties conciseness",
        "Object properties completeness",
        "Object properties correctness",
        "SubClassOf pairs conciseness",
        "SubClassOf pairs completeness",
        "SubClassOf pairs correctness",
    ],
    "Text to OWL": _with_correctness(
        (classes_conciseness_text2owl, classes_completeness_text2owl),
        (individuals_conciseness_text2owl, individuals_completeness_text2owl),
        (classesAndIndividuals_conciseness_text2owl, classesAndIndividuals_completeness_text2owl),
        (objectProperties_conciseness_text2owl, objectProperties_completeness_text2owl),
        (subClassOfPairs_conciseness_text2owl, subClassOfPairs_completeness_text2owl),
    ),
    "OLAF LLM": _with_correctness(
        (classes_conciseness_olafLLM, classes_completeness_olafLLM),
        (individuals_conciseness_olafLLM, individuals_completeness_olafLLM),
        (classesAndIndividuals_conciseness_olafLLM, classesAndIndividuals_completeness_olafLLM),
        (objectProperties_conciseness_olafLLM, objectProperties_completeness_olafLLM),
        (subClassOfPairs_conciseness_olafLLM, subClassOfPairs_completeness_olafLLM),
    ),
    "OLAF no LLM": _with_correctness(
        (classes_conciseness_olafNoLLM, classes_completeness_olafNoLLM),
        (individuals_conciseness_olafNoLLM, individuals_completeness_olafNoLLM),
        (classesAndIndividuals_conciseness_olafNoLLM, classesAndIndividuals_completeness_olafNoLLM),
        (objectProperties_conciseness_olafNoLLM, objectProperties_completeness_olafNoLLM),
        (subClassOfPairs_conciseness_olafNoLLM, subClassOfPairs_completeness_olafNoLLM),
    ),
}

df_evaluation = pd.DataFrame(onto_evaluation)

In [None]:
# Display the conciseness, completeness and correctness of each constructed ontology (rendered as the cell output).
df_evaluation