# On the pertinence of LLMs for ontology learning: results analysis

## Notebook setup

In [1]:
import os

from rdflib import Graph, BNode, URIRef
from rdflib.namespace import Namespace

from rapidfuzz.distance import Hamming

In [2]:
# Namespaces of the generated ontologies and of the pizza ontology.
olaf_eswc_ns = Namespace("https://github.com/wikit-ai/olaf-llm-eswc2024/o/example#")
pizza_onto_ns = Namespace("http://www.co-ode.org/ontologies/pizza/pizza.owl#")

# Read the locations once with os.environ[...] rather than os.getenv(...):
# an unset variable then fails right here with a KeyError naming it, instead
# of os.path.join() raising an opaque TypeError on a None path component.
data_path = os.environ["DATA_PATH"]
results_path = os.environ["RESULTS_PATH"]

# Pizza ontology used as the comparison target (ground-truth file).
pizza_onto_graph = Graph()
pizza_onto_graph.parse(os.path.join(data_path, "pizza_onto_ground_truth.ttl"))

# Ontology stored under the "llm_text_to_owl" results folder.
llm_text2owl_graph = Graph()
llm_text2owl_graph.parse(os.path.join(results_path, "llm_text_to_owl", "llm_owl_pizza_onto_eswc2024.ttl"))

# Ontology stored under the "llm_pipeline" results folder.
olaf_llm_graph = Graph()
olaf_llm_graph.parse(os.path.join(results_path, "llm_pipeline", "llm_pipeline_kr_rdf_graph_eswc2024.ttl"))

# Ontology stored under the "no_llm_pipeline" results folder.
olaf_no_llm_graph = Graph()
olaf_no_llm_graph.parse(os.path.join(results_path, "no_llm_pipeline", "no_llm_pipeline_kr_rdf_graph_eswc2024.ttl"))

<Graph identifier=Na62e9f27cdbe4756944dfd1a297630c9 (<class 'rdflib.graph.Graph'>)>

In [3]:
# SPARQL queries over the OWL vocabulary. They use the rdf:, rdfs: and owl:
# prefixes without declaring them, so those prefixes must be resolvable by the
# graph (or supplied through initNs) when the query is run.

# Every distinct subject typed owl:Class.
owl_classes_sparql_q = """
            SELECT DISTINCT ?class WHERE {
                ?class rdf:type owl:Class .
            }
        """

# rdfs:label values of subjects typed owl:Class. No DISTINCT: a label value
# carried by several classes comes back once per matching class.
owl_classes_labels_sparql_q = """
            SELECT ?label WHERE {
                ?class rdf:type owl:Class ;
                        rdfs:label ?label .
            }
        """

# Every distinct subject typed owl:ObjectProperty.
owl_obj_props_sparql_q = """
            SELECT DISTINCT ?prop WHERE {
                ?prop rdf:type owl:ObjectProperty .
            }
        """

# rdfs:label values of subjects typed owl:ObjectProperty (no DISTINCT).
owl_obj_props_labels_sparql_q = """
            SELECT ?label WHERE {
                ?prop rdf:type owl:ObjectProperty .
                ?prop rdfs:label ?label .
            }
        """

# Every distinct subject explicitly typed owl:NamedIndividual.
owl_named_individuals_sparql_q = """
            SELECT DISTINCT ?ind WHERE {
                ?ind rdf:type owl:NamedIndividual .
            }
        """

# rdfs:label values of subjects typed owl:NamedIndividual (no DISTINCT).
owl_named_individuals_labels_sparql_q = """
            SELECT ?label WHERE {
                ?ind rdf:type owl:NamedIndividual .
                ?ind rdfs:label ?label .
            }
        """

# (property, domain, range) triples for every property that declares BOTH an
# rdfs:domain and an rdfs:range; properties missing either one do not match.
domain_range_sparql_q = """
        SELECT DISTINCT ?prop ?domain ?range WHERE {
            ?prop rdfs:domain ?domain ;
                rdfs:range ?range .
        }
    """

# (subclass, superclass) pairs for every rdfs:subClassOf statement. The
# pattern does not constrain ?class, so any object of rdfs:subClassOf matches.
subclasses_sparql_q = """
            SELECT DISTINCT ?subclass ?class WHERE {
                ?subclass rdfs:subClassOf ?class .
            }
        """

# The three queries below match owl:Restriction nodes written with the
# blank-node "[ ... ]" syntax and return, for each match, the constrained
# class, the restricted property and the filler class.

# Existential restrictions (owl:someValuesFrom) attached via rdfs:subClassOf.
anonymous_some_parent_sparql_q = """
        SELECT DISTINCT ?class ?restriction_rel ?restriction_cls WHERE {
                ?class rdfs:subClassOf [
                    rdf:type owl:Restriction ;
                    owl:onProperty ?restriction_rel ;
                    owl:someValuesFrom ?restriction_cls
                ] .
        }
    """

# Existential restrictions (owl:someValuesFrom) attached via owl:equivalentClass.
anonymous_some_equivalent_sparql_q = """
        SELECT DISTINCT ?class ?restriction_rel ?restriction_cls WHERE {
                ?class owl:equivalentClass [
                    rdf:type owl:Restriction ;
                    owl:onProperty ?restriction_rel ;
                    owl:someValuesFrom ?restriction_cls
                ] .
        }
    """

# Universal restrictions (owl:allValuesFrom) attached via rdfs:subClassOf.
anonymous_only_parent_sparql_q = """
        SELECT DISTINCT ?class ?restriction_rel ?restriction_cls WHERE {
                ?class rdfs:subClassOf [
                    rdf:type owl:Restriction ;
                    owl:onProperty ?restriction_rel ;
                    owl:allValuesFrom ?restriction_cls
                ] .
        }
    """

# Members of owl:AllDisjointClasses axioms. The property path
# owl:members/rdf:rest* reaches every cell of the RDF list and rdf:first reads
# the element stored in it. Only the member is selected, so members of all
# axioms end up in a single flat result.
disjoint_classes_sparql_q = """
        SELECT DISTINCT ?disjoint_cls WHERE {
                [] rdf:type owl:AllDisjointClasses ;
                owl:members/rdf:rest* ?node .
                ?node rdf:first ?disjoint_cls .
        }
    """

# Members of owl:AllDifferent axioms, read from the owl:distinctMembers RDF
# list with the same list-walking pattern (rdf:rest* then rdf:first).
all_diff_individuals_sparql_q = """
        SELECT DISTINCT ?diff_ind WHERE {
                [] rdf:type owl:AllDifferent ;
                owl:distinctMembers/rdf:rest* ?node .
                ?node rdf:first ?diff_ind .
        }
    """
    
def get_sparql_q_res_fragments(sparql_q: str, graph: Graph, ns: dict[str, Namespace]) -> set[tuple]:
    """Run a SPARQL SELECT query and reduce each URI in the result to its fragment.

    Args:
        sparql_q: The SPARQL query text.
        graph: The graph the query is run against.
        ns: Prefix-to-namespace bindings handed to the query as ``initNs``.

    Returns:
        One tuple per result row, collected in a set so identical rows are
        merged. Within a tuple, every ``URIRef`` is replaced by its
        ``fragment``; any other term (blank node, literal, or ``None`` for an
        unbound variable) is kept unchanged.
    """
    fragments = set()

    for row in graph.query(sparql_q, initNs=ns):
        # Only URIRef exposes ``.fragment``. Calling it on every non-BNode
        # term would raise AttributeError for literals and unbound variables.
        # NOTE(review): URIs without a '#' part presumably yield an empty
        # fragment and would collapse into the same value -- confirm that all
        # queried ontologies use '#' namespaces.
        fragments.add(
            tuple(
                term.fragment if isinstance(term, URIRef) else term
                for term in row
            )
        )

    return fragments

def get_sparql_q_label_res(sparql_q: str, graph: Graph, ns: dict[str, Namespace]) -> set[str]:
    """Run a SPARQL SELECT query and return the first column of each row as text.

    Args:
        sparql_q: The SPARQL query text; only its first selected variable is read.
        graph: The graph the query is run against.
        ns: Prefix-to-namespace bindings handed to the query as ``initNs``.

    Returns:
        The set of ``str()`` values of the first result column. Rows that
        stringify to the same value are merged.
    """
    return {str(row[0]) for row in graph.query(sparql_q, initNs=ns)}

In [7]:
# Resources used as individuals, gathered from four alternative patterns:
#   1. subject of a statement whose predicate is typed owl:ObjectProperty,
#   2. object of such a statement,
#   3. instance (rdf:type) of something typed owl:Class,
#   4. subject explicitly typed owl:NamedIndividual.
owl_named_ind_from_obj_props_sparql_q = """
            SELECT DISTINCT ?ind WHERE {
                {
                    ?prop rdf:type owl:ObjectProperty .
                    ?ind ?prop ?o .
                }
                UNION
                {
                    ?prop rdf:type owl:ObjectProperty .
                    ?s ?prop ?ind .
                }
                UNION
                {
                    ?ind rdf:type ?class .
                    ?class rdf:type owl:Class .
                }
                UNION
                {
                    ?ind rdf:type owl:NamedIndividual .
                }
            }
        """

# (child, parent) pairs for every rdfs:subClassOf statement.
rdfs_subclassof_tuples_sparql_q = """
            SELECT DISTINCT ?child ?parent WHERE {
                ?child rdfs:subClassOf ?parent .
            }
        """

# English rdfs:label values of subjects typed owl:Class.
# The filter must test ?label, the variable the pattern binds. Testing a
# variable that is never bound makes LANG() fail on every row, so the query
# would return no label at all.
owl_classes_en_labels_sparql_q = """
            SELECT ?label WHERE {
                ?class rdf:type owl:Class ;
                        rdfs:label ?label .
                FILTER(LANG(?label) = "en").
            }
        """

## Ontology overviews

In [5]:
# For each ontology, report:
# - class count
# - object property count
# - named individual count
# - subClassOf relation count

In [8]:
# Prefix bindings handed as initNs to every query below.
olaf_eswc_ns_bindings = {"olaf-eswc": olaf_eswc_ns, "pizza-onto": pizza_onto_ns}


def _fragments_per_ontology(sparql_q: str) -> tuple:
    """Run ``sparql_q`` on each of the four ontology graphs.

    The results are returned in a fixed order: LLM text-to-OWL graph, OLAF LLM
    graph, OLAF no-LLM graph, pizza ontology graph.
    """
    ontology_graphs = (llm_text2owl_graph, olaf_llm_graph, olaf_no_llm_graph, pizza_onto_graph)
    return tuple(
        get_sparql_q_res_fragments(sparql_q=sparql_q, graph=onto_graph, ns=olaf_eswc_ns_bindings)
        for onto_graph in ontology_graphs
    )


# Subjects typed owl:Class.
(
    llm_text2owl_g_classes,
    olaf_llm_g_classes,
    olaf_no_llm_g_classes,
    pizza_onto_g_classes,
) = _fragments_per_ontology(owl_classes_sparql_q)

# Subjects typed owl:ObjectProperty.
(
    llm_text2owl_g_props,
    olaf_llm_g_props,
    olaf_no_llm_g_props,
    pizza_onto_g_props,
) = _fragments_per_ontology(owl_obj_props_sparql_q)

# Subjects explicitly typed owl:NamedIndividual.
(
    llm_text2owl_g_individuals,
    olaf_llm_g_individuals,
    olaf_no_llm_g_individuals,
    pizza_onto_g_individuals,
) = _fragments_per_ontology(owl_named_individuals_sparql_q)

# Individuals found through the broader UNION query.
(
    llm_text2owl_g_individuals_from_props,
    olaf_llm_g_individuals_from_props,
    olaf_no_llm_g_individuals_from_props,
    pizza_onto_g_individuals_from_props,
) = _fragments_per_ontology(owl_named_ind_from_obj_props_sparql_q)

# (child, parent) rdfs:subClassOf pairs.
(
    llm_text2owl_g_subclassof_t,
    olaf_llm_g_subclassof_t,
    olaf_no_llm_g_subclassof_t,
    pizza_onto_g_subclassof_t,
) = _fragments_per_ontology(rdfs_subclassof_tuples_sparql_q)

# One entry per printed line; an empty string is the blank line between sections.
overview_lines = [
    "OWL named classes count:",
    f"LLM Text to OWL: {len(llm_text2owl_g_classes)}",
    f"OLAF LLM ontology: {len(olaf_llm_g_classes)}",
    f"OLAF no LLM ontology: {len(olaf_no_llm_g_classes)}",
    f"Pizza ontology: {len(pizza_onto_g_classes)}",
    "",
    "OWL object properties count:",
    f"LLM Text to OWL ontology: {len(llm_text2owl_g_props)}",
    f"OLAF LLM ontology: {len(olaf_llm_g_props)}",
    f"OLAF no LLM ontology: {len(olaf_no_llm_g_props)}",
    f"Pizza ontology: {len(pizza_onto_g_props)}",
    "",
    "OWL named individuals count:",
    f"LLM Text to OWL ontology: {len(llm_text2owl_g_individuals)}",
    f"OLAF LLM ontology: {len(olaf_llm_g_individuals)}",
    f"OLAF no LLM ontology: {len(olaf_no_llm_g_individuals)}",
    f"Pizza ontology: {len(pizza_onto_g_individuals)}",
    "",
    "OWL named individuals count from OWL object properties:",
    f"LLM Text to OWL ontology: {len(llm_text2owl_g_individuals_from_props)}",
    f"OLAF LLM ontology: {len(olaf_llm_g_individuals_from_props)}",
    f"OLAF no LLM ontology: {len(olaf_no_llm_g_individuals_from_props)}",
    f"Pizza ontology named: {len(pizza_onto_g_individuals_from_props)}",
    "",
    "RDFS subClassOf tuples count:",
    f"LLM Text to OWL ontology: {len(llm_text2owl_g_subclassof_t)}",
    f"OLAF LLM ontology: {len(olaf_llm_g_subclassof_t)}",
    f"OLAF no LLM ontology: {len(olaf_no_llm_g_subclassof_t)}",
    f"Pizza ontology: {len(pizza_onto_g_subclassof_t)}",
]
print("\n".join(overview_lines))

OWL named classes count:
LLM Text to OWL: 36
OLAF LLM ontology: 99
OLAF no LLM ontology: 111
Pizza ontology: 137

OWL object properties count:
LLM Text to OWL ontology: 2
OLAF LLM ontology: 77
OLAF no LLM ontology: 22
Pizza ontology: 8

OWL named individuals count:
LLM Text to OWL ontology: 0
OLAF LLM ontology: 0
OLAF no LLM ontology: 343
Pizza ontology: 5

OWL named individuals count from OWL object properties:
LLM Text to OWL ontology: 27
OLAF LLM ontology: 97
OLAF no LLM ontology: 343
Pizza ontology named: 5

RDFS subClassOf tuples count:
LLM Text to OWL ontology: 33
OLAF LLM ontology: 114
OLAF no LLM ontology: 518
Pizza ontology: 257


## Ontology evaluation

Comparing the generated ontologies with the pizza ontology.

In [17]:
# English rdfs:label values of subjects typed owl:Class: the FILTER keeps only
# labels whose language tag is "en". This assignment rebinds a name that is
# also assigned in an earlier cell; from here on this is the definition in use.
owl_classes_en_labels_sparql_q = """
            SELECT ?label WHERE {
                ?class rdf:type owl:Class ;
                        rdfs:label ?label .
                FILTER(LANG(?label) = "en").
            }
        """

In [18]:
# Visual check: dump the raw result rows of the English class-label query
# run against the pizza ontology graph.
q_res = pizza_onto_graph.query(
    query_object=owl_classes_en_labels_sparql_q,
    initNs=olaf_eswc_ns_bindings,
)

for res in q_res:
    print(res)

(rdflib.term.Literal('American', lang='en'),)
(rdflib.term.Literal('AmericanHot', lang='en'),)
(rdflib.term.Literal('AnchoviesTopping', lang='en'),)
(rdflib.term.Literal('ArtichokeTopping', lang='en'),)
(rdflib.term.Literal('AsparagusTopping', lang='en'),)
(rdflib.term.Literal('Cajun', lang='en'),)
(rdflib.term.Literal('CajunSpiceTopping', lang='en'),)
(rdflib.term.Literal('CaperTopping', lang='en'),)
(rdflib.term.Literal('Capricciosa', lang='en'),)
(rdflib.term.Literal('Caprina', lang='en'),)
(rdflib.term.Literal('CheeseTopping', lang='en'),)
(rdflib.term.Literal('CheesyPizza', lang='en'),)
(rdflib.term.Literal('CheesyVegetableTopping', lang='en'),)
(rdflib.term.Literal('ChickenTopping', lang='en'),)
(rdflib.term.Literal('Country', lang='en'),)
(rdflib.term.Literal('DeepPanBase', lang='en'),)
(rdflib.term.Literal('DomainThing', lang='en'),)
(rdflib.term.Literal('Fiorentina', lang='en'),)
(rdflib.term.Literal('SeafoodTopping', lang='en'),)
(rdflib.term.Literal('Food', lang='en'),)
(rdf

In [19]:
# English class labels of the pizza ontology, normalised for comparison:
# lower-cased with every space removed. Labels that become identical after
# normalisation are merged because the result is a set.
pizza_onto_class_labels = {
    raw_label.lower().replace(" ", "")
    for raw_label in get_sparql_q_label_res(
        sparql_q=owl_classes_en_labels_sparql_q,
        graph=pizza_onto_graph,
        ns=olaf_eswc_ns_bindings,
    )
}

In [20]:
# Number of distinct normalised English class labels found in the pizza ontology graph.
len(pizza_onto_class_labels)

95