# On the pertinence of LLMs for ontology learning: results analysis

## Notebook setup

In [1]:
import os

from rapidfuzz import process, fuzz
from rdflib import Graph, BNode, URIRef
from rdflib.namespace import Namespace

import pandas as pd

In [2]:
olaf_eswc_ns = Namespace("https://github.com/wikit-ai/olaf-llm-eswc2024/o/example#")
pizza_onto_ns = Namespace("http://www.co-ode.org/ontologies/pizza/pizza.owl#")

olaf_eswc_ns_bindings = {
    "olaf-eswc": olaf_eswc_ns,
    "pizza-onto": pizza_onto_ns
}

In [3]:
pizza_onto_graph = Graph()
pizza_onto_graph.parse(os.path.join(os.getenv("DATA_PATH"), "pizza_onto_ground_truth.ttl"))

llm_text2owl_graph = Graph()
llm_text2owl_graph.parse(os.path.join(os.getenv("RESULTS_PATH"), "llm_text_to_owl", "llm_owl_pizza_onto_eswc2024.ttl"))

olaf_llm_graph = Graph()
olaf_llm_graph.parse(os.path.join(os.getenv("RESULTS_PATH"), "llm_pipeline", "llm_pipeline_kr_rdf_graph_eswc2024.ttl"))

olaf_no_llm_graph = Graph()
olaf_no_llm_graph.parse(os.path.join(os.getenv("RESULTS_PATH"), "no_llm_pipeline", "no_llm_pipeline_kr_rdf_graph_eswc2024.ttl"))

<Graph identifier=Nfd35118404ec40a0816964e3a844a172 (<class 'rdflib.graph.Graph'>)>

## SPARQL tools

In [4]:
owl_classes_sparql_q = """
            SELECT DISTINCT ?class WHERE {
                ?class rdf:type owl:Class .
            }
        """

owl_classes_labels_sparql_q = """
            SELECT ?label WHERE {
                ?class rdf:type owl:Class ;
                        rdfs:label ?label .
            }
        """

owl_classes_en_labels_sparql_q = """
            SELECT ?label WHERE {
                ?class rdf:type owl:Class ;
                        rdfs:label ?label .
                FILTER(LANG(?label) = "en").
            }
        """

owl_obj_props_sparql_q = """
            SELECT DISTINCT ?prop WHERE {
                ?prop rdf:type owl:ObjectProperty .
            }
        """

owl_named_individuals_sparql_q = """
            SELECT DISTINCT ?ind WHERE {
                ?ind rdf:type owl:NamedIndividual .
            }
        """

owl_named_ind_from_obj_props_sparql_q = """
            SELECT DISTINCT ?ind WHERE {
                {
                    ?prop rdf:type owl:ObjectProperty .
                    ?ind ?prop ?o .
                }
                UNION
                {
                    ?prop rdf:type owl:ObjectProperty .
                    ?s ?prop ?ind .
                }
                UNION
                {
                    ?ind rdf:type ?class .
                    ?class rdf:type owl:Class .
                }
                UNION
                {
                    ?ind rdf:type owl:NamedIndividual .
                }
            }
        """

owl_named_classes_ind_from_obj_props_sparql_q = """
            SELECT DISTINCT ?ind WHERE {
                {
                    ?prop rdf:type owl:ObjectProperty .
                    ?ind ?prop ?o .
                }
                UNION
                {
                    ?prop rdf:type owl:ObjectProperty .
                    ?s ?prop ?ind .
                }
                UNION
                {
                    ?ind rdf:type ?class .
                    ?class rdf:type owl:Class .
                }
                UNION
                {
                    ?ind rdf:type owl:NamedIndividual .
                }
                UNION
                {
                    ?ind rdf:type owl:Class .
                }
            }
        """

rdfs_subclassof_tuples_sparql_q = """
            SELECT DISTINCT ?child ?parent WHERE {
                ?child rdfs:subClassOf ?parent .
            }
        """

In [5]:
def get_sparql_q_res_fragments_skip_bnodes(sparql_q: str, graph: Graph, ns: dict[str, Namespace]) -> set[tuple]:
    q_res = graph.query(sparql_q, initNs=ns)
    
    fragments = set()
    for res in q_res:
        t = []
        for item in res:
            if not isinstance(item, BNode):
                t.append(item.fragment)
        fragments.add(tuple(t))

    return fragments

In [6]:
def get_sparql_q_label_res(sparql_q: str, graph: Graph, ns: dict[str, Namespace]) -> set[tuple]:
    q_res = graph.query(sparql_q, initNs=ns)

    labels = {str(res[0]) for res in q_res}

    return labels

In [7]:
def get_sparql_q_tuple_res(sparql_q: str, graph: Graph, ns: dict[str, Namespace]) -> set[tuple]:
    q_res = graph.query(sparql_q, initNs=ns)

    labels = {(str(res[0]), str(res[1])) for res in q_res}

    return labels

## Ontologies overview

In [8]:
llm_text2owl_g_classes = get_sparql_q_res_fragments_skip_bnodes(sparql_q=owl_classes_sparql_q, graph=llm_text2owl_graph, ns=olaf_eswc_ns_bindings)
olaf_llm_g_classes = get_sparql_q_res_fragments_skip_bnodes(sparql_q=owl_classes_sparql_q, graph=olaf_llm_graph, ns=olaf_eswc_ns_bindings)
olaf_no_llm_g_classes = get_sparql_q_res_fragments_skip_bnodes(sparql_q=owl_classes_sparql_q, graph=olaf_no_llm_graph, ns=olaf_eswc_ns_bindings)
pizza_onto_g_classes = get_sparql_q_res_fragments_skip_bnodes(sparql_q=owl_classes_sparql_q, graph=pizza_onto_graph, ns=olaf_eswc_ns_bindings)

llm_text2owl_g_props = get_sparql_q_res_fragments_skip_bnodes(sparql_q=owl_obj_props_sparql_q, graph=llm_text2owl_graph, ns=olaf_eswc_ns_bindings)
olaf_llm_g_props = get_sparql_q_res_fragments_skip_bnodes(sparql_q=owl_obj_props_sparql_q, graph=olaf_llm_graph, ns=olaf_eswc_ns_bindings)
olaf_no_llm_g_props = get_sparql_q_res_fragments_skip_bnodes(sparql_q=owl_obj_props_sparql_q, graph=olaf_no_llm_graph, ns=olaf_eswc_ns_bindings)
pizza_onto_g_props = get_sparql_q_res_fragments_skip_bnodes(sparql_q=owl_obj_props_sparql_q, graph=pizza_onto_graph, ns=olaf_eswc_ns_bindings)

llm_text2owl_g_individuals = get_sparql_q_res_fragments_skip_bnodes(sparql_q=owl_named_individuals_sparql_q, graph=llm_text2owl_graph, ns=olaf_eswc_ns_bindings)
olaf_llm_g_individuals = get_sparql_q_res_fragments_skip_bnodes(sparql_q=owl_named_individuals_sparql_q, graph=olaf_llm_graph, ns=olaf_eswc_ns_bindings)
olaf_no_llm_g_individuals = get_sparql_q_res_fragments_skip_bnodes(sparql_q=owl_named_individuals_sparql_q, graph=olaf_no_llm_graph, ns=olaf_eswc_ns_bindings)
pizza_onto_g_individuals = get_sparql_q_res_fragments_skip_bnodes(sparql_q=owl_named_individuals_sparql_q, graph=pizza_onto_graph, ns=olaf_eswc_ns_bindings)

llm_text2owl_g_individuals_from_props = get_sparql_q_res_fragments_skip_bnodes(sparql_q=owl_named_ind_from_obj_props_sparql_q, graph=llm_text2owl_graph, ns=olaf_eswc_ns_bindings)
olaf_llm_g_individuals_from_props = get_sparql_q_res_fragments_skip_bnodes(sparql_q=owl_named_ind_from_obj_props_sparql_q, graph=olaf_llm_graph, ns=olaf_eswc_ns_bindings)
olaf_no_llm_g_individuals_from_props = get_sparql_q_res_fragments_skip_bnodes(sparql_q=owl_named_ind_from_obj_props_sparql_q, graph=olaf_no_llm_graph, ns=olaf_eswc_ns_bindings)
pizza_onto_g_individuals_from_props = get_sparql_q_res_fragments_skip_bnodes(sparql_q=owl_named_ind_from_obj_props_sparql_q, graph=pizza_onto_graph, ns=olaf_eswc_ns_bindings)

llm_text2owl_g_subclassof_t = get_sparql_q_res_fragments_skip_bnodes(sparql_q=rdfs_subclassof_tuples_sparql_q, graph=llm_text2owl_graph, ns=olaf_eswc_ns_bindings)
olaf_llm_g_subclassof_t = get_sparql_q_res_fragments_skip_bnodes(sparql_q=rdfs_subclassof_tuples_sparql_q, graph=olaf_llm_graph, ns=olaf_eswc_ns_bindings)
olaf_no_llm_g_subclassof_t = get_sparql_q_res_fragments_skip_bnodes(sparql_q=rdfs_subclassof_tuples_sparql_q, graph=olaf_no_llm_graph, ns=olaf_eswc_ns_bindings)
pizza_onto_g_subclassof_t = get_sparql_q_res_fragments_skip_bnodes(sparql_q=rdfs_subclassof_tuples_sparql_q, graph=pizza_onto_graph, ns=olaf_eswc_ns_bindings)

In [9]:
onto_overview = {
    "Count": ["OWL named classes", "OWL object properties", "OWL named individuals", "OWL named individuals from OWL object properties", "RDFS subClassOf tuples"],
    "Pizza": [len(pizza_onto_g_classes), len(pizza_onto_g_props), len(pizza_onto_g_individuals), len(pizza_onto_g_individuals_from_props), len(pizza_onto_g_subclassof_t)],
    "Text to OWL": [len(llm_text2owl_g_classes), len(llm_text2owl_g_props), len(llm_text2owl_g_individuals), len(llm_text2owl_g_individuals_from_props), len(llm_text2owl_g_subclassof_t)],
    "OLAF LLM": [len(olaf_llm_g_classes), len(olaf_llm_g_props), len(olaf_llm_g_individuals), len(olaf_llm_g_individuals_from_props), len(olaf_llm_g_subclassof_t)],
    "OLAF no LLM": [len(olaf_no_llm_g_classes), len(olaf_no_llm_g_props), len(olaf_no_llm_g_individuals), len(olaf_no_llm_g_individuals_from_props), len(olaf_no_llm_g_subclassof_t)]
}

df_overview = pd.DataFrame(onto_overview)

In [10]:
df_overview

Unnamed: 0,Count,Pizza,Text to OWL,OLAF LLM,OLAF no LLM
0,OWL named classes,97,36,99,111
1,OWL object properties,8,2,77,22
2,OWL named individuals,5,0,0,343
3,OWL named individuals from OWL object properties,5,27,97,343
4,RDFS subClassOf tuples,141,33,114,390


## Ontologies evaluation

Classes

In [11]:
pizza_onto_class_labels = get_sparql_q_label_res(sparql_q=owl_classes_en_labels_sparql_q, graph=pizza_onto_graph, ns=olaf_eswc_ns_bindings)
pizza_onto_class_labels = {label.lower().replace(" ", "") for label in pizza_onto_class_labels}

In [12]:
llm_text2owl_class_labels = get_sparql_q_label_res(sparql_q=owl_classes_labels_sparql_q, graph=llm_text2owl_graph, ns=olaf_eswc_ns_bindings)
llm_text2owl_class_labels = list({label.lower().replace(" ", "") for label in llm_text2owl_class_labels})


for true_label in pizza_onto_class_labels:
    res = process.extract(true_label, llm_text2owl_class_labels, scorer=fuzz.WRatio, limit=3)
    print(f"{true_label} :  {res[0][0]} ({res[0][1]}), {res[1][0]} ({res[1][1]}), {res[2][0]} ({res[2][1]})") 
    

american :  artichoketopping (55.38461538461539), capricciosatopping (51.42857142857142), base (51.42857142857142)
fourseasons :  fourcheesestopping (62.06896551724138), base (45.0), mozzarellatopping (42.85714285714286)
cheesetopping :  cheesetopping (100.0), topping (90.0), fourcheesestopping (83.87096774193549)
cheesyvegetabletopping :  topping (90.0), vegetabletopping (84.21052631578947), leektopping (81.0)
siciliana :  spinachtopping (51.42857142857142), spicytopping (47.61904761904761), combinationtopping (45.0)
garlictopping :  topping (90.0), artichoketopping (75.86206896551724), olivetopping (72.0)
meatypizza :  pizza (90.0), meattopping (57.14285714285714), mozzarellatopping (48.0)
anchoviestopping :  topping (90.0), cheesetopping (75.86206896551724), artichoketopping (75.0)
meattopping :  meattopping (100.0), topping (90.0), capricciosatopping (75.78947368421052)
sloppygiuseppe :  sloppygiuseppetopping (90.0), topping (51.42857142857142), jalapenopeppertopping (46.6666666666

In [13]:
common_classes_pizza_text2owl=36

In [14]:
olaf_llm_class_labels = get_sparql_q_label_res(sparql_q=owl_classes_sparql_q, graph=olaf_llm_graph, ns=olaf_eswc_ns_bindings)
olaf_llm_class_labels = list({label.lower().replace("https://github.com/wikit-ai/olaf-llm-eswc2024/o/example#", "") for label in olaf_llm_class_labels})


for true_label in pizza_onto_class_labels:
    res = process.extract(true_label, olaf_llm_class_labels, scorer=fuzz.WRatio, limit=3)
    print(f"{true_label} :  {res[0][0]} ({res[0][1]}), {res[1][0]} ({res[1][1]}), {res[2][0]} ({res[2][1]})") 
    

american :  americanhotpizza (90.0), ham (72.0), parmesan (62.5)
fourseasons :  fourcheesestopping (62.06896551724138), options (54.0), bases (54.0)
cheesetopping :  cheesetopping (100.0), cheese (90.0), fourcheesestopping (83.87096774193549)
cheesyvegetabletopping :  cheesyvegetabletopping (100.0), cheesy (90.0), vegetabletopping (84.21052631578947)
siciliana :  spiciness (55.55555555555556), basil (54.0), spinachtopping (51.42857142857142)
garlictopping :  artichoketopping (75.86206896551724), olivetopping (72.0), vegetabletopping (68.96551724137932)
meatypizza :  meatypizza (100.0), pizza (90.0), meaty (90.0)
anchoviestopping :  cheesetopping (75.86206896551724), artichoketopping (75.0), chickentopping (73.33333333333334)
meattopping :  meattopping (100.0), meaty (80.0), cheesyvegetabletopping (75.78947368421052)
sloppygiuseppe :  topped (60.00000000000001), basil (51.42857142857142), types (51.42857142857142)
spinachtopping :  spinachtopping (100.0), pizzatoppings (74.0740740740740

In [15]:
common_classes_pizza_olaf_llm=55

In [16]:
olaf_no_llm_class_labels = get_sparql_q_label_res(sparql_q=owl_classes_labels_sparql_q, graph=olaf_no_llm_graph, ns=olaf_eswc_ns_bindings)
olaf_no_llm_class_labels = list({label.lower().replace(" ", "") for label in olaf_no_llm_class_labels})


for true_label in pizza_onto_class_labels:
    res = process.extract(true_label, olaf_no_llm_class_labels, scorer=fuzz.WRatio, limit=3)
    print(f"{true_label} :  {res[0][0]} ({res[0][1]}), {res[1][0]} ({res[1][1]}), {res[2][0]} ({res[2][1]})") 
    

american :  american (100.0), americanhot (84.21052631578947), pan (72.0)
fourseasons :  fourth (72.0), red (60.00000000000001), non (60.00000000000001)
cheesetopping :  cheesy (81.81818181818181), leektopping (75.0), hamtopping (69.56521739130434)
cheesyvegetabletopping :  cheesy (90.0), cheesyvegetable (81.08108108108108), leektopping (81.0)
siciliana :  italian (62.5), ad (60.00000000000001), pan (60.00000000000001)
garlictopping :  artichoketopping (75.86206896551724), hamtopping (69.56521739130434), leektopping (66.66666666666667)
meatypizza :  meatypizza (100.0), pizza (90.0), meat (90.0)
anchoviestopping :  anchovies (90.0), artichoketopping (75.0), hamtopping (74.11764705882352)
meattopping :  meat (90.0), hamtopping (76.19047619047619), leektopping (72.72727272727273)
sloppygiuseppe :  sloppygiuseppe (100.0), giuseppe (90.0), type (60.00000000000001)
spinachtopping :  spinach (90.0), artichoketopping (66.66666666666667), asparagustopping (66.66666666666667)
hotspicedbeeftoppin

In [17]:
common_classes_pizza_olaf_no_llm=43

Individuals

In [18]:
pizza_onto_ind_uri = get_sparql_q_label_res(sparql_q=owl_named_ind_from_obj_props_sparql_q, graph=pizza_onto_graph, ns=olaf_eswc_ns_bindings)
pizza_onto_ind_uri = list({label.lower().replace(str(pizza_onto_ns), "") for label in pizza_onto_ind_uri})

In [19]:
llm_text2owl_ind_uri = get_sparql_q_label_res(sparql_q=owl_named_ind_from_obj_props_sparql_q, graph=llm_text2owl_graph, ns=olaf_eswc_ns_bindings)
llm_text2owl_ind_uri = list({label.lower().replace(str(olaf_eswc_ns), "") for label in llm_text2owl_ind_uri})

for true_label in pizza_onto_ind_uri:
    res = process.extract(true_label, llm_text2owl_ind_uri, scorer=fuzz.WRatio, limit=3)
    print(f"{true_label} :  {res[0][0]} ({res[0][1]}), {res[1][0]} ({res[1][1]}), {res[2][0]} ({res[2][1]})") 
    

england :  thinandcrispybase1 (51.42857142857142), napoletanatopping1 (40.0), polloadastratopping1 (38.57142857142858)
italy :  margheritatopping1 (54.0), jalapenopeppertopping1 (45.0), pizza1 (36.36363636363637)
france :  prawnstopping1 (49.090909090909086), fourcheesestopping1 (49.090909090909086), hotgreenpeppertopping1 (45.0)
america :  artichoketopping1 (60.00000000000001), capricciosatopping1 (55.38461538461539), margheritatopping1 (51.42857142857142)
germany :  margheritatopping1 (51.42857142857142), greenpeppertopping1 (45.0), prawnstopping1 (45.0)


In [20]:
common_ind_pizza_text2owl=0

In [21]:
olaf_llm_ind_uri = get_sparql_q_label_res(sparql_q=owl_named_ind_from_obj_props_sparql_q, graph=olaf_llm_graph, ns=olaf_eswc_ns_bindings)
olaf_llm_ind_uri = list({label.lower().replace(str(olaf_eswc_ns), "") for label in olaf_llm_ind_uri})
for true_label in pizza_onto_ind_uri:
    res = process.extract(true_label, olaf_llm_ind_uri, scorer=fuzz.WRatio, limit=3)
    print(f"{true_label} :  {res[0][0]} ({res[0][1]}), {res[1][0]} ({res[1][1]}), {res[2][0]} ({res[2][1]})") 

england :  leavenedwheatbaseddough (51.42857142857142), thinandcrispybase (51.42857142857142), vegetarian (47.05882352941176)
italy :  italianorigin (80.0), widevariety (60.00000000000001), cheesyvegetabletopping (54.0)
france :  vegetarian (54.0), fourcheesestopping (49.090909090909086), prawnstopping (49.090909090909086)
america :  americanhotpizza (90.0), ham (72.0), artichoketopping (60.00000000000001)
germany :  vegetarian (58.82352941176471), parmesan (53.333333333333336), americanhotpizza (51.42857142857142)


In [23]:
common_ind_pizza_olaf_llm = 1

In [24]:
olaf_no_llm_ind_uri = get_sparql_q_label_res(sparql_q=owl_named_ind_from_obj_props_sparql_q, graph=olaf_no_llm_graph, ns=olaf_eswc_ns_bindings)
olaf_no_llm_ind_uri = list({label.lower().replace(str(olaf_eswc_ns), "") for label in olaf_no_llm_ind_uri})
for true_label in pizza_onto_ind_uri:
    res = process.extract(true_label, olaf_no_llm_ind_uri, scorer=fuzz.WRatio, limit=3)
    print(f"{true_label} :  {res[0][0]} ({res[0][1]}), {res[1][0]} ({res[1][1]}), {res[2][0]} ({res[2][1]})")

england :  _ad (60.00000000000001), _cookingpan (60.00000000000001), _artichokeplant (55.38461538461539)
italy :  _italian (72.0), _italianorigin (72.0), _significantly (67.5)
france :  _spinaciaoleracea (65.45454545454547), _broadness (60.00000000000001), _anchovies (54.0)
america :  _america (93.33333333333333), _americanhot (90.0), _americanhotpizza (90.0)
germany :  _vegetarian (55.55555555555556), _giardiniera (54.0), _nonvegetarian (51.42857142857142)


In [25]:
common_ind_pizza_olaf_no_llm = 2

Classes + indiviuals

In [26]:
pizza_onto_classes_ind_uri = get_sparql_q_label_res(sparql_q=owl_named_classes_ind_from_obj_props_sparql_q, graph=pizza_onto_graph, ns=olaf_eswc_ns_bindings)
pizza_onto_classes_ind_uri = list({label.lower().replace(str(pizza_onto_ns), "") for label in pizza_onto_classes_ind_uri})
pizza_onto_classes_ind_uri = [uri for uri in pizza_onto_classes_ind_uri if not(uri[0:2]=="nc")]

In [27]:
llm_text2owl_classes_ind_uri = get_sparql_q_label_res(sparql_q=owl_named_classes_ind_from_obj_props_sparql_q, graph=llm_text2owl_graph, ns=olaf_eswc_ns_bindings)
llm_text2owl_classes_ind_uri = list({label.lower().replace(str(olaf_eswc_ns), "") for label in llm_text2owl_classes_ind_uri})

for true_label in pizza_onto_classes_ind_uri:
    res = process.extract(true_label, llm_text2owl_classes_ind_uri, scorer=fuzz.WRatio, limit=3)
    print(f"{true_label} :  {res[0][0]} ({res[0][1]}), {res[1][0]} ({res[1][1]}), {res[2][0]} ({res[2][1]})") 
    

n7be8cb1f14e84db2af1f493bc7d80cffb347 :  deeppanbase1 (36.0), deeppanbase (36.0), pizza1 (30.000000000000004)
american :  artichoketopping1 (55.38461538461539), artichoketopping (55.38461538461539), capricciosatopping (51.42857142857142)
fourseasons :  fourcheesestopping (62.06896551724138), fourcheesestopping1 (60.0), base (45.0)
england :  thinandcrispybase1 (51.42857142857142), thinandcrispybase (51.42857142857142), vegetabletopping (41.53846153846154)
cheesetopping :  cheesetopping (100.0), topping (90.0), fourcheesestopping (83.87096774193549)
siciliana :  spinachtopping (51.42857142857142), spinachtopping1 (51.42857142857142), spicytopping (47.61904761904761)
garlictopping :  topping (90.0), artichoketopping (75.86206896551724), artichoketopping1 (73.33333333333334)
n7be8cb1f14e84db2af1f493bc7d80cffb323 :  deeppanbase1 (36.0), deeppanbase (36.0), pizza1 (30.000000000000004)
meatypizza :  pizza (90.0), pizza1 (81.81818181818181), meattopping (57.14285714285714)
n7be8cb1f14e84db2af

In [28]:
common_classes_ind_pizza_text2owl=34

In [29]:
olaf_llm_classes_ind_uri = get_sparql_q_label_res(sparql_q=owl_named_classes_ind_from_obj_props_sparql_q, graph=olaf_llm_graph, ns=olaf_eswc_ns_bindings)
olaf_llm_classes_ind_uri = list({label.lower().replace(str(olaf_eswc_ns), "") for label in olaf_llm_classes_ind_uri})

for true_label in pizza_onto_classes_ind_uri:
    res = process.extract(true_label, olaf_llm_classes_ind_uri, scorer=fuzz.WRatio, limit=3)
    print(f"{true_label} :  {res[0][0]} ({res[0][1]}), {res[1][0]} ({res[1][1]}), {res[2][0]} ({res[2][1]})") 
    

n7be8cb1f14e84db2af1f493bc7d80cffb347 :  bases (40.0), baked (40.0), deeppanbase (36.0)
american :  americanhotpizza (90.0), ham (72.0), parmesan (62.5)
fourseasons :  fourcheesestopping (62.06896551724138), options (54.0), bases (54.0)
england :  leavenedwheatbaseddough (51.42857142857142), thinandcrispybase (51.42857142857142), vegetarian (47.05882352941176)
cheesetopping :  cheesetopping (100.0), cheese (90.0), fourcheesestopping (83.87096774193549)
siciliana :  spiciness (55.55555555555556), basil (54.0), spinachtopping (51.42857142857142)
garlictopping :  artichoketopping (75.86206896551724), olivetopping (72.0), vegetabletopping (68.96551724137932)
n7be8cb1f14e84db2af1f493bc7d80cffb323 :  bases (40.0), baked (40.0), deeppanbase (36.0)
meatypizza :  meatypizza (100.0), pizza (90.0), meaty (90.0)
n7be8cb1f14e84db2af1f493bc7d80cffb32 :  bases (40.0), baked (40.0), deeppanbase (36.0)
anchoviestopping :  cheesetopping (75.86206896551724), artichoketopping (75.0), chickentopping (73.33

In [30]:
common_classes_ind_pizza_olaf_llm=54

In [31]:
olaf_no_llm_classes_ind_uri = get_sparql_q_label_res(sparql_q=owl_named_classes_ind_from_obj_props_sparql_q, graph=olaf_no_llm_graph, ns=olaf_eswc_ns_bindings)
olaf_no_llm_classes_ind_uri = list({label.lower().replace(str(olaf_eswc_ns), "") for label in olaf_no_llm_classes_ind_uri})

for true_label in pizza_onto_classes_ind_uri:
    res = process.extract(true_label, olaf_no_llm_classes_ind_uri, scorer=fuzz.WRatio, limit=3)
    print(f"{true_label} :  {res[0][0]} ({res[0][1]}), {res[1][0]} ({res[1][1]}), {res[2][0]} ({res[2][1]})") 
    

n7be8cb1f14e84db2af1f493bc7d80cffb347 :  _panbase (45.0), _base (40.0), _beef (40.0)
american :  american (100.0), _american (94.11764705882352), _americanhot (90.0)
fourseasons :  fourth (72.0), _fourth (65.45454545454547), red (60.00000000000001)
england :  oven (60.00000000000001), _ad (60.00000000000001), _cookingpan (60.00000000000001)
cheesetopping :  _cheesetopping (96.2962962962963), _cheesestopping (92.85714285714286), _topping (84.0)
siciliana :  italian (62.5), pan (60.00000000000001), ad (60.00000000000001)
garlictopping :  _topping (84.0), artichoketopping (75.86206896551724), _artichoketopping (73.33333333333334)
n7be8cb1f14e84db2af1f493bc7d80cffb323 :  _panbase (45.0), _base (40.0), _beef (40.0)
meatypizza :  meatypizza (100.0), _meatypizza (95.23809523809523), pizza (90.0)
n7be8cb1f14e84db2af1f493bc7d80cffb32 :  _panbase (45.0), _base (40.0), _beef (40.0)
anchoviestopping :  anchovies (90.0), _anchovies (85.26315789473685), _topping (84.0)
meattopping :  meat (90.0), _m

In [32]:
common_classes_ind_pizza_olaf_no_llm=59

Object properties

In [33]:
pizza_onto_obj_prop_uri = get_sparql_q_label_res(sparql_q=owl_obj_props_sparql_q, graph=pizza_onto_graph, ns=olaf_eswc_ns_bindings)
pizza_onto_obj_prop_uri = list({label.lower().replace(str(pizza_onto_ns), "") for label in pizza_onto_obj_prop_uri})

In [34]:
llm_text2owl_obj_prop_uri = get_sparql_q_label_res(sparql_q=owl_obj_props_sparql_q, graph=llm_text2owl_graph, ns=olaf_eswc_ns_bindings)
llm_text2owl_obj_prop_uri = list({label.lower().replace(str(olaf_eswc_ns), "") for label in llm_text2owl_obj_prop_uri})

for true_label in pizza_onto_obj_prop_uri:
    res = process.extract(true_label, llm_text2owl_obj_prop_uri, scorer=fuzz.WRatio, limit=2)
    print(f"{true_label} :  {res[0][0]} ({res[0][1]}), {res[1][0]} ({res[1][1]})") 
    

hasbase :  hasbase (100.0), hastopping (35.29411764705882)
isbaseof :  hasbase (66.66666666666667), hastopping (33.333333333333336)
hastopping :  hastopping (100.0), hasbase (35.29411764705882)
hascountryoforigin :  hasbase (54.0), hastopping (52.94117647058824)
hasingredient :  hasbase (54.0), hastopping (52.17391304347826)
istoppingof :  hastopping (76.19047619047619), hasbase (20.0)
isingredientof :  hastopping (33.333333333333336), hasbase (25.71428571428571)
hasspiciness :  hasbase (65.45454545454547), hastopping (54.54545454545454)


In [35]:
common_obj_prop_pizza_text2owl=2

In [36]:
olaf_llm_obj_prop_uri = get_sparql_q_label_res(sparql_q=owl_obj_props_sparql_q, graph=olaf_llm_graph, ns=olaf_eswc_ns_bindings)
olaf_llm_obj_prop_uri = list({label.lower().replace(str(olaf_eswc_ns), "") for label in olaf_llm_obj_prop_uri})

for true_label in pizza_onto_obj_prop_uri:
    res = process.extract(true_label, olaf_llm_obj_prop_uri, scorer=fuzz.WRatio, limit=3)
    print(f"{true_label} :  {res[0][0]} ({res[0][1]}), {res[1][0]} ({res[1][1]}), {res[2][0]} ({res[2][1]})") 
    

hasbase :  hasbase (100.0), hasbasil (80.0), haspizzabase (75.00000000000001)
isbaseof :  is (90.0), hasbase (66.66666666666667), haspizzabase (60.00000000000001)
hastopping :  hastopping (100.0), hasmeattopping (83.33333333333334), haspizzatopping (80.0)
hascountryoforigin :  hasorigin (72.0), hasdough (64.28571428571429), hasham (60.00000000000001)
hasingredient :  hasingredient (100.0), haspizzaingredient (83.87096774193549), hasonion (69.23076923076923)
istoppingof :  is (90.0), hastopping (76.19047619047619), hasseafoodtopping (70.00000000000001)
isingredientof :  is (90.0), hasingredient (81.4814814814815), haspizzaingredient (68.75)
hasspiciness :  hasspiciness (100.0), haspizzaspiciness (82.75862068965517), hasbase (65.45454545454547)


In [37]:
common_obj_prop_pizza_olaf_llm = 5

In [38]:
olaf_no_llm_obj_prop_uri = get_sparql_q_label_res(sparql_q=owl_obj_props_sparql_q, graph=olaf_no_llm_graph, ns=olaf_eswc_ns_bindings)
olaf_no_llm_obj_prop_uri = list({label.lower().replace(str(olaf_eswc_ns), "") for label in olaf_no_llm_obj_prop_uri})

for true_label in pizza_onto_obj_prop_uri:
    res = process.extract(true_label, olaf_no_llm_obj_prop_uri, scorer=fuzz.WRatio, limit=3)
    print(f"{true_label} :  {res[0][0]} ({res[0][1]}), {res[1][0]} ({res[1][1]}), {res[2][0]} ({res[2][1]})") 
    

hasbase :  based (66.66666666666667), are (60.00000000000001), used (60.00000000000001)
isbaseof :  based (72.0), are (60.00000000000001), baked (54.0)
hastopping :  topping (82.35294117647058), topped (60.00000000000001), consisting (50.0)
hascountryoforigin :  ranging (60.00000000000001), based (45.0), prefer (45.0)
hasingredient :  are (60.00000000000001), ranging (55.38461538461539), fired (54.0)
istoppingof :  topping (90.0), topped (60.00000000000001), consisting (57.14285714285714)
isingredientof :  are (60.00000000000001), fired (54.0), topping (45.0)
hasspiciness :  includes (55.38461538461539), suggests (49.090909090909086), based (45.0)


In [39]:
common_obj_prop_pizza_olaf_no_llm = 3

SubClassOf tuples

In [40]:
pizza_onto_sub_uri = get_sparql_q_tuple_res(sparql_q=rdfs_subclassof_tuples_sparql_q, graph=pizza_onto_graph, ns=olaf_eswc_ns_bindings)
pizza_onto_sub_uri = list({f"{label[0].lower().replace(str(pizza_onto_ns), '')}#{label[1].lower().replace(str(pizza_onto_ns), '')}" for label in pizza_onto_sub_uri if not(label[0][0:2]=="nc" or label[1][0:2]=="nc")})

In [41]:
llm_text2owl_sub_uri = get_sparql_q_tuple_res(sparql_q=rdfs_subclassof_tuples_sparql_q, graph=llm_text2owl_graph, ns=olaf_eswc_ns_bindings)
llm_text2owl_sub_uri = list({f"{label[0].lower().replace(str(olaf_eswc_ns), '')}#{label[1].lower().replace(str(olaf_eswc_ns), '')}" for label in llm_text2owl_sub_uri})

for true_label in pizza_onto_sub_uri:
    res = process.extract(true_label, llm_text2owl_sub_uri, scorer=fuzz.WRatio, limit=3)
    print(f"{true_label} :  {res[0][0]} ({res[0][1]}), {res[1][0]} ({res[1][1]}), {res[2][0]} ({res[2][1]})") 
    

realitalianpizza#n7be8cb1f14e84db2af1f493bc7d80cffb259 :  meattopping#topping (39.375), vegetabletopping#topping (38.91891891891892), thinandcrispybase#base (37.67441860465116)
capricciosa#n7be8cb1f14e84db2af1f493bc7d80cffb44 :  capricciosatopping#uniquetopping (52.5), thinandcrispybase#base (37.89473684210526), deeppanbase#base (33.75)
veneziana#n7be8cb1f14e84db2af1f493bc7d80cffb365 :  deeppanbase#base (48.0), vegetabletopping#topping (36.0), thinandcrispybase#base (35.0)
soho#n7be8cb1f14e84db2af1f493bc7d80cffb300 :  seafoodtopping#topping (32.14285714285714), deeppanbase#base (28.799999999999997), thinandcrispybase#base (28.63636363636364)
mushroom#n7be8cb1f14e84db2af1f493bc7d80cffb177 :  mushroomtopping#vegetabletopping (33.333333333333336), deeppanbase#base (28.125), seafoodtopping#topping (28.125)
soho#n7be8cb1f14e84db2af1f493bc7d80cffb305 :  seafoodtopping#topping (32.14285714285714), deeppanbase#base (28.799999999999997), thinandcrispybase#base (28.63636363636364)
parmense#n7be8

In [42]:
common_sub_pizza_text2owl=17

In [43]:
olaf_llm_sub_uri = get_sparql_q_tuple_res(sparql_q=rdfs_subclassof_tuples_sparql_q, graph=olaf_llm_graph, ns=olaf_eswc_ns_bindings)
olaf_llm_sub_uri = list({f"{label[0].lower().replace(str(olaf_eswc_ns), '')}#{label[1].lower().replace(str(olaf_eswc_ns), '')}" for label in olaf_llm_sub_uri})

for true_label in pizza_onto_sub_uri:
    res = process.extract(true_label, olaf_llm_sub_uri, scorer=fuzz.WRatio, limit=3)
    print(f"{true_label} :  {res[0][0]} ({res[0][1]}), {res[1][0]} ({res[1][1]}), {res[2][0]} ({res[2][1]})") 
    

realitalianpizza#n7be8cb1f14e84db2af1f493bc7d80cffb259 :  margheritapizza#pizza (58.378378378378386), vegetarianpizza#pizza (56.84210526315789), meatypizza#pizza (56.25)
capricciosa#n7be8cb1f14e84db2af1f493bc7d80cffb44 :  capricciosapizza#pizza (63.52941176470588), ricotta#fontina (45.0), capers#pizzatoppings (42.0)
veneziana#n7be8cb1f14e84db2af1f493bc7d80cffb365 :  vegetarianpizza#pizza (46.45161290322581), nonvegetarianpizza#pizza (42.35294117647059), vegetarian#pizzatoppings (41.142857142857146)
soho#n7be8cb1f14e84db2af1f493bc7d80cffb300 :  dish#pizza (36.0), doughy#pizzabases (34.61538461538461), seafoodpizza#pizza (31.304347826086957)
mushroom#n7be8cb1f14e84db2af1f493bc7d80cffb177 :  mushrooms#pizzatoppings (54.54545454545455), mushroomtopping#vegetabletopping (33.333333333333336), round#pizzabases (31.034482758620694)
soho#n7be8cb1f14e84db2af1f493bc7d80cffb305 :  dish#pizza (36.0), doughy#pizzabases (34.61538461538461), seafoodpizza#pizza (31.304347826086957)
parmense#n7be8cb1f14

In [44]:
common_sub_pizza_olaf_llm=27

In [45]:
olaf_no_llm_sub_uri = get_sparql_q_tuple_res(sparql_q=rdfs_subclassof_tuples_sparql_q, graph=olaf_no_llm_graph, ns=olaf_eswc_ns_bindings)
olaf_no_llm_sub_uri = list({f"{label[0].lower().replace(str(olaf_eswc_ns), '')}#{label[1].lower().replace(str(olaf_eswc_ns), '')}" for label in olaf_no_llm_sub_uri})

for true_label in pizza_onto_sub_uri:
    res = process.extract(true_label, olaf_no_llm_sub_uri, scorer=fuzz.WRatio, limit=3)
    print(f"{true_label} :  {res[0][0]} ({res[0][1]}), {res[1][0]} ({res[1][1]}), {res[2][0]} ({res[2][1]})") 
    

realitalianpizza#n7be8cb1f14e84db2af1f493bc7d80cffb259 :  main#pizza (72.0), di#pizza (67.5), ad#pizza (67.5)
capricciosa#n7be8cb1f14e84db2af1f493bc7d80cffb44 :  example#capricciosapizza (56.57142857142857), capers#pizza (52.94117647058824), capers#olives (50.0)
veneziana#n7be8cb1f14e84db2af1f493bc7d80cffb365 :  oven#base (55.38461538461539), oven#italian (54.78260869565218), oven#pizza (52.94117647058824)
soho#n7be8cb1f14e84db2af1f493bc7d80cffb300 :  wood#base (50.0), dish#base (50.0), doughy#base (45.0)
mushroom#n7be8cb1f14e84db2af1f493bc7d80cffb177 :  wood#base (50.0), example#mushroomtopping (49.090909090909086), oven#base (40.0)
soho#n7be8cb1f14e84db2af1f493bc7d80cffb305 :  wood#base (50.0), dish#base (50.0), doughy#base (45.0)
parmense#n7be8cb1f14e84db2af1f493bc7d80cffb206 :  non#parmesan (60.00000000000001), fourth#parmesan (51.42857142857142), ad#green (51.42857142857142)
gorgonzolatopping#cheesetopping :  basil#gorgonzola (69.23076923076923), simple#gorgonzola (66.666666666666

In [46]:
common_sub_pizza_olaf_no_llm=6

Results

In [47]:
onto_evaluation = {
    "Metrics": ["Classes precision", "Classes recall", "Individuals precision", "Individuals recall", "Classes and individuals precision", "Classes and individuals recall", "Object properties precision", "Object properties recall", "SubClass of pairs precision", "SubClass of pairs recall"],
    "Text to OWL": [common_classes_pizza_text2owl/len(llm_text2owl_class_labels), common_classes_pizza_text2owl/len(pizza_onto_class_labels), common_ind_pizza_text2owl/len(llm_text2owl_ind_uri), common_ind_pizza_text2owl/len(pizza_onto_ind_uri), common_classes_ind_pizza_text2owl/len(llm_text2owl_classes_ind_uri), common_classes_ind_pizza_text2owl/len(pizza_onto_classes_ind_uri), common_obj_prop_pizza_text2owl/len(pizza_onto_obj_prop_uri), common_obj_prop_pizza_text2owl/len(llm_text2owl_obj_prop_uri), common_sub_pizza_text2owl/len(llm_text2owl_sub_uri), common_sub_pizza_text2owl/len(pizza_onto_sub_uri)],
    "OLAF LLM": [common_classes_pizza_olaf_llm/len(olaf_llm_class_labels), common_classes_pizza_olaf_llm/len(pizza_onto_class_labels), common_ind_pizza_olaf_llm/len(olaf_llm_ind_uri), common_ind_pizza_olaf_llm/len(pizza_onto_ind_uri), common_classes_ind_pizza_olaf_llm/len(olaf_llm_classes_ind_uri), common_classes_ind_pizza_olaf_llm/len(pizza_onto_classes_ind_uri), common_obj_prop_pizza_olaf_llm/len(olaf_llm_obj_prop_uri), common_obj_prop_pizza_olaf_llm/len(pizza_onto_obj_prop_uri), common_sub_pizza_olaf_llm/len(olaf_llm_sub_uri), common_sub_pizza_olaf_llm/len(pizza_onto_sub_uri)],
    "OLAF no LLM": [common_classes_pizza_olaf_no_llm/len(olaf_no_llm_class_labels), common_classes_pizza_olaf_no_llm/len(pizza_onto_class_labels), common_ind_pizza_olaf_no_llm/len(olaf_no_llm_ind_uri), common_ind_pizza_olaf_no_llm/len(pizza_onto_ind_uri), common_classes_ind_pizza_olaf_no_llm/len(olaf_no_llm_classes_ind_uri), common_classes_ind_pizza_olaf_no_llm/len(pizza_onto_classes_ind_uri), common_obj_prop_pizza_olaf_no_llm/len(olaf_no_llm_obj_prop_uri), common_obj_prop_pizza_olaf_no_llm/len(pizza_onto_obj_prop_uri), common_sub_pizza_olaf_no_llm/len(olaf_no_llm_sub_uri), common_sub_pizza_olaf_no_llm/len(pizza_onto_sub_uri)]
}

df_evaluation = pd.DataFrame(onto_evaluation)

In [48]:
df_evaluation

Unnamed: 0,Metrics,Text to OWL,OLAF LLM,OLAF no LLM
0,Classes precision,1.0,0.56701,0.387387
1,Classes recall,0.378947,0.578947,0.452632
2,Individuals precision,0.0,0.010526,0.005848
3,Individuals recall,0.0,0.2,0.4
4,Classes and individuals precision,0.539683,0.556701,0.130243
5,Classes and individuals recall,0.239437,0.380282,0.415493
6,Object properties precision,0.25,0.064935,0.136364
7,Object properties recall,1.0,0.625,0.375
8,SubClass of pairs precision,0.515152,0.236842,0.011583
9,SubClass of pairs recall,0.066148,0.105058,0.023346
