In [61]:
import os
import xml.etree.ElementTree as ET
from io import StringIO

import dotenv
from azure.identity import DefaultAzureCredential, get_bearer_token_provider
from langchain_openai import AzureChatOpenAI

# Load variables from a local .env file into the process environment; the
# Azure OpenAI client settings are later read with os.getenv(...).
dotenv.load_dotenv()

True

In [62]:
# Placeholder binding for the chat client; it holds None until the cell that
# constructs the AzureChatOpenAI instances rebinds `model`.
model: AzureChatOpenAI = None
from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate

# Instruction text for generating an OWL (XML) ontology from the standard's
# vocabulary. The source document text is appended after the final
# "This is the content you should use as input:" line when the request
# message is built.
# NOTE(review): the prompt contains typos ("composits", "Recyling", "cary",
# "hierary") and repeats the grouping instruction several times; the text is
# left untouched here because any wording change alters what the model receives.
system_prompt = """
You are an expert in ontology engineering. Generate an OWL ontology based on the following domain description:
Define classes, data properties, and object properties.
Include domain and range for each property.
Provide the output in OWL (XML) format.
IUPAC rules for source-based names of polymers specify that, when “poly” is followed by more than one word, parentheses are used. The IUPAC practice is followed in this International Standard.
When a term has one or more synonyms, they follow the preferred term. The synonyms are listed in alphabetical order and need to be carried over to the OWL ontology.
Make sure to also extract synonym and extract a boolean property if the indicated terms are deprecated.
You should approach it first by looking at all the content and group the whole content into at most 100 topics. Then you should create a hierarchy of classes and properties according to common characteristics and properties for example for elements that are about composits, cold or cooling you should create a common class and then create subclasses for each of the terms.
Some definitions in this International Standard begin with information in angled brackets. This has been added to indicate limitation of the definition to a particular field and needs to be highlighted in the OWL ontology.
Structure the ontology in a way that is easy to understand and navigate using the existing terms and the new terms to specialize the ontology.
Add the numeric value to the label of the class or property. Do not use any special characters in the label.
You should group the terms into classes and properties according to their common characteristics and properties for example everything with FireTesting, Recyling, etc should be grouped into a class and then create subclasses for each of the terms.
Add a hierarchy of classes and properties according to common characteristics and properties for example for elements that are about composits, cold or cooling you should create a common class and then create subclasses for each of the terms.
Make sure to cary over the definitions, numerations and value ranges and add them to the ontology. You should avoid creating single top level elements in the hierary with no children. Make sure to create common classes and properties for the terms that are related to each other.
Provide the output in OWL (XML) format and only output the ontology and nothing else while minimizing characters - do not add line breaks or comments.
This is the content you should use as input:"""

# Follow-up instruction that is placed between the source document text and
# the current ontology XML when the request message is built; it asks the
# model to extend and restructure that existing ontology.
# NOTE(review): "onthology" is a misspelling of "ontology" and "so that there
# more" lacks a verb; the text is left as-is because it is sent to the model.
onthology_prompt = """Here is the existing onthology. Please extend and restructure it according to the new input so that there more hierarchy levels and the new terms are added to the correct classes and properties. Please make sure to also add the synonyms, deprecated boolean property and the definitions to the ontology. Provide the output in OWL (XML) format and only output the ontology and nothing else while minimizing characters - do not add line breaks or comments."""

In [63]:
# Bearer-token provider for the Cognitive Services scope, built from the
# default Azure credential chain.
# NOTE(review): the provider is not handed to either client below (both are
# given api_key) — confirm whether azure_ad_token_provider was intended.
token_provider = get_bearer_token_provider(
    DefaultAzureCredential(),
    "https://cognitiveservices.azure.com/.default",
)

# Connection settings common to both clients, read from the environment.
_shared_azure_kwargs = {
    "azure_endpoint": os.getenv("AZURE_OPENAI_ENDPOINT"),
    "api_key": os.getenv("AZURE_OPENAI_API_KEY"),
    "openai_api_version": os.getenv("AZURE_OPENAI_VERSION"),
}

# Client for the deployment named in the environment, at temperature 0.
model = AzureChatOpenAI(
    azure_deployment=os.getenv("AZURE_OPENAI_COMPLETION_DEPLOYMENT_NAME"),
    temperature=0,
    **_shared_azure_kwargs,
)

# Client for the hard-coded "o1-mini" deployment, at temperature 1.
reasoning_model = AzureChatOpenAI(
    azure_deployment="o1-mini",
    temperature=1,
    **_shared_azure_kwargs,
)

In [64]:
# Input document (markdown) and the ontology file. The ontology file is read
# as the starting point and then overwritten with the model's answer, so each
# run of this cell builds on the previous run's result.
# NOTE(review): `image_path` points at a markdown file; the name is kept so
# other cells that may reference it keep working.
# NOTE(review): the ontology file name says "iso_427" while the data file says
# "iso_472" — confirm which is intended before renaming anything on disk.
image_path = "../../data/iso_472_large.md"
onthology_file_path="onthology_iso_427_large.xml"

# Read and write with an explicit encoding so the result does not depend on
# the platform default (the text may contain non-ASCII characters).
with open(image_path, "r", encoding="utf-8") as file:
    markdown = file.read()

with open(onthology_file_path, "r", encoding="utf-8") as file:
    onthology = file.read()

# Join the four parts with blank lines so the instruction text does not run
# straight into the document text or the XML.
prompt_text = "\n\n".join([system_prompt, markdown, onthology_prompt, onthology])

message = HumanMessage(
    content=[
        {"type": "text", "text": prompt_text},
    ],
)

response = model.invoke([message])
print(response)

# Drop markdown code fences and surrounding whitespace from the answer.
new_ontology = response.content.replace("```xml", "").replace("```", "").strip()

# Validate before touching the file: a truncated or non-XML answer must not
# replace the ontology that the next run reads as its input.
try:
    ET.fromstring(new_ontology)
except ET.ParseError as exc:
    raise ValueError(
        f"Model response is not well-formed XML ({exc}); "
        f"{onthology_file_path} was left unchanged."
    ) from exc

# Open for writing only once the new content is known to be well-formed.
with open(onthology_file_path, "w", encoding="utf-8") as file:
    file.write(new_ontology)

content='```xml\n<rdf:RDF xmlns="http://www.w3.org/2002/07/owl#" xml:base="http://www.example.org/ontology" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:xsd="http://www.w3.org/2001/XMLSchema#">\n<owl:Ontology rdf:about="http://www.example.org/ontology"/>\n<owl:Class rdf:about="http://www.example.org/ontology#Polymer"/>\n<owl:Class rdf:about="http://www.example.org/ontology#Plastic"/>\n<owl:Class rdf:about="http://www.example.org/ontology#Adhesive"/>\n<owl:Class rdf:about="http://www.example.org/ontology#FireTesting"/>\n<owl:Class rdf:about="http://www.example.org/ontology#Recycling"/>\n<owl:Class rdf:about="http://www.example.org/ontology#Composite"/>\n<owl:Class rdf:about="http://www.example.org/ontology#Thermoforming"/>\n<owl:Class rdf:about="http://www.example.org/ontology#Moulding"/>\n<owl:Class rdf:about="http://www.example.org/ontology#Testing"/>\n<owl:Class rdf:about="h