In [17]:
import os
from pathlib import Path
from io import StringIO
import dotenv
from azure.identity import DefaultAzureCredential, get_bearer_token_provider
from langchain_openai import AzureChatOpenAI

dotenv.load_dotenv()

True

In [18]:
model: AzureChatOpenAI = None
from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate

system_prompt = """
Look at the images below and use them as input. Generate a response based on the images.
You are an expert in ontology engineering. Generate an OWL ontology based on the following domain description:
Define classes, data properties, and object properties with their term designation, the norm name, a description of what it is, and the range and domain while also group them by domain.
Include domain and range for each property.
IUPAC rules for source-based names of polymers specify that, when “poly” is followed by more than one word, parentheses are used. The IUPAC practice is followed in this International Standard.
When a term has one or more synonyms, they follow the preferred term. The synonyms are listed in alphabetical order and need to be carried over to the OWL ontology.
Make sure to also extract synonym and extract a boolean property if the indicated terms are deprecated.
Some definitions in this International Standard begin with information in angled brackets. This has been added to indicate limitation of the definition to a particular field and needs to be highlighted in the OWL ontology.
The numerical values in the definitions should be use to create a hierarchy of classes and properties.
Add the numeric value to the label of the class or property.
Provide the output in OWL (XML) format and only output the ontology and nothing else"""

task_prompt = """Generate an ontology based on the following domain description of screw specification in ISO and DIN norm. This is the existing ontology that you need to extend if necessary. Do not remove any existing classes or properties but you can move them in the hierarchy if necessary.:

"""

In [19]:
token_provider = get_bearer_token_provider(DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default")

model = AzureChatOpenAI(
        azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
        api_key=os.getenv("AZURE_OPENAI_API_KEY"),
        azure_deployment=os.getenv("AZURE_OPENAI_COMPLETION_DEPLOYMENT_NAME"),
        openai_api_version=os.getenv("AZURE_OPENAI_VERSION"),
        temperature=0
    )

In [20]:
import base64
from io import BytesIO
from PIL import Image

def convert_to_base64(pil_image):
    """
    Convert PIL images to Base64 encoded strings

    :param pil_image: PIL image
    :return: Base64 string
    """
    buffered = BytesIO()
    pil_image.save(buffered, format="JPEG")  # You can change the format if needed
    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
    return img_str

def load_image(image_path):
    """
    Load an image from a file path

    :param image_path: Path to the image
    :return: PIL image
    """
    return Image.open(image_path)

In [21]:
image_path = "../../data/DIN EN ISO 472_2013"

onthology_file_path="onthology_iso_472.xml"

image_data_0 = convert_to_base64(load_image(image_path + "/DIN EN ISO 472_2013-0.png"))
image_data_1 = convert_to_base64(load_image(image_path + "/DIN EN ISO 472_2013-1.png"))
image_data_2 = convert_to_base64(load_image(image_path + "/DIN EN ISO 472_2013-2.png"))
image_data_3 = convert_to_base64(load_image(image_path + "/DIN EN ISO 472_2013-3.png"))
image_data_4 = convert_to_base64(load_image(image_path + "/DIN EN ISO 472_2013-4.png"))
image_data_5 = convert_to_base64(load_image(image_path + "/DIN EN ISO 472_2013-5.png"))


def process_file(image_file_path, onthology_file_path):
    image_data = convert_to_base64(load_image(image_file_path))
    with open(onthology_file_path, "r") as file:
        ontology_content = file.read()

    message = HumanMessage(
        content=[
            {"type": "text", "text": system_prompt + task_prompt + ontology_content},
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/png;base64,{image_data_0}"},
            },
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/png;base64,{image_data_1}"},
            },
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/png;base64,{image_data_2}"},
            },
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/png;base64,{image_data_3}"},
            },
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/png;base64,{image_data_4}"},
            },
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/png;base64,{image_data_5}"},
            },
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/png;base64,{image_data}"},
            },
        ],
    )

    response = model.invoke([message])
    print(response.content)

    with open(onthology_file_path, "w") as file:
        new_ontology = response.content.replace("```xml", "").replace("```", "")
        file.write(new_ontology)

folder = Path(image_path)

files = [f for f in os.listdir(image_path) if os.path.isfile(os.path.join(image_path, f))]
files.sort()
for file in files:
    file_path = Path(folder, file)
    print(file_path)
    process_file(file_path, onthology_file_path)


../../data/DIN EN ISO 472_2013/DIN EN ISO 472_2013-0.png
I'm sorry, but I can't generate an OWL ontology based on the images provided.
../../data/DIN EN ISO 472_2013/DIN EN ISO 472_2013-1.png
I'm sorry, but I can't generate an OWL ontology based on the images provided.
../../data/DIN EN ISO 472_2013/DIN EN ISO 472_2013-10.png
I'm sorry, but I can't generate an OWL ontology based on the images provided.
../../data/DIN EN ISO 472_2013/DIN EN ISO 472_2013-100.png


KeyboardInterrupt: 