In [None]:
import pandas as pd

In [None]:
# Load the "CAN Filler" sheet with no header row, so the columns keep their
# integer positions (the cells below address them as df[1], 2, 4, ...).
df = pd.read_excel(
    "../files/KHS.xlsx",
    sheet_name="CAN Filler",
    header=None,
)
df.head(n=15)

In [None]:
# Wider preview of the raw sheet, used to locate the section title rows by eye.
df.head(50)

# Performance and Warranty #

In [None]:
# Row labels of the two section titles in column 1; the performance table sits
# between them. Indexing with [0] raises IndexError if a title is not found.
performance_start = df.index[df[1] == '1.0 PERFORMANCE and WARRANTY'][0]
general_info_start = df.index[df[1] == '2.0 GENERAL INFORMATION'][0]

In [None]:
# Rows strictly between the two section titles (.loc slices by label and
# includes the end label).
performance_first_row = performance_start + 1
performance_last_row = general_info_start - 1
performance_df = df.loc[performance_first_row:performance_last_row]

In [None]:
# Inspect the raw slice; its first row still holds the column titles.
performance_df

In [None]:
# Promote the first row of the section to column labels and drop it from the data.
# NOTE: this cell rebinds performance_df from itself, so running it twice
# consumes a second row as the header.
performance_header = performance_df.iloc[0]
performance_df = performance_df.iloc[1:]
performance_df.columns = performance_header
performance_df

In [None]:
# Keep only the columns that get serialised, in this order.
performance_columns = [
    "CNV",
    "ITEM DESCRIPTION",
    "UNIT",
    "INDEX",
    "Instruction / Comments",
    "Supplier Answers",
]
performance_df = performance_df[performance_columns]
performance_df

In [None]:
import json

# Serialise the performance rows as a list of {column: value} records.
# orient='records' never writes the index, so no `index` argument is passed:
# pandas releases before 2.1 raise ValueError for index=False with this orient.
df_json = performance_df.to_json(orient='records')

# Round-trip through the json module to pretty-print the payload and keep
# non-ASCII characters unescaped. Despite its name, json_str holds the parsed
# list of records, not a string.
json_str = json.loads(df_json)
json_performance = json.dumps(json_str, indent=2, sort_keys=False, ensure_ascii=False)
print(json_performance)


# Constructive characteristics #

In [None]:
# Row labels of the two section titles in column 1; the constructive table sits
# between them. Indexing with [0] raises IndexError if a title is not found.
constructive_start = df.index[df[1] == '4.0 CONSTRUCTIVE CHARACTERISTICS'][0]
accessories_info_start = df.index[df[1] == '5.0 FILLER ACCESSORIES'][0]

In [None]:
# Rows strictly between the two section titles (.loc slices by label and
# includes the end label).
constructive_first_row = constructive_start + 1
constructive_last_row = accessories_info_start - 1
constructive_df = df.loc[constructive_first_row:constructive_last_row]

In [None]:
# Skip the first row of the section, drop positional columns 0, 3, 6 and 7, and
# give the remaining positional columns readable names.
# NOTE: this cell rebinds constructive_df from itself, so it is not safe to re-run.
constructive_column_names = {
    1: "Item",
    2: "Characteristic",
    4: "Unit",
    5: "Instruction / Comments",
    8: "Supplier Answers",
}
constructive_df = (
    constructive_df
    .iloc[1:, :]
    .drop(columns=[0, 3, 6, 7])
    .rename(columns=constructive_column_names)
)

In [None]:
# Inspect the cleaned constructive-characteristics table.
constructive_df

In [None]:
import json

# Serialise the constructive-characteristics rows as a list of records.
# orient='records' never writes the index, so no `index` argument is passed:
# pandas releases before 2.1 raise ValueError for index=False with this orient.
df_json = constructive_df.to_json(orient='records')

# Despite its name, json_str holds the parsed list of records, not a string.
json_str = json.loads(df_json)

# Stored under its own name so it does not overwrite `json_performance`, which
# the LLM classification section reads as its input payload.
json_constructive = json.dumps(json_str, indent=2, sort_keys=False, ensure_ascii=False)
print(json_constructive)

# LLM Classification #

In [None]:
import os
from dotenv import load_dotenv

from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser, PydanticOutputParser
from langchain_openai import AzureChatOpenAI

In [None]:
# Presumably loads the AZURE_OPENAI_* variables read by the model cell from a
# local .env file — confirm the file exists before running.
load_dotenv()

In [None]:
# Azure OpenAI chat model. Endpoint, key and deployment name are read from the
# environment; a missing variable raises KeyError here.
model = AzureChatOpenAI(
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    openai_api_key=os.environ["AZURE_OPENAI_API_KEY"],
    deployment_name=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"],
    api_version="2023-09-01-preview",
    temperature=0.0,
)

In [None]:
class PerformanceEvaluation(BaseModel):
    """Schema for one evaluated row of the performance table.

    Every field is declared with ``default=None``, so any of them may be omitted.

    NOTE(review): the attribute names differ from the column names selected
    into ``performance_df`` ("ITEM DESCRIPTION", "Instruction / Comments",
    "Supplier Answers"), and the prompt also asks the model for a "REASON" key
    that is not declared here — confirm the mapping before wiring this class
    into an output parser.
    """

    # No trailing comma after each Field(...): a trailing comma turns the
    # default into a one-element tuple instead of a field definition.
    CNV: str = Field(default=None)
    INDEX: str = Field(default=None)
    DESCRIPTION: str = Field(default=None)
    Instruction: str = Field(default=None)
    SUPPLIER_ANSWERS: str = Field(default=None)
    UNIT: str = Field(default=None)
    COMMENT: str = Field(default=None)

In [None]:
# Output parser for the chain, constructed without a pydantic_object; the
# schema-bound alternative is kept below for reference.
parser = JsonOutputParser()
# parser = JsonOutputParser(pydantic_object=PerformanceEvaluation)

In [None]:
#parser = PydanticOutputParser(pydantic_object=PerformanceEvaluation)

In [None]:
# Stricter prompt variant: it additionally tells the model to answer
# "Not enough information" when it cannot judge a row.
# NOTE(review): `prompt_template` is reassigned in the next cell before `prompt`
# is built, so under Restart & Run All this variant is never used.
prompt_template = """
    You are an expert at selecting suppliers that will provide equipments to the company you work for.
    The suppliers fill a spreadsheet with their machines' specifications and based on that, you check if the answers correspond to what your company requires to make them an official supplier.

    In this task, you will analyze this data:
    
    ```
    {json_data}
    ```

    For each JSON object, you will check if the supplier's answer can fill the requirements according to the other fields.
    The CNV field describes what is being analyzed, in case you need more information.

    GUIDELINES:
    - For each JSON object, you should add another field called "COMMENT" and it should only contain "OK" or "NOK". "OK" in case the supplier's answer can fill the requirement or "NOK" in case the supplier's answer does not fill the requirement.
    - For each JSON object, you should add another field called "REASON", and it should contain the reason for you to label the answer as "OK" or "NOK".
    - Your response should only contain a valid JSON with the analysis made.
    - If there's not enough information to make the analysis, in the field "COMMENT" just write "Not enough information".
    - Don't evaluate the suppliers answers if you don't know if they fill the requirements.
    """

In [None]:
# Prompt text used to build `prompt`. `{json_data}` is its only placeholder, and
# the instructions refer to the CNV, ITEM DESCRIPTION and INDEX keys of the
# JSON records.
prompt_template = """
    You are an expert at selecting suppliers that will provide equipments to the company you work for.
    The suppliers fill a spreadsheet with their machines' specifications and based on that, you check if the answers correspond to what your company requires to make them an official supplier.

    In this task, you will analyze this data:
    
    ```
    {json_data}
    ```

    For each JSON object, you will check if the supplier's answer can fill the requirements according to the ITEM DESCRIPTION and/or the INDEX fields.
    The CNV field describes what is being analyzed, in case you need more information.

    GUIDELINES:
    - For each JSON object, you should add another field called "COMMENT" and it should only contain "OK" or "NOK". "OK" in case the supplier's answer can fill the requirement or "NOK" in case the supplier's answer does not fill the requirement.
    - For each JSON object, you should add another field called "REASON", and it should contain the reason for you to label the answer as "OK" or "NOK".
    - Your response should only contain a valid JSON with the analysis made.
    """

In [None]:
# Bind the template text to its single placeholder, `json_data`.
prompt = PromptTemplate(input_variables=["json_data"], template=prompt_template)

In [None]:
# Pipeline: fill the prompt, call the chat model, then parse its reply with `parser`.
chain = prompt | model | parser

In [None]:
# Payload substituted into the prompt's {json_data} placeholder.
# NOTE(review): `json_performance` is whatever was last bound to that name by
# the cells above — confirm it holds the table you intend to classify.
json_data = json_performance

In [None]:
# Run the chain once over the whole payload (calls the Azure OpenAI deployment).
result = chain.invoke({"json_data": json_data})

In [None]:
# Show the parsed output returned by the chain.
result