# Chamar o LLM #

In [89]:
import os
from dotenv import load_dotenv

from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser, PydanticOutputParser, CommaSeparatedListOutputParser
from langchain_openai import AzureChatOpenAI

load_dotenv()

True

In [90]:
# Azure OpenAI chat client used to build the evaluation chain. Endpoint, API
# key and deployment name are read from the environment (a missing variable
# raises KeyError); the temperature is fixed at 0.0.
model = AzureChatOpenAI(
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    openai_api_key=os.environ["AZURE_OPENAI_API_KEY"],
    deployment_name=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"],
    api_version="2023-09-01-preview",
    temperature=0.0,
)

In [91]:
# Prompt sent to the model for every sheet section. `{df_data}` is the only
# placeholder; it is filled with the section data when the chain is invoked.
# NOTE(review): the first guideline restricts "COMMENT" to "OK"/"NOK", while a
# later guideline asks for "Not enough information" -- the two instructions
# conflict; confirm which one is intended.
prompt_template = """
    You are an expert at selecting suppliers that will provide equipments to the company you work for.
    The suppliers fill a spreadsheet with their machines' specifications and based on that, you check if the answers correspond to what your company requires to make them an official supplier.

    In this task, you will analyze this data:
    
    ```
    {df_data}
    ```

    For each supplier answer, you will check if the answer can fill the requirements according to the other fields and return a JSON.

    GUIDELINES:
    - In the dataframe, you must add another field called "COMMENT" and it should only contain "OK" or "NOK". "OK" in case the supplier's answer can fill the requirement or "NOK" in case the supplier's answer does not fill the requirement.
    - You must return a valid JSON structure in your response, without any additional commentary, only the JSON.
    - The JSON structure will be converted to a Dataframe, so return a structure that will make the conversion possible.
    - If there's not enough information to make the analysis, in the field "COMMENT" just write "Not enough information".
    - Don't evaluate the suppliers answers if you don't know if they fill the requirements.
    """

In [92]:
# Prompt with a single placeholder, `df_data`, supplied on every invocation.
prompt = PromptTemplate(template=prompt_template, input_variables=["df_data"])

# Parser applied to the model reply (JSON output parser).
parser = JsonOutputParser()

# Pipeline: render the prompt -> query the model -> parse the reply.
chain = prompt | model | parser


In [93]:
def call_llm(df, supplier, section_name):
    """Evaluate one sheet section with the LLM chain and save the answer as CSV.

    Args:
        df: Section data substituted into the prompt's ``df_data`` placeholder
            (the callers in this file pass a CSV-formatted string).
        supplier (str): Supplier identifier. NOTE(review): currently unused, so
            the output path does not depend on it and a run for another
            supplier would overwrite these files -- confirm the intent.
        section_name (str): Section title; it becomes the output file name
            with every "/" replaced by a space.

    Returns:
        pd.DataFrame: The parsed LLM answer that was written to disk.
    """
    result = chain.invoke({"df_data": df})
    result_df = pd.DataFrame(result)

    output_path = f'../result/depalletizer/{section_name.replace("/", " ")}.csv'
    # to_csv does not create missing folders, so make sure the target exists.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    result_df.to_csv(output_path, index=False)

    return result_df

# Extrair e tratar dados da planilha de respostas do fornecedor #

In [94]:
import pandas as pd
from openpyxl import load_workbook
import json

In [95]:
def extract_info(file_path, sheet_name, column_letters, header_row=15, first_data_row=14):
    """Extract selected columns of an Excel sheet into a DataFrame.

    Args:
        file_path (str): Path to the input Excel file.
        sheet_name (str): Name of the sheet to extract data from.
        column_letters (list of str): Letters of the columns to extract.
        header_row (int): 1-based sheet row holding the column names.
            Defaults to 15.
        first_data_row (int): 1-based first sheet row copied into the result.
            Defaults to 14, so with the default values the header row itself
            is also present in the returned data.

    Returns:
        pd.DataFrame: One column per requested letter, named after the cell
        found in ``header_row``. If two letters share the same header value,
        the later column replaces the earlier one.

    Raises:
        ValueError: If ``column_letters`` is empty.
    """
    if not column_letters:
        raise ValueError("column_letters must contain at least one column letter")

    # data_only=True: read the cached results of formulas, not the formulas.
    workbook = load_workbook(file_path, data_only=True)
    sheet = workbook[sheet_name]

    # Column names come from the header row of each requested column.
    column_names = [sheet[f'{letter}{header_row}'].value for letter in column_letters]

    # Collect the cell values of every column from first_data_row downwards.
    data = {}
    for letter, name in zip(column_letters, column_names):
        data[name] = [cell.value for cell in sheet[letter] if cell.row >= first_data_row]

    # Pad shorter columns with None so that all of them have the same length.
    max_length = max(len(values) for values in data.values())
    for values in data.values():
        values.extend([None] * (max_length - len(values)))

    return pd.DataFrame(data)

Planilhas a serem extraídas:

In [96]:
# Supplier answer workbooks (relative paths).
file_path_khs = '../files/KHS.xlsx'
file_path_krones = '../files/KRONES.xlsx'
file_path_sidel = '../files/SIDEL.xlsx'

# Worksheet to read and the columns to extract from it (B through J).
sheet_filler = 'Empty can depalletizer'
column_filler = list('BCDEFGHIJ')

Seções Empty Can Depalletizer:

In [97]:
# Section titles of the sheet mapped to their row boundaries. Every entry
# starts with empty "start"/"end" placeholders; add_indexes overwrites them
# with row positions. Each title gets its own placeholder dict.
depalletizer_sections = {
    title: {"start": "", "end": ""}
    for title in (
        "1.0 PERFORMANCE AND WARRANTY",
        "2.0 GENERAL INFORMATION",
        "3.0 PROCESS",
        "4.0 MECHANICAL CHARACTERISTICS",
        "5.0 DIMENSIONS / WEIGHT",
        "6.0 MATERIALS",
        "7.0 SAFETY",
        "8.0 ELECTRICITY, CONTROL, ALARM, AUTOMATION",
        "9.0 POWER CABINETS",
        "10.0 UTILITIES CONSUMPTION",
        "11.0 MAINTENANCE / DOCUMENTATION / TRAINING",
        "12.0 TEC EMPTY CAN DEPALLETIZER ACCEPTANCE",
        "13.0 ZONE DEMANDS DUE TO LOCAL REGULATIONS",
        "14.0 HISTORY OF REVISIONS",
    )
}


Identificar os índices das seções:

In [98]:
def is_section(item):
    """Return True when ``item`` is a string that is a key of depalletizer_sections."""
    # Non-string cells (None, numbers, ...) can never be a section title.
    if not isinstance(item, str):
        return False
    return item in depalletizer_sections

In [99]:
def get_section_index(df):
    """List the index labels of the section title rows, followed by ``len(df)``.

    A row counts as a title row when its 'ITEM' value satisfies ``is_section``.
    The trailing ``len(df)`` closes the last section.
    """
    title_mask = df['ITEM'].apply(is_section)
    boundaries = df[title_mask].index.tolist()
    return boundaries + [len(df)]

In [100]:
def add_indexes(df, indexes, sections_dict):
    """Store the start/end row of every section in ``sections_dict`` (in place).

    Args:
        df: DataFrame whose 'ITEM' column holds the section title at each
            position listed in ``indexes`` (except the last one).
        indexes: Title row labels in ascending order plus a final end marker.
        sections_dict: Mapping of section title -> dict; its "start" and "end"
            entries are overwritten.

    For each consecutive pair of boundaries, "start" is the row right after
    the title row and "end" is the next boundary.
    """
    for title_row, next_boundary in zip(indexes, indexes[1:]):
        bounds = sections_dict[df.loc[title_row, 'ITEM']]
        bounds["start"] = title_row + 1
        bounds["end"] = next_boundary

Separar planilhas:

In [101]:
def separate_df(df_supplier, sections, section_name):
    """Cut one section out of the supplier data and send it to the LLM as CSV.

    Args:
        df_supplier: DataFrame with the whole supplier sheet.
        sections: Mapping of section title -> {"start": ..., "end": ...} row
            positions used to slice ``df_supplier``.
        section_name: Title of the section to process.

    The slice is re-indexed from 0, columns are renamed, row 0 and the unused
    columns are dropped, and the CSV text is passed to ``call_llm``.
    """
    bounds = sections[section_name]
    section_rows = df_supplier.iloc[bounds['start']:bounds['end']].reset_index(drop=True)

    # The rename happens before the drop, so the drop sees the new names.
    renamed = section_rows.rename(columns={
        'CNV': 'Charachteristic',
        'KPIs for Depal': 'UNIT',
        'Fails Rate': 'Instruction / Comments',
    })
    trimmed = renamed.drop(
        index=0,
        columns=['ITEM', 'Detail', 'Instruction', 'Comments', 'Score'],
    )

    call_llm(trimmed.to_csv(index=False), 'khs', section_name)

In [102]:
def separate_df_performance(df_supplier, sections, section_name):
    """Cut one section out of the supplier data and send it to the LLM as CSV.

    Variant without column renaming: after slicing by the section's
    "start"/"end" positions and re-indexing from 0, rows 0 and 1 and the
    'ITEM' and 'Score' columns are dropped before calling ``call_llm``.
    """
    bounds = sections[section_name]
    section_rows = df_supplier.iloc[bounds['start']:bounds['end']].reset_index(drop=True)

    trimmed = section_rows.drop(index=[0, 1], columns=['ITEM', 'Score'])

    call_llm(trimmed.to_csv(index=False), 'khs', section_name)

In [103]:
def separate_df_information_maintenance(df_supplier, sections, section_name):
    """Cut one section out of the supplier data and send it to the LLM as CSV.

    Variant that renames 'CNV' and 'KPIs for Depal' and additionally discards
    the 'Fails Rate' column. The slice is re-indexed from 0 and row 0 is
    dropped before the CSV text is passed to ``call_llm``.
    """
    bounds = sections[section_name]
    section_rows = df_supplier.iloc[bounds['start']:bounds['end']].reset_index(drop=True)

    # The rename happens before the drop, so the drop sees the new names.
    renamed = section_rows.rename(columns={
        'CNV': 'Charachteristic',
        'KPIs for Depal': 'Instructions',
    })
    trimmed = renamed.drop(
        index=0,
        columns=['ITEM', 'Detail', 'Fails Rate', 'Comments', 'Score', 'Instruction'],
    )

    call_llm(trimmed.to_csv(index=False), 'khs', section_name)

In [104]:
def separate_df_mechanical_power(df_supplier, sections, section_name):
    """Cut one section out of the supplier data and send it to the LLM as CSV.

    Variant that renames 'KPIs for Depal' to 'Instruction / Comments' and
    discards the 'Fails Rate' column. The slice is re-indexed from 0 and
    row 0 is dropped before the CSV text is passed to ``call_llm``.
    """
    bounds = sections[section_name]
    section_rows = df_supplier.iloc[bounds['start']:bounds['end']].reset_index(drop=True)

    # The rename happens before the drop, so the drop sees the new names.
    renamed = section_rows.rename(columns={
        'CNV': 'Charachteristic',
        'KPIs for Depal': 'Instruction / Comments',
    })
    trimmed = renamed.drop(
        index=0,
        columns=['ITEM', 'Detail', 'Fails Rate', 'Instruction', 'Comments', 'Score'],
    )

    call_llm(trimmed.to_csv(index=False), 'khs', section_name)
    

In [105]:
def separate_df_utilities(df_supplier, sections, section_name):
    """Cut one section out of the supplier data and send it to the LLM as CSV.

    Args:
        df_supplier: DataFrame with the whole supplier sheet.
        sections: Mapping of section title -> {"start": ..., "end": ...} row
            positions used to slice ``df_supplier``.
        section_name: Title of the section to process.

    Same column handling as ``separate_df``, but rows 0 and 3 of the
    re-indexed slice are dropped instead of row 0 only.
    """
    bounds = sections[section_name]
    section_rows = df_supplier.iloc[bounds['start']:bounds['end']].reset_index(drop=True)

    # The rename happens before the drop, so the drop sees the new names.
    # 'Instruction / Comments' previously carried a stray trailing tab, which
    # leaked into the CSV header sent to the LLM; it now matches separate_df.
    renamed = section_rows.rename(columns={
        'CNV': 'Charachteristic',
        'KPIs for Depal': 'UNIT',
        'Fails Rate': 'Instruction / Comments',
    })
    trimmed = renamed.drop(
        index=[0, 3],
        columns=['ITEM', 'Detail', 'Instruction', 'Comments', 'Score'],
    )

    call_llm(trimmed.to_csv(index=False), 'khs', section_name)


In [106]:
def separate_df_zone(df_supplier, sections, section_name):
    """Cut one section out of the supplier data and send it to the LLM as CSV.

    Variant that drops first and renames afterwards: row 0 of the re-indexed
    slice and the 'ITEM', 'KPIs for Depal', 'Fails Rate' and 'Score' columns
    are removed, then 'CNV' and 'Instruction / Comments' are renamed before
    the CSV text is passed to ``call_llm``.
    """
    bounds = sections[section_name]
    section_rows = df_supplier.iloc[bounds['start']:bounds['end']].reset_index(drop=True)

    trimmed = section_rows.drop(
        index=[0],
        columns=['ITEM', 'KPIs for Depal', 'Fails Rate', 'Score'],
    )
    renamed = trimmed.rename(columns={
        'CNV': 'Zone',
        'Instruction / Comments': 'Instruction',
    })

    call_llm(renamed.to_csv(index=False), 'khs', section_name)


# KHS: #

In [107]:
# Load the configured columns of the KHS workbook's sheet into a DataFrame.
df_khs = extract_info(file_path_khs, sheet_filler, column_filler)

Índices:

In [108]:
# Locate the section title rows in the KHS data and record every section's
# start/end row positions in depalletizer_sections (updated in place).
khs_indexes = get_section_index(df_khs)
# add_indexes has no return value; `_` simply receives None.
_ = add_indexes(df_khs, khs_indexes, depalletizer_sections)
# Bare expression: displayed as the cell output when run in a notebook.
depalletizer_sections

{'1.0 PERFORMANCE AND WARRANTY': {'start': 1, 'end': 19},
 '2.0 GENERAL INFORMATION': {'start': 20, 'end': 29},
 '3.0 PROCESS': {'start': 30, 'end': 44},
 '4.0 MECHANICAL CHARACTERISTICS': {'start': 45, 'end': 55},
 '5.0 DIMENSIONS / WEIGHT': {'start': 56, 'end': 64},
 '6.0 MATERIALS': {'start': 65, 'end': 78},
 '7.0 SAFETY': {'start': 79, 'end': 83},
 '8.0 ELECTRICITY, CONTROL, ALARM, AUTOMATION': {'start': 84, 'end': 111},
 '9.0 POWER CABINETS': {'start': 112, 'end': 124},
 '10.0 UTILITIES CONSUMPTION': {'start': 125, 'end': 132},
 '11.0 MAINTENANCE / DOCUMENTATION / TRAINING': {'start': 133, 'end': 149},
 '12.0 TEC EMPTY CAN DEPALLETIZER ACCEPTANCE': {'start': 150, 'end': 152},
 '13.0 ZONE DEMANDS DUE TO LOCAL REGULATIONS': {'start': 153, 'end': 162},
 '14.0 HISTORY OF REVISIONS': {'start': 163, 'end': 171}}

Planilhas separadas:

In [109]:
import time

# Run the LLM evaluation for sections 1.0 to 13.0 of the KHS data; each call
# goes through call_llm, which writes one CSV file per section.
# NOTE(review): the separate_* helpers defined in this file have no return
# statement, so every df_* name below is bound to None.
# "14.0 HISTORY OF REVISIONS" is not evaluated here.
df_performance = separate_df_performance(df_khs, depalletizer_sections, "1.0 PERFORMANCE AND WARRANTY")
df_general_info = separate_df_information_maintenance(df_khs, depalletizer_sections, "2.0 GENERAL INFORMATION")
df_process = separate_df(df_khs, depalletizer_sections, "3.0 PROCESS")

# Pause 15 seconds between batches of calls -- presumably to stay within the
# API rate limit; TODO confirm.
time.sleep(15)

df_characteristics = separate_df_mechanical_power(df_khs, depalletizer_sections, "4.0 MECHANICAL CHARACTERISTICS")
df_dimensions = separate_df(df_khs, depalletizer_sections, "5.0 DIMENSIONS / WEIGHT")
df_materials = separate_df(df_khs, depalletizer_sections, "6.0 MATERIALS")

time.sleep(15)

df_safety = separate_df(df_khs, depalletizer_sections, "7.0 SAFETY")
df_electricity = separate_df(df_khs, depalletizer_sections, "8.0 ELECTRICITY, CONTROL, ALARM, AUTOMATION")
df_power = separate_df_mechanical_power(df_khs, depalletizer_sections, "9.0 POWER CABINETS")

time.sleep(15)

df_utilities = separate_df_utilities(df_khs, depalletizer_sections, "10.0 UTILITIES CONSUMPTION")
df_maintenance = separate_df_information_maintenance(df_khs, depalletizer_sections, "11.0 MAINTENANCE / DOCUMENTATION / TRAINING")
df_acceptance = separate_df(df_khs, depalletizer_sections, "12.0 TEC EMPTY CAN DEPALLETIZER ACCEPTANCE")
df_zone = separate_df_zone(df_khs, depalletizer_sections, "13.0 ZONE DEMANDS DUE TO LOCAL REGULATIONS")