In [None]:
import io
import json
import logging
import os
import uuid
from typing import Any

import oci.ai_document
import oci.object_storage
from oci.config import from_file
from oci.generative_ai_inference import GenerativeAiInferenceClient
from oci.generative_ai_inference.models import (ChatDetails, ImageContent,
                                                TextContent)

In [None]:
# Load the OCI SDK configuration; from_file() is called with its default arguments.
# Every client in this notebook is built from this `config`.
config = from_file()

In [None]:
def _require_env(name: str) -> str:
    """Return the value of environment variable ``name``.

    Raises:
        ValueError: if the variable is unset or empty. The message names the
            variable that is actually read, so it can be fixed directly.
    """
    value = os.getenv(name)
    if not value:
        raise ValueError(f"ERROR: Missing configuration key {name}")
    return value


try:
    # Service clients, both built from the SDK config loaded earlier.
    object_storage_client = oci.object_storage.ObjectStorageClient(config=config)
    ai_document_client = oci.ai_document.AIServiceDocumentClientCompositeOperations(
        oci.ai_document.AIServiceDocumentClient(config)
    )

    # Required settings, read from the environment.
    compartment_ocid = _require_env("COMPARTMENT_OCID")
    namespace = _require_env("NAMESPACE")
    bucket_name = _require_env("BUCKET_NAME")
    model_id = _require_env("MODEL_ID")

    # Bucket-level input location (no object name set here).
    input_location = oci.ai_document.models.ObjectLocation()
    input_location.namespace_name = namespace
    input_location.bucket_name = bucket_name

    # Setup the output location where processor job results will be created
    output_location = oci.ai_document.models.OutputLocation()
    output_location.namespace_name = namespace
    output_location.bucket_name = bucket_name
    output_location.prefix = "ocr"

except Exception as e:
    logging.error("Error during client configuration: " + str(e))
    # Bare raise keeps the original traceback intact.
    raise

In [None]:
# Display the namespace set on the output location (sanity check of the configuration).
output_location.namespace_name

In [None]:
def upload_file_to_object_storage(bucket_name: str, file_path: str) -> Any:
    """Upload a local file to an Object Storage bucket.

    The object is named after the base name of ``file_path`` and is written
    with the module-level ``object_storage_client`` into the module-level
    ``namespace``.

    Args:
        bucket_name: Name of the destination bucket.
        file_path: Path of the local file to upload.

    Returns:
        The response object returned by ``put_object`` (not a string; callers
        in this notebook read attributes such as ``.status`` from it).

    Raises:
        Exception: any error raised while opening or uploading the file is
            logged and re-raised unchanged.
    """
    try:
        object_name = os.path.basename(file_path)
        # The file handle is streamed as the request body and closed by `with`.
        with open(file_path, 'rb') as file:
            return object_storage_client.put_object(
                namespace_name=namespace,
                bucket_name=bucket_name,
                object_name=object_name,
                put_object_body=file
            )
    except Exception as e:
        logging.error("Error uploading file to Object Storage: " + str(e))
        raise

In [None]:
def get_file_from_object_storage(bucket_name: str, object_name: str) -> Any:
    """Fetch an object from Object Storage and return its content.

    Uses the module-level ``object_storage_client`` and ``namespace``.

    Args:
        bucket_name: Bucket that holds the object.
        object_name: Full object name (including any prefix) to download.

    Returns:
        The ``data.content`` attribute of the ``get_object`` response.

    Raises:
        Exception: any failure is logged and then re-raised.
    """
    try:
        fetched = object_storage_client.get_object(
            namespace_name=namespace, bucket_name=bucket_name, object_name=object_name
        )
        payload = fetched.data.content
        return payload
    except Exception as err:
        logging.error(f"Error retrieving file from Object Storage: {err!s}")
        raise err

In [None]:
# Composite-operations wrapper around a Document AI client built from `config`.
# NOTE(review): the configuration cell already creates `ai_document_client` the same way,
# and extract_text_from_document uses that one; this instance looks unused in the
# visible cells — confirm before removing.
aiservicedocument_client = oci.ai_document.AIServiceDocumentClientCompositeOperations(
            oci.ai_document.AIServiceDocumentClient(config=config)
        )

In [None]:
def create_processor_job_callback(self: Any = None, times_called: Any = None, response: Any = None) -> None:
    """Progress callback printed while waiting for a processor job.

    The OCI waiter invokes ``wait_callback`` with two positional arguments:
    the number of checks so far and the latest response. The ``self``
    parameter is kept only so existing three-argument calls keep working; the
    callback accepts both call shapes.

    Args:
        self: Ignored placeholder (or, in a two-argument call, the check count).
        times_called: Number of times the resource has been checked (or, in a
            two-argument call, the response).
        response: Latest response; its ``data`` attribute is printed.
    """
    if response is None:
        # Two-argument call (times_checked, response): shift the arguments
        # into their intended slots.
        times_called, response = self, times_called
    print("Waiting for processor lifecycle state to go into succeeded state:", response.data)


In [None]:
def extract_text_from_document(document_file_name: str) -> Any:
    """Run a Document AI text-extraction processor job on a stored document.

    The job reads ``document_file_name`` from the module-level
    ``namespace``/``bucket_name`` and writes its results to the location
    described by the module-level ``output_location``. The call waits for the
    job to reach the SUCCEEDED lifecycle state, reporting progress through
    ``create_processor_job_callback``.

    Args:
        document_file_name: Object name of the document inside the bucket.

    Returns:
        The response returned by ``create_processor_job_and_wait_for_state``;
        the processor job itself is available as ``.data``.

    Raises:
        Exception: any error while building or running the job is logged and
            re-raised unchanged.
    """
    try:
        text_extraction_feature = oci.ai_document.models.DocumentTextExtractionFeature()

        # Local name differs from the module-level `input_location` to avoid shadowing it.
        job_input_location = oci.ai_document.models.ObjectStorageLocations(
            object_locations=[
                oci.ai_document.models.ObjectLocation(
                    namespace_name=namespace,
                    bucket_name=bucket_name,
                    object_name=document_file_name,
                )
            ]
        )

        # Copy of the shared output location so the job gets its own object.
        output_loc = oci.ai_document.models.OutputLocation(
            namespace_name=output_location.namespace_name,
            bucket_name=output_location.bucket_name,
            prefix=output_location.prefix
        )

        create_processor_job_details_text_extraction = oci.ai_document.models.CreateProcessorJobDetails(
            display_name=str(uuid.uuid4()),  # random, unique display name per job
            compartment_id=compartment_ocid,
            input_location=job_input_location,
            output_location=output_loc,
            processor_config=oci.ai_document.models.GeneralProcessorConfig(features=[text_extraction_feature])
        )

        print("Calling create_processor with create_processor_job_details_text_extraction:", create_processor_job_details_text_extraction)

        create_processor_response = ai_document_client.create_processor_job_and_wait_for_state(
            create_processor_job_details=create_processor_job_details_text_extraction,
            wait_for_states=[oci.ai_document.models.ProcessorJob.LIFECYCLE_STATE_SUCCEEDED],
            waiter_kwargs={"wait_callback": create_processor_job_callback}
        )

        print("processor call succeeded with status: {} and request_id: {}.".format(create_processor_response.status, create_processor_response.request_id))

        return create_processor_response

    except Exception as e:
        logging.error("Error during document processing: " + str(e))
        raise

In [None]:
# Relative directory (with trailing slash) that holds the sample invoice files.
SAMPLE_INVOICES_PATH="sample_invoices/"

In [None]:
# Upload ./sample_invoices/image1.pdf to the bucket and display the upload response.
response = upload_file_to_object_storage(bucket_name, "./" + SAMPLE_INVOICES_PATH + "image1.pdf")
response

In [None]:
# Display the status attribute of the upload response.
response.status

In [None]:
# Run text extraction on the uploaded object and display the returned job data.
# NOTE(review): the variable name contains a typo ("processsor"); it is left as-is
# because the following cells reference it by this exact name.
create_processsor_job_response = extract_text_from_document("image1.pdf")
create_processsor_job_response.data

In [None]:
# Display the processor job id; it is part of the results path built next.
create_processsor_job_response.data.id

In [None]:
# Object name of the result JSON, composed as
# <output prefix>/<job id>/<namespace>_<bucket>/results/<input object name>.json
results_path = f"{output_location.prefix}/{create_processsor_job_response.data.id}/{namespace}_{bucket_name}/results/image1.pdf.json"

In [None]:
# Download the result object and display it parsed as JSON.
result = get_file_from_object_storage(bucket_name, results_path)
json.loads(result)

In [None]:
# Walk every line of every page in the OCR result: echo it, and build `text`
# as the newline-terminated concatenation of all line texts.
text_chunks = []
for page in json.loads(result)["pages"]:
    for line in page["lines"]:
        line_text = line["text"]
        print(line_text)
        text_chunks.append(line_text + "\n")
text = "".join(text_chunks)
        

In [None]:
# Generative AI inference client, built from the same SDK `config` as the other clients.
generative_ai_client = GenerativeAiInferenceClient(config)

In [None]:
# === User prompt ===
# Instructions for the model; the extracted document text is appended to this
# string when the request content is built.
# Note: inside this (non-raw) triple-quoted string a trailing backslash is a line
# continuation, so consecutive field lines ending in a backslash are joined into
# a single line of the final prompt.
user_input = """
For the text extracted from the document, generate a json file with the following fields: \
RUC: <Supplier's RUC> # Corresponds to an 11-digit number. \
Company Name: <Supplier's Company Name> \
Address: <Supplier's Address> \
Invoice Number: <Invoice Number> \
Invoice Date: <Invoice Date in format DD/MM/YYYY> \
Total Amount: <Total Amount in format 0.00> \
Currency: <Currency Code, e.g., PEN or USD> \
Tax Amount: <Tax Amount in format 0.00> \
Payment Terms: <Payment Terms, e.g., "Contado" or "30 days">

Additionaly, list the items in the invoice with the following fields for each item: \
Items: \
    - Description: <Item Description> \
    - Quantity: <Item Quantity in format 0.00> \
    - Unit Price: <Item Unit Price in format 0.00> \
    - Total Price: <Item Total Price in format 0.00>

Example of the expected JSON format:
{
  "RUC": "12345678901",
  "Company Name": "ABC S.A.C.",
  "Address": "Av. Example 123, Lima, Peru",
  "Invoice Number": "F001-00012345",
  "Invoice Date": "15/08/2023",
  "Total Amount": "1500.00",
  "Currency": "PEN",
  "Tax Amount": "270.00",
  "Payment Terms": "30 days",
  "Items": [
    {
      "Description": "Product A",
      "Quantity": "2.00",
      "Unit Price": "500.00",
      "Total Price": "1000.00"
    },
    {
      "Description": "Product B",
      "Quantity": "1.00",
      "Unit Price": "500.00",
      "Total Price": "500.00"
    }
  ]
}
    
Please ensure the JSON is properly formatted.

Here is the extracted text from the document:
"""

# --- Build the request ---
genai_models = oci.generative_ai_inference.models

# One USER message: the instructions followed by the extracted document text.
content = genai_models.TextContent(text=f"{user_input}\n\n{text}")
message = genai_models.Message(role="USER", content=[content])

# Generic-format chat request with the generation parameters.
chat_request = genai_models.GenericChatRequest(
    api_format=genai_models.BaseChatRequest.API_FORMAT_GENERIC,
    messages=[message],
    max_tokens=600,
    temperature=1.0,
    top_p=0.75,
    frequency_penalty=0.0,
    presence_penalty=0.0,
)

# Target the on-demand model given by `model_id` in the configured compartment.
chat_details = genai_models.ChatDetails(
    compartment_id=compartment_ocid,
    serving_mode=genai_models.OnDemandServingMode(model_id=model_id),
    chat_request=chat_request,
)

In [None]:
# Send the chat request to the Generative AI inference service.
# NOTE(review): this rebinds `response`, which earlier held the Object Storage upload response.
response = generative_ai_client.chat(
    chat_details=chat_details
)

In [None]:
# Print the text of the first content part of the first choice in the chat response.
print(response.data.chat_response.choices[0].message.content[0].text)