In [1]:
from dotenv import load_dotenv

from pydantic import BaseModel,Field

load_dotenv()

from datetime import date
from typing import  Optional
from enum import Enum
from langchain_core.messages import SystemMessage,HumanMessage
from models import RelevantFacts
from langchain_openai import ChatOpenAI


In [2]:
 

# Deterministic (temperature=0) chat model used for the structured-output route.
chat_model = ChatOpenAI(model="gpt-4.1-mini", temperature=0)

# Wrap the model so its replies are returned as `RelevantFacts` instead of raw text.
llm_with_structured_output = chat_model.with_structured_output(RelevantFacts)


In [3]:
from docling.document_converter import DocumentConverter

# Input document for the extraction: a PDF path or URL.
source = "./relevant_fact.pdf"

# Convert the document, then export the parsed result as Markdown text;
# `document_markdown` is what gets embedded in the prompt further down.
result = DocumentConverter().convert(source)
document_markdown = result.document.export_to_markdown()

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from prompts import SYSTEM_MESSAGE_TEXT

# The user turn is a fixed lead-in followed by the Markdown rendering of the document.
HUMAN_MESSAGE_TEXT = f"Here is the relevant fact: {document_markdown}"

# Build both chat messages, then the ordered conversation (system first, user second).
SYSTEM_MESSAGE, HUMAN_MESSAGE = (
    SystemMessage(content=SYSTEM_MESSAGE_TEXT),
    HumanMessage(content=HUMAN_MESSAGE_TEXT),
)
MESSAGES = [SYSTEM_MESSAGE, HUMAN_MESSAGE]

# Route 1: LangChain structured output. Display the parsed result as a plain dict.
response = llm_with_structured_output.invoke(MESSAGES)
response.model_dump()

{'Company': 'Petróleo Brasileiro S.A. - Petrobras',
 'Date': datetime.date(2025, 8, 7),
 'Type': <EventType.DIVIDEND_PAYMENT: 'Dividend Payment'>,
 'Local': {'Country': 'Brazil',
  'State': 'RJ',
  'City': 'Rio de Janeiro',
  'Street': 'Av. Henrique Valadares',
  'Number': '28 - 9º andar',
  'ZipCode': '20031-030'},
 'DividendInfo': {'Type': <StockType.ORDINARIA: 'ORDINARIA'>,
  'Divident': 0.67192409,
  'Date': datetime.date(2025, 8, 21),
  'PaymentDate': datetime.date(2025, 11, 21)}}

In [5]:
import instructor

# Same extraction through instructor, using the same OpenAI model as the other routes.
client = instructor.from_provider("openai/gpt-4.1-mini")

# NOTE(review): `user` holds the extracted `RelevantFacts` object, not a user;
# the name is kept unchanged so anything else referring to it keeps working.
user = client.chat.completions.create(
    response_model=RelevantFacts,
    messages=[
        {"role": "system", "content": SYSTEM_MESSAGE_TEXT},
        {"role": "user", "content": HUMAN_MESSAGE_TEXT},
    ],
    # Pin sampling to temperature 0 like the two ChatOpenAI-based extractors,
    # so the three approaches are compared under the same settings.
    temperature=0,
    # Presumably re-asks the model when the reply fails validation — confirm in instructor docs.
    max_retries=3,
)
user.model_dump()

{'Company': 'Petróleo Brasileiro S.A. - Petrobras',
 'Date': datetime.date(2025, 8, 7),
 'Type': <EventType.DIVIDEND_PAYMENT: 'Dividend Payment'>,
 'Local': {'Country': 'Brazil',
  'State': 'RJ',
  'City': 'Rio de Janeiro',
  'Street': 'Av. Henrique Valadares',
  'Number': '28 - 9º andar',
  'ZipCode': '20031-030'},
 'DividendInfo': {'Type': <StockType.ORDINARIA: 'ORDINARIA'>,
  'Divident': 0.67192409,
  'Date': datetime.date(2025, 8, 21),
  'PaymentDate': datetime.date(2025, 11, 21)}}

In [None]:
from langchain_openai import ChatOpenAI
from trustcall import create_extractor

# Route 3: trustcall extractor on top of the same deterministic chat model.
llm = ChatOpenAI(model="gpt-4.1-mini", temperature=0)

# Offer `RelevantFacts` as the only tool and select it by name via `tool_choice`.
bound = create_extractor(llm, tools=[RelevantFacts], tool_choice="RelevantFacts")

# NOTE: this rebinds `response`; from here on it is the extractor's dict result
# (the cell that follows indexes it with "responses").
response = bound.invoke(MESSAGES)
response

{'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_CQVpJ2WAnzIfRluOIZIPPvTC', 'name': 'RelevantFacts', 'args': {'Company': 'Petróleo Brasileiro S.A. - Petrobras', 'Date': '2025-08-07', 'Type': 'Dividend Payment', 'Local': {'Country': 'Brazil', 'State': 'RJ', 'City': 'Rio de Janeiro', 'Street': 'Av. Henrique Valadares', 'Number': '28 - 9º andar', 'ZipCode': '20031-030'}, 'DividendInfo': {'Type': 'ORDINARIA', 'Divident': 0.67192409, 'Date': '2025-11-21', 'PaymentDate': '2025-11-21'}}}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 143, 'prompt_tokens': 1690, 'total_tokens': 1833, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4.1-mini-2025-04-14', 'system_fingerprint': 'fp_37c45ea698', 'id': 'chatcmpl-C6cNBPIEhRBPEw5VEy2lMqMAuGESm', 'service_tier': 'def

In [7]:
# Take the last entry under "responses" in the trustcall result and show it as a plain dict.
# NOTE(review): in the recorded outputs, DividendInfo.Date is 2025-11-21 for this run,
# while the structured-output and instructor runs show 2025-08-21 — verify against the PDF.
trustcall_facts = response["responses"][-1]
trustcall_facts.model_dump()


{'Company': 'Petróleo Brasileiro S.A. - Petrobras',
 'Date': datetime.date(2025, 8, 7),
 'Type': <EventType.DIVIDEND_PAYMENT: 'Dividend Payment'>,
 'Local': {'Country': 'Brazil',
  'State': 'RJ',
  'City': 'Rio de Janeiro',
  'Street': 'Av. Henrique Valadares',
  'Number': '28 - 9º andar',
  'ZipCode': '20031-030'},
 'DividendInfo': {'Type': <StockType.ORDINARIA: 'ORDINARIA'>,
  'Divident': 0.67192409,
  'Date': datetime.date(2025, 11, 21),
  'PaymentDate': datetime.date(2025, 11, 21)}}