In [15]:

from pydantic import BaseModel
from openai import AzureOpenAI
from azure.identity import DefaultAzureCredential, get_bearer_token_provider
import os
from dotenv import load_dotenv
load_dotenv()
from markitdown import MarkItDown


# Bearer-token provider for Microsoft Entra ID (keyless) authentication against
# the Cognitive Services scope. It is built here but not handed to the client
# below, which authenticates with an API key instead.
token_provider = get_bearer_token_provider(
    DefaultAzureCredential(),
    "https://cognitiveservices.azure.com/.default",
)

# Azure OpenAI client shared by the rest of the notebook. The endpoint and key
# are read from the environment (populated from .env by load_dotenv() above).
# To switch to keyless auth, pass `azure_ad_token_provider=token_provider`
# in place of `api_key`.
client = AzureOpenAI(
    api_version="2024-10-21",
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
    api_key=os.environ.get("AZURE_OPENAI_KEY"),
)


# Schema for the structured output requested from the model: this class is
# passed as `response_format` to `client.beta.chat.completions.parse(...)`,
# and the parsed reply comes back as a `Fixture` instance.
# NOTE(review): the prompt and the `event` variable talk about an "event";
# the class name `Fixture` does not match that wording — consider aligning.
class Fixture(BaseModel):
    name: str  # event title; the sample run returned 'Science Fair'
    date: str  # plain string, not a parsed date; the sample run returned 'Friday'
    participants: list[str]  # people involved; the sample run returned ['Alice', 'Bob']

# Request a chat completion whose reply is parsed into a `Fixture` instance.
# `response_format=Fixture` asks the SDK to constrain the model output to the
# Fixture schema and to expose the parsed object on the returned message.
completion = client.beta.chat.completions.parse(
    model="gpt-4o", # replace with the model deployment name of your gpt-4o 2024-08-06 deployment
    messages=[
        {"role": "system", "content": "Extract the event information."},
        {"role": "user", "content": "Alice and Bob are going to a science fair on Friday."},
    ],
    response_format=Fixture,
)

message = completion.choices[0].message

# `parsed` carries the Fixture instance on success. When the model declines to
# answer, `parsed` is None and the explanation is in `message.refusal`, so the
# refusal is reported explicitly instead of printing a bare "None".
event = message.parsed

if event is None:
    print(f"No structured result was returned. Refusal: {message.refusal}")
else:
    print(event)

# Full raw response (ids, finish reason, content-filter results) for inspection.
print(completion.model_dump_json(indent=2))

name='Science Fair' date='Friday' participants=['Alice', 'Bob']
{
  "id": "chatcmpl-AmQsyGm0UY3kfpXDnSist3ShNxY1B",
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "message": {
        "content": "{\"name\":\"Science Fair\",\"date\":\"Friday\",\"participants\":[\"Alice\",\"Bob\"]}",
        "refusal": null,
        "role": "assistant",
        "audio": null,
        "function_call": null,
        "tool_calls": [],
        "parsed": {
          "name": "Science Fair",
          "date": "Friday",
          "participants": [
            "Alice",
            "Bob"
          ]
        }
      },
      "content_filter_results": {
        "hate": {
          "filtered": false,
          "severity": "safe"
        },
        "self_harm": {
          "filtered": false,
          "severity": "safe"
        },
        "sexual": {
          "filtered": false,
          "severity": "safe"
        },
        "violence": {
          "filtered": fals

In [20]:
# Convert the employee handbook PDF to text with MarkItDown and print it.
# The Azure OpenAI client and the "gpt-4o" deployment name are handed to
# MarkItDown as its LLM backend.
# NOTE(review): which conversions actually call the LLM is not visible here —
# confirm against the MarkItDown documentation.
md = MarkItDown(
    llm_model="gpt-4o",
    llm_client=client,
)

# The path is relative to the notebook's working directory.
result = md.convert("data/documents/employee_handbook.pdf")

print(result.text_content)

Contoso Electronics
Employee Handbook

This document contains information generated using a language model (Azure OpenAI). The
information contained in this document is only for demonstration purposes and does not
reflect the opinions or beliefs of Microsoft. Microsoft makes no representations or
warranties of any kind, express or implied, about the completeness, accuracy, reliability,
suitability or availability with respect to the information contained in this document.

All rights reserved to Microsoft

Contoso Electronics Employee Handbook
Last Updated: 2023-03-05

Contoso Electronics is a leader in the aerospace industry, providing advanced electronic
components for both commercial and military aircraft. We specialize in creating cutting-
edge systems that are both reliable and efficient. Our mission is to provide the highest
quality aircraft components to our customers, while maintaining a commitment to safety
and excellence. We are proud to have built a strong reputation in th