# [The best library for structured LLM output](https://simmering.dev/blog/structured_output/)

In [5]:
from typing import List, Literal

from langchain.output_parsers.openai_tools import PydanticToolsParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field, validator
from langchain_openai import ChatOpenAI


# Set up a Pydantic model for the structured output

# Entity: schema for a single extracted entity (a name plus a category label).
# NOTE(review): documented with `#` comments on purpose; a class docstring
# presumably ends up in the schema generated from this model - confirm before
# converting these comments into a docstring.
class Entity(BaseModel):
    # Name of the entity; the Field description attaches that wording to the
    # field itself.
    name: str = Field(description="name of the entity")
    # Category of the entity; only these three string values are accepted.
    label: Literal["PERSON", "ORGANIZATION", "LOCATION"]


# ExtractEntities: top-level structured-output schema; it wraps the list of
# Entity records and is the model bound as a tool and handed to the parser
# further down in this cell.
class ExtractEntities(BaseModel):
    # All entities found in the input, in the order the model reports them.
    entities: List[Entity]


# Chat model used for the extraction, with the temperature fixed at 0.
llm = ChatOpenAI(temperature=0.0, model="gpt-4-turbo")

# Offer ExtractEntities as the only tool and pin tool_choice to it, so the
# model always answers through that schema.
schemas = [ExtractEntities]
llm_with_tools = llm.bind_tools(schemas, tool_choice="ExtractEntities")

# Turn the tool call in the model's reply back into a Pydantic object.
entity_parser = PydanticToolsParser(tools=schemas)
chain = llm_with_tools | entity_parser

In [7]:
# Sample sentence, assembled from adjacent literals so that it is a single
# line of text with no embedded newlines.
text = (
    "BioNTech SE is set to acquire InstaDeep, "
    "a Tunis-born and U.K.-based artificial intelligence "
    "(AI) startup, for up to £562 million"
)
# The chain yields a sequence of parsed results; keep the first one.
parsed_calls = chain.invoke(text)
res = parsed_calls[0]
print(res)

entities=[Entity(name='BioNTech SE', label='ORGANIZATION'), Entity(name='InstaDeep', label='ORGANIZATION'), Entity(name='Tunis', label='LOCATION'), Entity(name='U.K.', label='LOCATION')]


In [15]:
# Convert the parsed result to a plain dict once, then show its type and the
# value stored under the "entities" key.
as_dict = res.dict()
print(type(as_dict))
print(as_dict.get("entities"))

<class 'dict'>
[{'name': 'BioNTech SE', 'label': 'ORGANIZATION'}, {'name': 'InstaDeep', 'label': 'ORGANIZATION'}, {'name': 'Tunis', 'label': 'LOCATION'}, {'name': 'U.K.', 'label': 'LOCATION'}]


In [4]:
from typing import List, Literal

from langchain.output_parsers.openai_tools import PydanticToolsParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field, validator
from langchain_openai import ChatOpenAI


# Set up a Pydantic model for the structured output

# Entity: one extracted entity, described by its name and a category label.
# The same definition is repeated in the other setup cells of this notebook.
class Entity(BaseModel):
    # Name of the entity, with a human-readable description attached via Field.
    name: str = Field(description="name of the entity")
    # Category label; the Literal restricts it to these three strings.
    label: Literal["PERSON", "ORGANIZATION", "LOCATION"]


# ExtractEntities: container schema holding every Entity extracted from one
# input; used below both as the bound tool and as the parser's target type.
class ExtractEntities(BaseModel):
    # The extracted entities.
    entities: List[Entity]


# Chat model for the extraction; temperature is set to 0.
llm = ChatOpenAI(temperature=0.0, model="gpt-4-turbo")

# Bind ExtractEntities as a tool and require the model to call it.
tool_schemas = [ExtractEntities]
llm_with_tools = llm.bind_tools(tool_schemas, tool_choice="ExtractEntities")

# Parser that converts the returned tool call into an ExtractEntities object.
to_pydantic = PydanticToolsParser(tools=tool_schemas)
chain = llm_with_tools | to_pydantic

# Sample input as one line of text (adjacent literals, no embedded newlines).
text = (
    "BioNTech SE is set to acquire InstaDeep, "
    "a Tunis-born and U.K.-based artificial intelligence "
    "(AI) startup, for up to £562 million"
)
# Left as a bare expression so the notebook displays the first parsed result.
chain.invoke(text)[0]

ExtractEntities(entities=[Entity(name='BioNTech SE', label='ORGANIZATION'), Entity(name='InstaDeep', label='ORGANIZATION'), Entity(name='Tunis', label='LOCATION'), Entity(name='U.K.', label='LOCATION')])

## Add a prompt

In [17]:
from typing import List, Literal

from langchain.output_parsers.openai_tools import PydanticToolsParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field, validator
from langchain_openai import ChatOpenAI


# Set up a Pydantic model for the structured output

# Entity: schema for a single extracted entity. Declared again here so that
# this cell is self-contained; it matches the definition in the earlier cells.
class Entity(BaseModel):
    # Name of the entity (the description is attached through Field).
    name: str = Field(description="name of the entity")
    # One of the three allowed category strings.
    label: Literal["PERSON", "ORGANIZATION", "LOCATION"]


# ExtractEntities: wrapper schema around the list of extracted Entity records;
# it is the tool bound to the model and the type the parser produces below.
class ExtractEntities(BaseModel):
    # Extracted entities for one input.
    entities: List[Entity]


# Choose a model
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)

# Instruction prompt; the caller's text is substituted for {query}.
# The template is built from adjacent string literals rather than a backslash
# continuation inside one literal: with the continuation, the indentation of
# the following source line became part of the prompt text sent to the model.
prompt = PromptTemplate(
    template=(
        "Distill the entities from the user's input, if no available return empty.\n"
        "User:{query}\n"
    ),
    input_variables=["query"],
)

# Force the model to always use the ExtractEntities schema
llm_with_tools = llm.bind_tools([ExtractEntities], tool_choice="ExtractEntities")

# Pipeline: fill the prompt, call the tool-bound model, then convert the
# returned tool call into a Pydantic object. Invoke it with {"query": ...}.
chain = prompt | llm_with_tools | PydanticToolsParser(tools=[ExtractEntities])

In [18]:
# Call the tool-bound model directly, without the prompt or the parser, on an
# input that names no entities. The recorded output below still contains an
# ExtractEntities tool call, with entities (e.g. "John Doe") that do not occur
# in the input.
llm_with_tools.invoke("Hi")

AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_M61So3JM99t91y36GCwdXCBu', 'function': {'arguments': '{"entities":[{"name":"John Doe","label":"PERSON"},{"name":"OpenAI","label":"ORGANIZATION"},{"name":"New York","label":"LOCATION"}]}', 'name': 'ExtractEntities'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 34, 'prompt_tokens': 76, 'total_tokens': 110, 'prompt_tokens_details': {'cached_tokens': 0}, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-mini', 'system_fingerprint': 'fp_f85bea6784', 'finish_reason': 'stop', 'logprobs': None}, id='run-f4450b31-631c-4252-b7ca-0b6a58ef7a23-0', tool_calls=[{'name': 'ExtractEntities', 'args': {'entities': [{'name': 'John Doe', 'label': 'PERSON'}, {'name': 'OpenAI', 'label': 'ORGANIZATION'}, {'name': 'New York', 'label': 'LOCATION'}]}, 'id': 'call_M61So3JM99t91y36GCwdXCBu', 'type': 'tool_call'}], usage_metadata={'input_tokens': 76, 'output_tokens': 34, 'total_tokens':

In [21]:
# Run the prompt-based chain on the sample sentence. `text` must already be
# defined by an earlier cell; the chain takes a mapping keyed by the prompt
# variable "query". The result is a list of parsed objects (see the output).
res = chain.invoke({"query": text})
print(type(res))
print(res)

<class 'list'>
[ExtractEntities(entities=[Entity(name='BioNTech SE', label='ORGANIZATION'), Entity(name='InstaDeep', label='ORGANIZATION'), Entity(name='Tunis', label='LOCATION'), Entity(name='U.K.', label='LOCATION')])]


In [24]:
# First element of the list returned by the chain: the parsed ExtractEntities
# object shown in the output below.
res[0]

ExtractEntities(entities=[Entity(name='BioNTech SE', label='ORGANIZATION'), Entity(name='InstaDeep', label='ORGANIZATION'), Entity(name='Tunis', label='LOCATION'), Entity(name='U.K.', label='LOCATION')])

In [25]:
# The list of Entity objects held by the first parsed result.
res[0].entities

[Entity(name='BioNTech SE', label='ORGANIZATION'),
 Entity(name='InstaDeep', label='ORGANIZATION'),
 Entity(name='Tunis', label='LOCATION'),
 Entity(name='U.K.', label='LOCATION')]

In [26]:
# Print every extracted entity as "<name> <label>", one per line.
for entity in res[0].entities:
    print(f"{entity.name} {entity.label}")

BioNTech SE ORGANIZATION
InstaDeep ORGANIZATION
Tunis LOCATION
U.K. LOCATION
