## LangChain Output Parsers


In [104]:
from dotenv import load_dotenv

# Read key/value pairs from a local .env file into the process environment.
# NOTE(review): presumably this is where the OpenAI API key comes from — confirm.
load_dotenv()

True

#### 1. String Output Parser


`Without Chain`


In [109]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Chat model used for the completion
llm = ChatOpenAI(model="gpt-4o-mini")

# Prompt template with a single `letter` placeholder
prompt = ChatPromptTemplate.from_template("List three cities that start with the letter '{letter}'.")

# Render the template into chat messages. `prompt.format(...)` would instead
# flatten the messages into one string carrying a "Human: " role prefix, so the
# model would receive different text than it does in the chained version.
prompt_messages = prompt.format_messages(letter="P")

# Define the parser
parser = StrOutputParser()

# No chain here: run each step by hand, in the same order an LCEL chain
# (Prompt | Model | Parser) would run them.
response = llm.invoke(prompt_messages)

# The parser is a Runnable too; invoking it on the model message returns its text.
result_str = parser.invoke(response)

In [110]:
# Show the parsed text and confirm that the parser produced a plain `str`.
print(
    "Raw String Output",
    result_str,
    f"Type: {type(result_str)}",
    sep="\n",
)

Raw String Output
Sure! Here are three cities that start with the letter 'P':

1. Paris (France)
2. Phoenix (United States)
3. Pune (India)
Type: <class 'str'>


`With Chain`


In [111]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Output parser: turns the model's chat message into a plain string
parser = StrOutputParser()

# Prompt template with a single `letter` placeholder
prompt = ChatPromptTemplate.from_template(
    "List three cities that start with the letter '{letter}'."
)

# Chat model that answers the prompt
llm = ChatOpenAI(model="gpt-4o-mini")

# LCEL (LangChain Expression Language) pipeline: Prompt | Model | Parser
chain = prompt | llm | parser

# One call runs all three steps; the dict fills the template's placeholder
result_str = chain.invoke(
    {"letter": "P"},
)

In [112]:
# Print the chain's result followed by its Python type.
print(
    "Raw String Output",
    result_str,
    f"Type: {type(result_str)}",
    sep="\n",
)

Raw String Output
Here are three cities that start with the letter 'P':

1. Paris (France)
2. Philadelphia (USA)
3. Pune (India)
Type: <class 'str'>


---

#### 2. Comma Separated List Output Parser


`With get_format_instructions`


In [113]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import CommaSeparatedListOutputParser

# Parser that splits a comma separated reply into a Python list
list_parser = CommaSeparatedListOutputParser()

# Chat model
llm = ChatOpenAI(model="gpt-4o-mini")

# The key step: ask the parser for the prompt text that tells the model
# how to format its answer, and print it for inspection.
format_instructions = list_parser.get_format_instructions()
print(format_instructions)

Your response should be a list of comma separated values, eg: `foo, bar, baz` or `foo,bar,baz`


In [114]:
# Template with two placeholders: the item type (supplied at invoke time)
# and the parser's formatting instructions.
list_prompt = ChatPromptTemplate.from_template(
    "List three {item_type}. \n\n"
    "{format_instructions}"
)

# partial() pre-fills `format_instructions`, leaving only `item_type` as input.
list_prompt_partial = list_prompt.partial(
    format_instructions=format_instructions,
)
print(list_prompt_partial)

input_variables=['item_type'] input_types={} partial_variables={'format_instructions': 'Your response should be a list of comma separated values, eg: `foo, bar, baz` or `foo,bar,baz`'} messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['format_instructions', 'item_type'], input_types={}, partial_variables={}, template='List three {item_type}. \n\n{format_instructions}'), additional_kwargs={})]


In [115]:
# LCEL pipeline: pre-filled prompt -> model -> comma separated list parser
list_chain = (
    list_prompt_partial
    | llm
    | list_parser
)

# Run the pipeline; only the remaining `item_type` placeholder is supplied here
result_list = list_chain.invoke(
    {"item_type": "unique ai models"},
)

In [116]:
# Show the parsed value and its Python type.
print(
    "List Output",
    result_list,
    f"Type: {type(result_list)}",
    sep="\n",
)

List Output
['GPT-3', 'BERT', 'DALL-E']
Type: <class 'list'>


`Without get_format_instructions`


In [118]:
from langchain_core.output_parsers import CommaSeparatedListOutputParser
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate

# Chat model
llm = ChatOpenAI(model="gpt-4o-mini")

# Comma separated list parser (its generated format instructions are not used here)
parser = CommaSeparatedListOutputParser()

# The formatting request is written by hand directly in the template
template = "List three {item_type}. Provide in comma separated values."
prompt = PromptTemplate.from_template(template)

# Pipeline: prompt -> model -> parser
chain = prompt | llm | parser

# Run it and show the parsed value together with its type
response = chain.invoke({"item_type": "unique ai models"})
print(response, type(response), sep="\n")

['GPT-4', 'DALL-E 2', 'BERT']
<class 'list'>


---

#### 3. JSON Output Parser


`With get_format_instructions`


In [119]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate

# JSON output parser, created without a schema
parser = JsonOutputParser()

# Chat model
llm = ChatOpenAI(model="gpt-4o-mini")

# Prompt text the parser wants appended to the request; printed for inspection
format_instructions = parser.get_format_instructions()
print(format_instructions)

Return a JSON object.


In [120]:
# Template: `text` is supplied at invoke time, `format_instructions` is pre-filled
template = "Extract the name and age from this text: {text}\n\n{format_instructions}"
prompt = PromptTemplate.from_template(
    template,
    partial_variables={"format_instructions": format_instructions},
)

# Pipeline: prompt -> model -> JSON parser
chain = prompt | llm | parser

# Run it and show the parsed value together with its type
response = chain.invoke({"text": "John Doe is 30 years old."})
print(response, type(response), sep="\n")

{'name': 'John Doe', 'age': 30}
<class 'dict'>


`Without get_format_instructions`


In [122]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate

# JSON output parser (its generated format instructions are not used in this cell)
parser = JsonOutputParser()

# Chat model
llm = ChatOpenAI(model="gpt-4o-mini")

# The request for JSON is written by hand directly in the template
template = "Extract the name and age from this text: {text}. Provide json output."
prompt = PromptTemplate.from_template(template)

# Pipeline: prompt -> model -> JSON parser
chain = prompt | llm | parser

# Run it and show the parsed value together with its type
response = chain.invoke({"text": "John Doe is 30 years old."})
print(response, type(response), sep="\n")

{'name': 'John Doe', 'age': 30}
<class 'dict'>


---

#### 4. Pydantic Output Parser


`What is Pydantic?`


In [123]:
from pydantic import BaseModel, Field


# Example Pydantic model: three typed fields, each carrying a description.
class Laptop(BaseModel):
    # Laptop name
    name: str = Field(
        description="Provide the name of the Laptop",
    )
    # Processor name
    processor: str = Field(
        description="Provide the name of the processor used in the laptop",
    )
    # Processor generation; declared as an integer
    generation: int = Field(
        description="Provide the processor generation",
    )

In [124]:
# Every value matches its declared field type, so construction succeeds.
laptop_1 = Laptop(
    name="Macbook Air",
    processor="M4",
    generation=4,
)
print(laptop_1)

name='Macbook Air' processor='M4' generation=4


In [125]:
# Fields are read as ordinary attributes on the model instance.
print(laptop_1.name, laptop_1.processor, sep="\n")

Macbook Air
M4


In [129]:
# `generation` is declared as int, so the non-numeric string "5th" is rejected.
# The error is the point of this cell: catch it and print it, so that
# Restart Kernel -> Run All still reaches the cells below.
from pydantic import ValidationError

try:
    laptop_2 = Laptop(name="Macbook Pro", processor="M5", generation="5th")
except ValidationError as err:
    print(f"ValidationError: {err}")

ValidationError: 1 validation error for Laptop
generation
  Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='5th', input_type=str]
    For further information visit https://errors.pydantic.dev/2.12/v/int_parsing

`Json Output Parser with Pydantic Class`


In [130]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate
from pydantic import BaseModel, Field


# Optional: Pydantic model handed to the JSON parser as the target schema
class Person(BaseModel):
    name: str = Field(
        description="The person's name",
    )
    age: int = Field(
        description="The person's age",
    )


# Chat model
llm = ChatOpenAI(model="gpt-4o-mini")

# JSON output parser, this time given the Pydantic model
parser = JsonOutputParser(pydantic_object=Person)

# Prompt text the parser wants appended to the request; printed for inspection
format_instructions = parser.get_format_instructions()
print(format_instructions)

STRICT OUTPUT FORMAT:
- Return only the JSON value that conforms to the schema. Do not include any additional text, explanations, headings, or separators.
- Do not wrap the JSON in Markdown or code fences (no ``` or ```json).
- Do not prepend or append any text (e.g., do not write "Here is the JSON:").
- The response must be a single top-level JSON value exactly as required by the schema (object/array/etc.), with no trailing commas or comments.

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]} the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema (shown in a code block for readability only — do not include any backticks or Markdown in your output)

In [131]:
# Template: `text` is supplied at invoke time, `format_instructions` is pre-filled
template = (
    "Extract the name and age from this text: {text}\n"
    "\n"
    "{format_instructions}"
)

prompt = PromptTemplate(
    input_variables=["text"],
    partial_variables={"format_instructions": format_instructions},
    template=template,
)

# Pipeline: prompt -> model -> JSON parser
chain = prompt | llm | parser

# Run it and show the parsed value together with its type
response = chain.invoke({"text": "John Doe is 30 years old."})

print(response, type(response), sep="\n")

{'name': 'John Doe', 'age': 30}
<class 'dict'>


`Pydantic Output Parser`


In [132]:
from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field
from langchain_openai import ChatOpenAI
from typing import List

# Chat model
llm = ChatOpenAI(model="gpt-4o-mini")


# Exact structure the model's answer must follow
class Recipe(BaseModel):
    name: str = Field(
        description="The formal name of the recipe.",
    )
    ingredients: List[str] = Field(
        description="A list of 3 main ingredients.",
    )
    prep_time_minutes: int = Field(
        description="The preparation time in minutes.",
    )


# Parser bound to the Recipe model
pydantic_parser = PydanticOutputParser(pydantic_object=Recipe)

# Schema-based formatting instructions generated by the parser; printed for inspection
pydantic_instructions = pydantic_parser.get_format_instructions()
print(pydantic_instructions)

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"name": {"description": "The formal name of the recipe.", "title": "Name", "type": "string"}, "ingredients": {"description": "A list of 3 main ingredients.", "items": {"type": "string"}, "title": "Ingredients", "type": "array"}, "prep_time_minutes": {"description": "The preparation time in minutes.", "title": "Prep Time Minutes", "type": "integer"}}, "required": ["name", "ingredients", "prep_time_minutes"]}
```


In [133]:
# Prompt with a placeholder for the dish and one for the parser's instructions
pydantic_prompt = ChatPromptTemplate.from_template(
    "Create a simple recipe for {food_item}. \n\n"
    "{pydantic_instructions}"
)

# LCEL pipeline: prompt (instructions pre-filled via partial) -> model -> parser
pydantic_chain = (
    pydantic_prompt.partial(pydantic_instructions=pydantic_instructions)
    | llm
    | pydantic_parser
)

# Run the pipeline; only `food_item` still needs a value
result_object: Recipe = pydantic_chain.invoke(
    {"food_item": "tacos"},
)

In [134]:
# The result exposes its fields as attributes; print a few of them and the type.
print(
    "Pydantic Object Output",
    f"Name: {result_object.name}",
    f"Prep Time: {result_object.prep_time_minutes} minutes",
    f"First Ingredient: {result_object.ingredients[0]}",
    f"Type: {type(result_object)}",
    sep="\n",
)

Pydantic Object Output
Name: Simple Tacos
Prep Time: 15 minutes
First Ingredient: Taco shells
Type: <class '__main__.Recipe'>


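`Another way to get a Pydantic object: with_structured_output`
This approach does not use `PydanticOutputParser`. It relies on the chat model's own structured-output / tool-calling support, so it only works with models that implement `with_structured_output` (OpenAI chat models do).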


In [136]:
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field
from langchain_openai import ChatOpenAI
from typing import List

# Chat model
llm = ChatOpenAI(model="gpt-4o-mini")


# Exact structure the model's answer must follow.
# This redefines Recipe with one extra field, `number_of_people`.
class Recipe(BaseModel):
    name: str = Field(
        description="The formal name of the recipe.",
    )
    ingredients: List[str] = Field(
        description="A list of 3 main ingredients.",
    )
    prep_time_minutes: int = Field(
        description="The preparation time in minutes.",
    )
    number_of_people: int = Field(
        description="How many people required to cook?",
    )


# Plain prompt: no format instructions are added to the template in this variant
pydantic_prompt = ChatPromptTemplate.from_template(
    "Create a simple recipe for {food_item}."
)

# Bind the schema to the model instead of attaching an output parser
llm_structured = llm.with_structured_output(schema=Recipe)

# LCEL pipeline: prompt -> schema-bound model
pydantic_chain = pydantic_prompt | llm_structured

# Run the pipeline
result_object: Recipe = pydantic_chain.invoke(
    {"food_item": "tacos"},
)

print(
    "Pydantic Object Output",
    f"Name: {result_object.name}",
    f"Prep Time: {result_object.prep_time_minutes} minutes",
    f"First Ingredient: {result_object.ingredients[0]}",
    f"Type: {type(result_object)}",
    sep="\n",
)

Pydantic Object Output
Name: Simple Beef Tacos
Prep Time: 15 minutes
First Ingredient: Ground beef
Type: <class '__main__.Recipe'>


In [137]:
# Bare last expression: the notebook displays the value of the `number_of_people` field.
result_object.number_of_people

4