In [1]:
import os
from dotenv import load_dotenv, find_dotenv
# Locate a .env file with find_dotenv() and load it with load_dotenv();
# the return value is discarded into `_`.
_ = load_dotenv(find_dotenv())
# Fail fast: indexing os.environ raises KeyError if OPENAI_API_KEY is not set.
openai_api_key = os.environ["OPENAI_API_KEY"]

## Basic app with the Pydantic Output Parser
* Previously we used the StructuredOutputParser to format the output into a JSON dictionary.
* The StructuredOutputParser is a very simple parser that only supports strings and does not provide options for other data types such as lists or integers.
* The PydanticOutputParser is a more advanced parser that supports many data types and other features such as validators.

In [2]:
from langchain.output_parsers import PydanticOutputParser

In [3]:
from langchain_core.pydantic_v1 import BaseModel, Field, validator

In [4]:
from typing import List

**Define the desired output data structure**

In [5]:
# Schema for the LLM's answer: a list of substitute words and a list of
# reasons explaining why the suggestions fit the context.
#
# NOTE: this is deliberately a comment and not a class docstring. Pydantic
# copies a model's docstring into the `description` of its JSON schema, and
# that schema is embedded in the parser's format instructions, so a docstring
# would change the prompt sent to the LLM.
class Suggestions_Output_Structure(BaseModel):
    words: List[str] = Field(
        description="list of substitute words based on the context"
    )
    reasons: List[str] = Field(
        description="the reasoning of why this word fits the context"
    )

    @validator('words')
    def not_start_with_number(cls, info):
        """Reject the word list if any word starts with a numeric character.

        Args:
            info: the list of substitute words being validated.

        Returns:
            The list, unchanged, when every word passes the check.

        Raises:
            ValueError: if a word's first character is numeric.
        """
        for item in info:
            # item[:1] is "" for an empty string, so an empty word is simply
            # "not numeric" instead of raising IndexError as item[0] would.
            if item[:1].isnumeric():
                raise ValueError("ERROR: The word cannot start with a number")
        return info

    @validator('reasons')
    def end_with_dot(cls, info):
        """Ensure every non-empty reason ends with a period.

        Args:
            info: the list of reasons being validated.

        Returns:
            A new list in which each non-empty reason ends with "."; empty
            strings are passed through untouched (item[-1] used to raise
            IndexError on them).
        """
        # Build a new list rather than mutating the incoming one in place.
        return [
            item if (not item or item.endswith(".")) else item + "."
            for item in info
        ]

**Create the parser**

In [6]:
# Parser bound to the Suggestions_Output_Structure model defined above.
my_parser = PydanticOutputParser(pydantic_object=Suggestions_Output_Structure)

**Determine the input**

In [7]:
from langchain.prompts import PromptTemplate

In [8]:
# Prompt skeleton with three placeholders: {format_instructions},
# {target_word} and {context}. Assembled line by line; the leading and
# trailing empty entries reproduce the newline at the start and end of the text.
my_template = "\n".join([
    "",
    "Offer a list of suggestions to substitute the specified",
    "target_word based on the present context and the reasoning",
    "for each word.",
    "",
    "{format_instructions}",
    "",
    "target_word={target_word}",
    "context={context}",
    "",
])

In [9]:
# Prompt with two caller-supplied variables (target_word, context); the
# format_instructions slot is pre-filled from the parser as a partial variable.
my_prompt = PromptTemplate(
    partial_variables=dict(
        format_instructions=my_parser.get_format_instructions(),
    ),
    input_variables=["target_word", "context"],
    template=my_template,
)

In [10]:
# Fill the two remaining template variables for this example: the word to
# replace and the passage it appears in.
user_input = my_prompt.format_prompt(
    context="""
    The loyalty of the soldier was so great that
    even under severe torture, he refused to betray
    his comrades.
    """,
    target_word="loyalty",
)

In [11]:
from langchain_openai import OpenAI

In [12]:
# Instantiate the OpenAI LLM wrapper with no arguments, i.e. library defaults.
# NOTE(review): `openai_api_key` read earlier is not passed here; presumably
# the client picks the key up from the environment. Confirm.
llm = OpenAI()

In [13]:
# Send the rendered prompt text to the model and keep the raw completion.
# `.invoke()` replaces calling the LLM object directly (`llm(...)`), which is
# deprecated in LangChain and emits a deprecation warning.
output = llm.invoke(user_input.to_string())

  warn_deprecated(


**Apply the parser to get the desired output structure**
* Note: the next line of code may fail if the LLM's output is not correctly formatted JSON. Because the model's output can vary between runs, re-running the notebook usually fixes the problem.

In [15]:
# Parse the raw completion with the parser built around
# Suggestions_Output_Structure; as the cell's last expression, the result is displayed.
my_parser.parse(output)

Suggestions_Output_Structure(words=['devotion', 'allegiance', 'fidelity', 'faithfulness'], reasons=["These words all convey a strong sense of commitment and dedication, which aligns with the context of a soldier's loyalty.", 'Allegiance specifically implies a sense of loyalty to a higher cause or authority, making it a fitting substitute in this context.', "Fidelity and faithfulness both suggest a steadfast and unwavering loyalty, which is appropriate for a soldier's loyalty in the face of torture.", "Devotion conveys a deep and unwavering loyalty and dedication, which is fitting for a soldier's loyalty in the face of adversity."])