<a href="https://colab.research.google.com/github/waloar/chagpt-prompts/blob/main/LangChain_PydanticOutputParser.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# LangChain PydanticOutputParser
or *Conversational Directed Graph Traversal with LangChain*

This notebook explores the use of LangChain for a specific business use case for LLMs. Feel free to read the article [here](https://medium.com/@danielwarfield1/conversations-as-directed-graphs-with-lang-chain-46d70e1a846c).


In [1]:
# Mount Google Drive at /content/drive so that files stored on Drive can be
# opened by path from this Colab runtime (nothing is copied).
from google.colab import drive
import os
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
# Read the OpenAI API key from a text file on the mounted Drive and publish it
# through the OPENAI_API_KEY environment variable for the rest of the notebook.
with open("/content/drive/MyDrive/chatgpt-test/apikey.txt", "r") as myfile:
    # strip() removes the trailing newline (or other stray whitespace) that a
    # text file usually ends with; otherwise it would become part of the key.
    OPENAI_API_TOKEN = myfile.read().strip()
os.environ["OPENAI_API_KEY"] = OPENAI_API_TOKEN
print('API Key Loaded!')

API Key Loaded!


In [3]:
!pip install langchain

Collecting langchain
  Downloading langchain-0.0.314-py3-none-any.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)
  Downloading dataclasses_json-0.6.1-py3-none-any.whl (27 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)
Collecting langsmith<0.1.0,>=0.0.43 (from langchain)
  Downloading langsmith-0.0.43-py3-none-any.whl (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.0/40.0 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain)
  Downloading marshmallow-3.20.1-py3-none-any.whl (49 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.4/49.4 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langcha

In [4]:
!pip install openai

Collecting openai
  Downloading openai-0.28.1-py3-none-any.whl (76 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.0/77.0 kB[0m [31m752.4 kB/s[0m eta [36m0:00:00[0m
Installing collected packages: openai
Successfully installed openai-0.28.1


# Loading Libraries

In [5]:
from datetime import date

from langchain.llms import OpenAI
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from langchain.pydantic_v1 import BaseModel, Field, validator

# Defining Utilities

In [6]:
"""Defining utility functions for constructing a readable exchange
"""

def system_output(output):
    """Show a message from the bot to the user.

    output: the object to display; it is printed on its own line directly
    beneath a '======= Bot =======' banner.
    """
    print('======= Bot =======', output, sep='\n')

def user_input():
    """Ask the human for one line of input.

    Prints a '======= Human Input =======' banner, then reads a line with the
    prompt 'input: ' and returns what was typed.
    """
    banner = '======= Human Input ======='
    print(banner)
    reply = input('input: ')
    return reply

def parsing_info(output):
    """Print a piece of diagnostic information, prefixed with '*Info* '.

    output: the object to report; it is formatted into the line as text.
    """
    line = '*Info* {}'.format(output)
    print(line)


# Defining an Edge

In [7]:
from typing import List

class Edge:

    """Edge
    At its highest level, an edge checks if an input is good, then parses
    data out of that input if it is good. Both steps are delegated to the
    LLM, and the edge keeps a count of consecutive failed attempts so that it
    can give up (and let the conversation continue) after `max_retrys`.
    """

    def __init__(self, condition, parse_prompt, parse_class, llm, max_retrys=3, out_node=None):
        """
        condition (str): a True/False question about the input
        parse_prompt (str): what the parser should be extracting
        parse_class (Pydantic BaseModel): the structure of the parse
        llm (LangChain LLM): the large language model being used; called with
            a prompt string and expected to return the completion text
        max_retrys (int): number of failed attempts after which execute()
            reports 'continue': True even though nothing was parsed
        out_node: the node the edge directs towards (may be set later)
        """
        self.condition = condition
        self.parse_prompt = parse_prompt
        self.parse_class = parse_class
        self.llm = llm

        #how many times the edge has failed, for any reason, for deciding to skip
        #when successful this resets to 0 for posterity.
        self.num_fails = 0

        #how many retrys are acceptable
        self.max_retrys = max_retrys

        #the node the edge directs towards
        self.out_node = out_node

    def check(self, _input):
        """Ask the llm if the input satisfies the condition.

        Returns the boolean `is_valid` field parsed from the model's answer.
        Any error raised by the llm call or by the output parser propagates.
        """
        validation_query = f'following the output schema, does the input satisfy the condition?\ninput:{_input}\ncondition:{self.condition}'
        class Validation(BaseModel):
            is_valid: bool = Field(description="if the condition is satisfied")
        parser = PydanticOutputParser(pydantic_object=Validation)
        prompt = f"Answer the user query.\n{parser.get_format_instructions()}\n{validation_query}\n"
        return parser.parse(self.llm(prompt)).is_valid

    def parse(self, _input):
        """Ask the llm to parse the parse_class, based on the parse_prompt, from the input.

        Returns an instance of `parse_class`; raises whatever the llm call or
        the output parser raises when the answer cannot be parsed.
        """
        parse_query = f'{self.parse_prompt}:\n\n"{_input}"'
        parser = PydanticOutputParser(pydantic_object=self.parse_class)
        prompt = f"Answer the user query.\n{parser.get_format_instructions()}\n{parse_query}\n"
        return parser.parse(self.llm(prompt))

    def _fail(self):
        """Record one failed attempt and build the matching result dictionary."""
        self.num_fails += 1
        #once the retry budget is used up the edge gives up and lets the
        #conversation move on, with no parsed result
        return {'continue': self.num_fails >= self.max_retrys, 'result': None, 'num_fails': self.num_fails, 'continue_to': self.out_node}

    def execute(self, _input):
        """Executes the entire edge
        returns a dictionary:
        {
            continue: bool,       whether or not should continue to next
            result: parse_class,  the parsed result, if applicable
            num_fails: int        the number of failed attempts
            continue_to: Node     the Node the edge continues to
        }
        """

        #input didn't make it past the input condition for the edge
        if not self.check(_input):
            return self._fail()

        try:
            #attempting to parse
            result = self.parse(_input)
        except Exception:
            #there was some error in parsing.
            #note, using the retry or correction parser here might be a good idea
            #Exception (not a bare except) so Ctrl-C still interrupts the notebook
            return self._fail()

        #only a successful parse clears the failure count; resetting before the
        #attempt would stop parse failures from ever reaching max_retrys
        self.num_fails = 0
        return {'continue': True, 'result': result, 'num_fails': self.num_fails, 'continue_to': self.out_node}


"""Running a few unit tests
"""
#Smoke test for Edge: every check/parse/execute call below queries the model,
#so this cell needs a valid API key and network access.

#defining the model used on the edge
model_name = "text-davinci-003"
temperature = 0.0
model = OpenAI(model_name=model_name, temperature=temperature)

#defining the desired output format, a list of fruits
class sampleOutputTemplate(BaseModel):
    output: List[str] = Field(description="a list of only fruits")

#defining the query for the condition, and parse prompt
condition = "Does the input contain fruits?"
parse_prompt = "extract only the fruits from the following text. Do not extract any food items besides pure fruits."

#defining the edge
testEdge = Edge(condition = condition,
                parse_prompt = parse_prompt,
                parse_class = sampleOutputTemplate,
                llm = model)

#a sample input from the user
sample_input = "my favorite deserts are chocolate covered strawberries, oreos, bannana splits, and cake."

print('===== testing the condition functionality =====')
print(f'weather or not the input \n"{sample_input}"\nsatisfies the condition\n"{condition}"')
print('result: {}'.format(testEdge.check(sample_input)))

print('===== parse results =====')
print(testEdge.parse(sample_input).output)

print('===== Testing full edge execution =====')
print(testEdge.execute(sample_input))

#three inputs without fruit: the third one reaches max_retrys (3 by default)
print('===== Testing a few failed executions =====')
print(testEdge.execute('Mr.Rodgers was the closest thing to a perfect person yet attained'))
print(testEdge.execute('Without a doubt, The Chessapeak is the greater of the two bays'))
print(testEdge.execute('max retrys is 3 by default, so giving up'))

print('===== Testing a successful parse =====')
#adjacent string literals are joined by the parser; a backslash continuation
#inside the literal would embed the next line's indentation in the text
print(testEdge.execute('My favorite tart contains a cinnamon grahm-cracker crust with '
                       'Vanilla custard, peach preserve, strawberries, and blueberries'))

===== testing the condition functionality =====
weather or not the input 
"my favorite deserts are chocolate covered strawberries, oreos, bannana splits, and cake."
satisfies the condition
"Does the input contain fruits?"
result: True
===== parse results =====
['strawberries', 'bannana']
===== Testing full edge execution =====
{'continue': True, 'result': sampleOutputTemplate(output=['strawberries', 'bannana']), 'num_fails': 0, 'continue_to': None}
===== Testing a few failed executions =====
{'continue': False, 'result': None, 'num_fails': 1, 'continue_to': None}
{'continue': False, 'result': None, 'num_fails': 2, 'continue_to': None}
{'continue': True, 'result': None, 'num_fails': 3, 'continue_to': None}
===== Testing a successful parse =====
{'continue': True, 'result': sampleOutputTemplate(output=['peach', 'strawberries', 'blueberries']), 'num_fails': 0, 'continue_to': None}


# Defining a Node

In [11]:
class Node:

    """Node
    A conversational state. A node asks the user for some input and tries
    that input on each of its edges in turn; it owns the list of edges and
    drives their execution.
    """

    def __init__(self, prompt, retry_prompt):
        """
        prompt (str): what to ask the user
        retry_prompt (str): what to ask the user if all edges fail

        The node starts with no edges; assign a list to `edges` afterwards.
        """
        self.edges = []
        self.prompt = prompt
        self.retry_prompt = retry_prompt

    def run_to_continue(self, _input):
        """Try the input on each edge, in order, stopping at the first one
        whose result has a truthy 'continue' entry.

        Returns that edge's result dictionary, or None when no edge continues.
        Edges after the continuing one are not executed.
        """
        outcomes = (edge.execute(_input) for edge in self.edges)
        return next((outcome for outcome in outcomes if outcome['continue']), None)

    def execute(self):
        """Handles the current conversational state.

        Shows the prompt, reads the user's answer and runs it through the
        edges; while no edge continues, shows the retry prompt and asks again.
        Returns the result dictionary from the edge that continued.
        """
        #initial prompt for the conversational state
        system_output(self.prompt)
        outcome = self.run_to_continue(user_input())

        #keep re-asking until some edge lets the conversation continue
        while outcome is None:
            system_output(self.retry_prompt)
            outcome = self.run_to_continue(user_input())

        parsing_info(f'parse results: {outcome}')
        return outcome


"""Testing
asking the user for an email or phone number, and attempting to parse either one
"""

#the model shared by both edges in this test
model_name = "text-davinci-003"
temperature = 0.0
model = OpenAI(temperature=temperature, model_name=model_name)

#both edges parse into the same single-string structure
class sampleOutputTemplate(BaseModel):
    output: str = Field(description="contact information")

#edge 1: email addresses
condition1 = "Does the input contain a full and valid email?"
parse_prompt1 = "extract the email from the following text."
edge1 = Edge(condition=condition1,
             parse_prompt=parse_prompt1,
             parse_class=sampleOutputTemplate,
             llm=model)

#edge 2: phone numbers
condition2 = "Does the input contain a phone number?"
parse_prompt2 = "extract the phone number from the following text."
edge2 = Edge(condition=condition2,
             parse_prompt=parse_prompt2,
             parse_class=sampleOutputTemplate,
             llm=model)

#the node that asks the question, with both edges attached (tried in this order)
test_node = Node(
    prompt="Please input your full email address or phone number",
    retry_prompt="I'm sorry, I didn't understand your response.\nPlease provide a full email address or phone number(in the format xxx-xxx-xxxx)",
)
test_node.edges = [edge1, edge2]

#running the node: it handles all i/o and re-asks until an edge continues
res = test_node.execute()


Please input your full email address or phone number
input: roperto@hotmail.com
*Info* parse results: {'continue': True, 'result': sampleOutputTemplate(output='roperto@hotmail.com'), 'num_fails': 0, 'continue_to': None}


In [12]:
# Display the result dictionary returned by test_node.execute() in the previous cell
res

{'continue': True,
 'result': sampleOutputTemplate(output='roperto@hotmail.com'),
 'num_fails': 0,
 'continue_to': None}

# Implementing Conversation

In [16]:
"""Implementing the conversation as a directed graph
"""
#Graph: name -> contact (phone or email) -> budget -> availability -> end.
#Each Node asks a question; each Edge validates/parses the answer and points
#at the next Node. avail_edge has no out_node, which ends the conversation.

# Defining Nodes
name_node = Node("Hello! My name's Dana and I'll be getting you started on your renting journey. I'll be asking you a few questions, and then forwarding you to one of our excellent agents to help you find a place you'd love to call home.\n\nFirst, can you please provide your name?", "I'm sorry, I don't understand, can you provide just your name?")
contact_node = Node("do you have a phone number or email we can use to contact you?", "I'm sorry, I didn't understand that. Can you please provide a valid email or phone number?")
budget_node = Node("What is your monthly budget for rent?", "I'm sorry, I don't understand the rent you provided. Try providing your rent in a format like '$1,300'")
avail_node = Node("Great, When is your soonest availability?", "I'm sorry, one more time, can you please provide a date you're willing to meet?")

#Defining Data Structures for Parsing
class nameTemplate(BaseModel): output: str = Field(description="a persons name")
class phoneTemplate(BaseModel): output: str = Field(description="phone number")
class emailTemplate(BaseModel): output: str = Field(description="email address")
class budgetTemplate(BaseModel): output: float = Field(description="budget")
class dateTemplate(BaseModel): output: str = Field(description="date")

#defining the model
model_name = "text-davinci-003"
temperature = 0.0
model = OpenAI(model_name=model_name, temperature=temperature)

#the model needs today's date to resolve relative terms such as 'tomorrow';
#computing it at run time keeps the prompt correct on whatever day this runs
today_text = date.today().strftime("%B %d, %Y")

#Defining Edges
name_edge = Edge("Does the input contain a persons name?", " Extract the persons name from the following text.", nameTemplate, model)
contact_phone_edge = Edge("does the input contain a valid phone number?", "extract the phone number in the format xxx-xxx-xxxx", phoneTemplate, model)
contact_email_edge = Edge("does the input contain a valid email?", "extract the email from the following text", emailTemplate, model)
budget_edge = Edge("Does the input contain a number in the thousands?", "Extract the number from the following text from the following text. Remove any symbols and multiply a number followed by the letter 'k' to thousands.", budgetTemplate, model)
avail_edge = Edge("does the input contain a date or day? dates or relative terms like 'tommorrow' or 'in 2 days'.", f"extract the day discussed in the following text as a date in mm/dd/yyyy format. Today is {today_text}.", dateTemplate, model)

#Defining Node Connections
name_node.edges = [name_edge]
contact_node.edges = [contact_phone_edge, contact_email_edge]
budget_node.edges = [budget_edge]
avail_node.edges = [avail_edge]

#defining edge connections
name_edge.out_node = contact_node
contact_phone_edge.out_node = budget_node
contact_email_edge.out_node = budget_node
budget_edge.out_node = avail_node

#running the graph: follow 'continue_to' until an edge leads nowhere (None)
current_node = name_node
while current_node is not None:
    res = current_node.execute()
    if res['continue']:
        current_node = res['continue_to']

Hello! My name's Dana and I'll be getting you started on your renting journey. I'll be asking you a few questions, and then forwarding you to one of our excellent agents to help you find a place you'd love to call home.

First, can you please provide your name?
input: walter
I'm sorry, I don't understand, can you provide just your name?
input: roberto
*Info* parse results: {'continue': True, 'result': nameTemplate(output='roberto'), 'num_fails': 0, 'continue_to': <__main__.Node object at 0x7d8541796170>}
<__main__.Node object at 0x7d8541796170>
do you have a phone number or email we can use to contact you?


KeyboardInterrupt: ignored

In [33]:
"""Implementing the conversation as a directed graph
"""
#Graph: intencion -> name -> dni -> contact (phone or email) -> budget ->
#availability -> end. Each Node asks a question; each Edge validates/parses
#the answer and points at the next Node. avail_edge has no out_node, which
#ends the conversation.

# Defining Nodes
intencion_node = Node("Hola! soy Dana tu asistente. Estoy para asistirte. En el camino seguramente te hare algunas pregutnas. En que puedo ayudarte ? ", "Disculpa, no te entiendo. Podrias repetir ?")
name_node = Node("Hello! My name's Dana and I'll be getting you started on your renting journey. I'll be asking you a few questions, and then forwarding you to one of our excellent agents to help you find a place you'd love to call home.\n\nFirst, can you please provide your name?", "I'm sorry, I don't understand, can you provide just your name?")
dni_node = Node("Podrias decirme tu DNI ?", "Disculpa, no entiendo. Necesitamos tu numero de documento para poder continuar. Podrias facilitarlo ?")
contact_node = Node("do you have a phone number or email we can use to contact you?", "I'm sorry, I didn't understand that. Can you please provide a valid email or phone number?")
budget_node = Node("What is your monthly budget for rent?", "I'm sorry, I don't understand the rent you provided. Try providing your rent in a format like '$1,300'")
avail_node = Node("Great, When is your soonest availability?", "I'm sorry, one more time, can you please provide a date you're willing to meet?")

#Defining Data Structures for Parsing
class saludoTemplate(BaseModel): output: str = Field(description="un saludo")
class nameTemplate(BaseModel): output: str = Field(description="a persons name")
class dniTemplate(BaseModel): output: str = Field(description="documento nacional de identidad de entre 7 u 8 numeros")
class phoneTemplate(BaseModel): output: str = Field(description="phone number")
class emailTemplate(BaseModel): output: str = Field(description="email address")
class budgetTemplate(BaseModel): output: float = Field(description="budget")
class dateTemplate(BaseModel): output: str = Field(description="date")

#defining the model
model_name = "text-davinci-003"
temperature = 0.0
model = OpenAI(model_name=model_name, temperature=temperature)

#the model needs today's date to resolve relative terms such as 'tomorrow';
#computing it at run time keeps the prompt correct on whatever day this runs
today_text = date.today().strftime("%B %d, %Y")

#Defining Edges
intencion_edge = Edge("El input contiene un saludo o frase? ", "Resume la frase o saludo en un maximo de dos palabras.", saludoTemplate, model)
name_edge = Edge("Does the input contain a persons name?", " Extract the persons name from the following text.", nameTemplate, model)
dni_edge = Edge("El input contiene un numero de entre 7 u 8 digitos ?", "Language: Spanish. El documento  es un numero de entre 7 u 8 digitos.  Ejemplo: 92837232 o 0928272 o 23.659.494. Extrae el DNI con el siguiente formato XX.XXX.XXX.", dniTemplate, model)
contact_phone_edge = Edge("does the input contain a valid phone number?", "extract the phone number in the format xxx-xxx-xxxx", phoneTemplate, model)
contact_email_edge = Edge("does the input contain a valid email?", "extract the email from the following text", emailTemplate, model)
budget_edge = Edge("Does the input contain a number in the thousands?", "Extract the number from the following text from the following text. Remove any symbols and multiply a number followed by the letter 'k' to thousands.", budgetTemplate, model)
avail_edge = Edge("does the input contain a date or day? dates or relative terms like 'tommorrow' or 'in 2 days'.", f"extract the day discussed in the following text as a date in mm/dd/yyyy format. Today is {today_text}.", dateTemplate, model)

#Defining Node Connections
#a node's `edges` must hold Edge objects: Node.run_to_continue calls
#edge.execute(_input), which a Node (whose execute takes no input) cannot serve
intencion_node.edges = [intencion_edge]
name_node.edges = [name_edge]
dni_node.edges = [dni_edge]
contact_node.edges = [contact_phone_edge, contact_email_edge]
budget_node.edges = [budget_edge]
avail_node.edges = [avail_edge]

#defining edge connections
#an edge's out_node must be a Node: the run loop below calls .execute() on it
#with no arguments
intencion_edge.out_node = name_node
name_edge.out_node = dni_node
dni_edge.out_node = contact_node
contact_phone_edge.out_node = budget_node
contact_email_edge.out_node = budget_node
budget_edge.out_node = avail_node

#running the graph: follow 'continue_to' until an edge leads nowhere (None)
#NOTE(review): the walk still starts at name_node, so intencion_node is wired
#but never visited; switch the entry point to intencion_node to include it
current_node = name_node
while current_node is not None:
    res = current_node.execute()
    print(res)
    if res['continue']:
        current_node = res['continue_to']


Hello! My name's Dana and I'll be getting you started on your renting journey. I'll be asking you a few questions, and then forwarding you to one of our excellent agents to help you find a place you'd love to call home.

First, can you please provide your name?
input: helena
*Info* parse results: {'continue': True, 'result': nameTemplate(output='helena'), 'num_fails': 0, 'continue_to': <__main__.Node object at 0x7d85419e3cd0>}
{'continue': True, 'result': nameTemplate(output='helena'), 'num_fails': 0, 'continue_to': <__main__.Node object at 0x7d85419e3cd0>}
Podrias decirme tu DNI ?
input: 34733833
Disculpa, no entiendo. Necesitamos tu numero de documento para poder continuar. Podrias facilitarlo ?
input: documento 287272223
*Info* parse results: {'continue': True, 'result': dniTemplate(output='28.727.222.3'), 'num_fails': 0, 'continue_to': <__main__.Node object at 0x7d85419e3340>}
{'continue': True, 'result': dniTemplate(output='28.727.222.3'), 'num_fails': 0, 'continue_to': <__main__.

KeyboardInterrupt: ignored