Setup

In [None]:
from auto_flow.core.llm.openai.openai_llm import OpenAILLM
from auto_flow.core.prompts.chat_template import ChatTemplate
from auto_flow.core.rag.retrivers.wiki_retriever import WikiRetriever

# Chat model shared by every flow in this notebook (temperature pinned to 0).
llm = OpenAILLM(model="gpt-4o", temperature=0)

# Wikipedia retriever. NOTE(review): top_k / doc_content_chars_max presumably
# bound the number of hits and the per-document length — confirm against
# WikiRetriever.
wiki = WikiRetriever(top_k=6, doc_content_chars_max=2000)

# Two-message chat prompt: the system turn carries the instructions followed by
# the {context} placeholder, the human turn carries the {input} placeholder.
# The system text is split across adjacent literals purely for readability;
# Python joins them into the exact same single string.
prompt = ChatTemplate.from_messages(
    [
        (
            "system",
            "You're a helpful AI assistant. "
            "Given a user question and some Wikipedia article snippets, "
            "answer the user question. "
            "If none of the articles answer the question, just say you don't know."
            "\n\nHere are the Wikipedia articles:{context}",
        ),
        ("human", "{input}"),
    ]
)

In [None]:
from auto_flow.core.rag.document.document import Document
from typing import List


def format_docs(docs: List[Document]) -> str:
    """Render documents as one string of title/snippet entries.

    Each document contributes a two-line entry (its ``metadata['title']`` and
    its ``text``). Entries are separated by a blank line, and the returned
    string always begins with a blank-line separator.

    Args:
        docs: Documents exposing ``metadata['title']`` and ``text``.

    Returns:
        The joined entries; just the leading separator when ``docs`` is empty.

    Raises:
        KeyError: If a document's metadata has no ``'title'`` key.
    """
    separator = "\n\n"
    entries = []
    for doc in docs:
        title = doc.metadata["title"]
        entries.append(f"Article Title: {title}\nArticle Snippet: {doc.text}")
    return separator + separator.join(entries)

In [None]:
from auto_flow.core.llm.message_parser import StrOutParser
from auto_flow.core.flow.flow import to_flow, ParallelFlow, identity
from operator import itemgetter

# Pull the "docs" entry out of the flow state and flatten it with format_docs.
context = itemgetter("docs") | to_flow(format_docs)

# Prompt -> chat model -> string output parser.
answer = prompt | llm | StrOutParser()

# Build the flow one stage at a time; each call's result feeds the next stage.
# NOTE(review): presumably ParallelFlow hands the same question to `identity`
# and to the retriever — confirm against ParallelFlow.
flow = ParallelFlow(input=identity, docs=wiki)
flow = flow.assign(context=context)
flow = flow.assign(answer=answer)
flow = flow.pick("answer", "docs")

In [None]:
# Run the plain RAG flow on a sample question; as the cell's last expression,
# the returned value is shown as the cell output.
flow.invoke("How fast are cheetahs?")

Function-calling

In [None]:
from typing import List
from pydantic import BaseModel, Field


# Tool schema: an answer plus the integer IDs of the sources that support it.
# The class name is referenced by string in
# `llm.bind_tools([cited_answer], tool_choice="cited_answer")`, so a rename
# must update that string as well.
# NOTE(review): the docstring and Field descriptions are presumably forwarded
# to the model as the tool description — treat them as prompt text and confirm
# against bind_tools before rewording.
class cited_answer(BaseModel):
    """Answer the user question based only on the given sources, and cite the sources used."""

    # Free-text answer produced by the model.
    answer: str = Field(
        description="The answer to the user question, which is based only on the given sources."
    )
    # Integer source IDs backing the answer.
    citations: List[int] = Field(
        description="The integer IDs of the SPECIFIC sources which justify the answer."
    )

In [None]:
# Bind the cited_answer schema to the model as a tool; tool_choice names that
# same tool. NOTE(review): presumably this forces every response to go through
# the tool — confirm against bind_tools.
llm_with_tool = llm.bind_tools([cited_answer], tool_choice="cited_answer")
# Toy prompt for exercising the tool-bound model without retrieval: one
# question followed by three numbered sources. Answering requires combining
# sources 1 and 3; source 2 is irrelevant to the question.
# Fixed two typos from the original text ("What Brian's" -> "What is Brian's",
# "shorted" -> "shorter") so the example reads as intended.
example_q = """What is Brian's height?

Source: 1
Information: Suzy is 6'2"

Source: 2
Information: Jeremiah is blonde

Source: 3
Information: Brian is 3 inches shorter than Suzy"""
# Send the toy prompt to the tool-bound model; the returned value is shown as
# the cell output.
llm_with_tool.invoke(example_q)

In [None]:
from auto_flow.core.llm.message_parser import MessagePydanticOutParser

# Parser applied to the tool-bound model's output.
# NOTE(review): return_dict / return_first presumably make it return the first
# parsed tool call as a plain dict — confirm against MessagePydanticOutParser.
output_parser = MessagePydanticOutParser(return_dict=True, return_first=True)
# Same toy prompt as before, now piped through the parser so the cell output
# is the parsed result.
(llm_with_tool | output_parser).invoke(example_q)

In [None]:
from auto_flow.core.rag.document.document import Document
from auto_flow.core.flow.flow import to_flow, ParallelFlow, identity
from operator import itemgetter


def format_docs_with_id(docs: List[Document]) -> str:
    """Render documents as one string of ID/title/snippet entries.

    Each document contributes a three-line entry: its zero-based position in
    ``docs`` as the source ID, its ``metadata['title']`` and its ``text``.
    Entries are separated by a blank line, and the returned string always
    begins with a blank-line separator.

    Args:
        docs: Documents exposing ``metadata['title']`` and ``text``.

    Returns:
        The joined entries; just the leading separator when ``docs`` is empty.

    Raises:
        KeyError: If a document's metadata has no ``'title'`` key.
    """

    def _entry(source_id: int, doc) -> str:
        # One numbered entry; the ID is the document's index in `docs`.
        title = doc.metadata["title"]
        return (
            f"Source ID: {source_id}\n"
            f"Article Title: {title}\n"
            f"Article Snippet: {doc.text}"
        )

    separator = "\n\n"
    body = separator.join(_entry(idx, doc) for idx, doc in enumerate(docs))
    return separator + body


# Pull the "docs" entry out of the flow state and flatten it with numbered
# source IDs, so the model has integer IDs to cite.
format_1 = itemgetter("docs") | to_flow(format_docs_with_id)

# Prompt -> tool-bound model -> tool-call parser.
answer_1 = prompt | llm_with_tool | output_parser

# Build the flow one stage at a time; each call's result feeds the next stage.
chain_1 = ParallelFlow(input=identity, docs=wiki)
chain_1 = chain_1.assign(context=format_1)
chain_1 = chain_1.assign(cited_answer=answer_1)
chain_1 = chain_1.pick("cited_answer", "docs")

In [None]:
# Run the source-ID citation flow on the sample question; the returned value
# is shown as the cell output.
chain_1.invoke("How fast are cheetahs?")

Cite snippets

In [None]:
# One citation: a source ID paired with the quote taken from that source.
# Used as the element type of `quoted_answer.citations`.
# Documented with comments rather than a class docstring on purpose:
# NOTE(review): a docstring would presumably surface in the generated schema
# sent to the model — confirm before adding one.
class Citation(BaseModel):
    # Integer ID of the cited source.
    source_id: int = Field(
        description="The integer ID of a SPECIFIC source which justifies the answer.",
    )
    # Quote text the model is asked to copy verbatim from that source.
    quote: str = Field(
        description="The VERBATIM quote from the specified source that justifies the answer."
    )


# Tool schema: an answer plus a list of Citation entries (source ID + quote).
# The class name is referenced by string in
# `llm.bind_tools([quoted_answer], tool_choice="quoted_answer")`, so a rename
# must update that string as well.
# NOTE(review): the docstring and Field descriptions are presumably forwarded
# to the model as the tool description — treat them as prompt text and confirm
# against bind_tools before rewording.
class quoted_answer(BaseModel):
    """Answer the user question based only on the given sources, and cite the sources used."""

    # Free-text answer produced by the model.
    answer: str = Field(
        description="The answer to the user question, which is based only on the given sources."
    )
    # Structured citations backing the answer.
    citations: List[Citation] = Field(
        description="Citations from the given sources that justify the answer."
    )

In [None]:
# Parser and tool-bound model dedicated to the quoted_answer schema.
output_parser_2 = MessagePydanticOutParser(
    return_dict=True,
    return_first=True,
)
llm_with_tool_2 = llm.bind_tools(
    [quoted_answer],
    tool_choice="quoted_answer",
)

# Pull the "docs" entry out of the flow state and flatten it with numbered
# source IDs, so each Citation.source_id can refer to one of them.
format_2 = itemgetter("docs") | to_flow(format_docs_with_id)

# Prompt -> tool-bound model -> tool-call parser.
answer_2 = prompt | llm_with_tool_2 | output_parser_2

# Build the flow one stage at a time; each call's result feeds the next stage.
chain_2 = ParallelFlow(input=identity, docs=wiki)
chain_2 = chain_2.assign(context=format_2)
chain_2 = chain_2.assign(quoted_answer=answer_2)
chain_2 = chain_2.pick("quoted_answer", "docs")

In [None]:
# Run the quote-level citation flow on the sample question; the returned value
# is shown as the cell output.
chain_2.invoke("How fast are cheetahs?")