ref: https://github.com/sakunaharinda/ragatouille-book/blob/main/book/2_Query_Transformation.ipynb

In [None]:
import rich
from dotenv import load_dotenv

# Read key/value pairs from a local .env file into the process environment
# before any client is constructed.
# NOTE(review): presumably this is where the OpenAI API key comes from — confirm.
load_dotenv()

In [None]:
from langchain_core.prompts import FewShotChatMessagePromptTemplate, ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Worked examples for step-back prompting: each one pairs a specific question
# ('input') with the broader question it should be rewritten into ('output').
examples = [
    dict(
        input='What happens to the pressure, P, of an ideal gas if the temperature is increased by a factor of 2 and the volume is increased by a factor of 8?',
        output='What are the physics principles behind this question?',
    ),
    dict(
        input='Estella Leopold went to which school between Aug 1954 and Nov 1954?',
        output="What was Estella Leopold's education history?",
    ),
]

# Every example is rendered as a human turn followed by an AI turn.
example_prompt = ChatPromptTemplate.from_messages([('human', '{input}'), ('ai', '{output}')])

# Few-shot block built from the examples above; it is embedded in the final prompt.
few_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt,
    examples=examples,
)

In [None]:
# Sanity check: show the formatted few-shot examples first,
# then the template object itself.
rendered_examples = few_shot_prompt.format()
rich.print(rendered_examples)
rich.print(few_shot_prompt)

In [None]:
# System instruction for the step-back rewriter. The adjacent literals are
# concatenated into one single-line message.
step_back_system_message = (
    "You are an expert at world knowledge. "
    "Your task is to step back and paraphrase a question to a more generic step-back question, which is easier to answer. "
    "Here are a few examples:"
)

# Message order: system instruction -> few-shot examples -> the user's question.
final_prompt = ChatPromptTemplate.from_messages(
    [
        ('system', step_back_system_message),
        few_shot_prompt,
        ('user', '{question}'),
    ]
)

In [None]:
# Preview the fully rendered step-back prompt for a sample question.
rendered_prompt = final_prompt.format(
    question="What need to consider when using LLM to eval LLM generation?"
)
rich.print(rendered_prompt)

#### There are a few ways you can write the chain
##### Method 1

In [None]:
# Method 1: the prompt is the first step of the chain, so invoke() must be
# given a dict whose keys match the prompt variables ("question").
step_back_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.8)
step_back_query_chain = final_prompt | step_back_llm | StrOutputParser()

step_back_query_chain.invoke(
    {"question": "What need to consider when using LLM to eval LLM generation?"}
)

In [None]:
# Method 2: the leading {"question": RunnablePassthrough()} step takes whatever
# is passed to invoke() and stores it under the "question" key. The chain
# therefore accepts the raw question string, which lets it be composed
# directly after other steps that emit a plain string.
step_back_query_chain = (
    {"question": RunnablePassthrough()}
    | final_prompt
    | ChatOpenAI(model="gpt-4o-mini", temperature=0.8)
    | StrOutputParser()
)

# Pass the plain string here. A {"question": ...} dict would be wrapped a second
# time, and the prompt would render the dict's repr instead of the question.
step_back_query_chain.invoke("What need to consider when using LLM to eval LLM generation?")

我們需要同時使用 step-back 問題檢索到的文件，以及原始問題（query）檢索到的文件，來產生最後的回答

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [None]:
# Embedding model shared by indexing and retrieval (both must use the same one).
embedding = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2",
)

In [None]:
from pathlib import Path

# On-disk location of the Chroma index. Delete this directory to force a
# rebuild (for example after the PDF set changes).
VECTORSTORE_DIR = "data/vectorstore"

# Load every PDF found in the source folder.
loader = DirectoryLoader('../../pdf_files/', glob="*.pdf", loader_cls=PyPDFLoader)
documents = loader.load()

# Split text into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
text_chunks = text_splitter.split_documents(documents)

persist_path = Path(VECTORSTORE_DIR)
if persist_path.is_dir() and any(persist_path.iterdir()):
    # Reuse the persisted index. from_documents() adds to whatever is already
    # stored, so re-running this cell would otherwise index every chunk again
    # and the retriever would start returning duplicate hits.
    vectorstore = Chroma(
        persist_directory=VECTORSTORE_DIR,
        embedding_function=embedding,
    )
else:
    vectorstore = Chroma.from_documents(
        documents=text_chunks,
        embedding=embedding,
        persist_directory=VECTORSTORE_DIR,
    )
    # NOTE(review): newer Chroma releases persist automatically and mark this
    # call as deprecated; kept for compatibility with older installs — confirm.
    vectorstore.persist()

retriever = vectorstore.as_retriever()

In [None]:
# Final-answer prompt. It exposes three placeholders — {normal_context},
# {step_back_context} and {question} — so the chain feeding it must supply
# all three keys. The text below is sent to the model verbatim (including
# the trailing spaces on the first two lines), so edit it with care.
response_prompt_template = """You are an expert of world knowledge. 
I am going to ask you a question. Your response should be comprehensive and not contradicted with the following context if they are relevant. 
Otherwise, ignore them if they are not relevant.

<normal_context>
# {normal_context}
</normal_context>

<step_back_context>
# {step_back_context}
</step_back_context>


# Original Question: {question}
# Answer:"""

# Build the chat prompt object from the template string above.
response_prompt = ChatPromptTemplate.from_template(response_prompt_template)

from operator import itemgetter

# Two ways to fan the user's question out to both retrieval branches.
# Both are invoked with the raw question string, and both rely on
# step_back_query_chain (as last defined) accepting a raw string.
#
# Syntax 1 - no wrapping step: every branch receives the input string as-is,
# so a bare RunnablePassthrough() is enough.
# step_back_and_response_chain = ({"normal_context": RunnablePassthrough() |  retriever,
#      "step_back_context": RunnablePassthrough() | step_back_query_chain | retriever,
#      "question": RunnablePassthrough()}
#      | response_prompt
#      | ChatOpenAI(model="gpt-4o-mini", temperature=0.2)
#      | StrOutputParser()
# )
#
# Syntax 2 - wrap the input into {"question": ...} first. After that step each
# branch receives a dict, so it has to pull the string back out with
# itemgetter("question"). A bare RunnablePassthrough() would forward the whole
# dict to the retriever and to the step-back chain, and would render the dict's
# repr in the {question} slot of the final prompt.
step_back_and_response_chain = (
    {"question": RunnablePassthrough()}
    | {
        # Documents retrieved for the original question.
        "normal_context": itemgetter("question") | retriever,
        # Documents retrieved for the generated step-back question.
        "step_back_context": itemgetter("question") | step_back_query_chain | retriever,
        # The original question text, passed through unchanged.
        "question": itemgetter("question"),
    }
    | response_prompt
    | ChatOpenAI(model="gpt-4o-mini", temperature=0.2)
    | StrOutputParser()
)

In [None]:
# Run the full pipeline; the chain is invoked with the raw question string.
res = step_back_and_response_chain.invoke(
    "What need to consider when using LLM to eval LLM generation?"
)

In [None]:
# Display the final answer produced by step_back_and_response_chain.
rich.print(res)