
# MultiQueryRetriever implementation with TruLens


> Distance-based vector database retrieval embeds (represents) queries in high-dimensional space and finds similar embedded documents based on “distance”. But, retrieval may produce different results with subtle changes in query wording or if the embeddings do not capture the semantics of the data well. Prompt engineering / tuning is sometimes done to manually address these problems, but can be tedious.

> The MultiQueryRetriever automates the process of prompt tuning by using an LLM to generate multiple queries from different perspectives for a given user input query. For each query, it retrieves a set of relevant documents and takes the unique union across all queries to get a larger set of potentially relevant documents. By generating multiple perspectives on the same question, the MultiQueryRetriever might be able to overcome some of the limitations of the distance-based retrieval and get a richer set of results.


https://python.langchain.com/docs/modules/data_connection/retrievers/MultiQueryRetriever

In [None]:
# !pip install --pre trulens openai langchain chromadb langchainhub bs4 tiktoken langchain-core langchain-openai numpy

In [None]:
# os.environ["OPENAI_API_KEY"] = "sk-" #hide the key

# Import the necessary LangChain and TruLens modules


In [None]:
import logging

from langchain import hub
from langchain.prompts import PromptTemplate
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from trulens.core import Feedback
from trulens.core import TruSession
from trulens.instrument.langchain import TruChain
from trulens.providers.openai import OpenAI

# Get and load data from lilianweng.github.io

In [None]:
# Example question used throughout the notebook.
question = "What are the approaches to Task Decomposition?"

# Prompt that asks the LLM to rewrite the user's question five different ways,
# one per line, so that retrieval is less sensitive to the exact wording.
QUERY_PROMPT = PromptTemplate(
    template="""You are an AI language model assistant. Your task is to generate five
    different versions of the given user question to retrieve relevant documents from a vector
    database. By generating multiple perspectives on the user question, your goal is to help
    the user overcome some of the limitations of the distance-based similarity search.
    Provide these alternative questions separated by newlines.
    Original question: {question}""",
    input_variables=["question"],
)

# Fetch the blog post that serves as the document source.
loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
data = loader.load()

# Break the loaded documents into chunks (chunk_size=500, no overlap).
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=0, chunk_size=500)
splits = text_splitter.split_documents(data)

# Embed the chunks with OpenAI embeddings and index them in a Chroma store.
embedding = OpenAIEmbeddings()
vectordb = Chroma.from_documents(embedding=embedding, documents=splits)


# Set up the MultiQueryRetriever along with an LLM and a logger

In [None]:
# Enable INFO-level logging for the multi-query retriever module.
logging.basicConfig()
multi_query_logger = logging.getLogger("langchain.retrievers.multi_query")
multi_query_logger.setLevel(logging.INFO)

# LLM that generates the alternative queries (temperature=0).
llm = ChatOpenAI(temperature=0)

# Wrap the vector store's retriever so each question is expanded into several
# queries using QUERY_PROMPT before documents are fetched.
base_retriever = vectordb.as_retriever()
retriever_from_llm = MultiQueryRetriever.from_llm(
    retriever=base_retriever,
    llm=llm,
    prompt=QUERY_PROMPT,
)

# Set up TruLens with the MultiQueryRetriever

In [None]:
import numpy as np

# Open a TruLens session and reset its database before recording this run.
tru = TruSession()
tru.reset_database()

# Provider whose methods back the feedback functions defined below.
provider = OpenAI()

# Chat model and RAG prompt used by the answer-generation chain.
# Note: the context used in feedback is selected once the chain is built;
# the location of the context is app specific.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
prompt = hub.pull("rlm/rag-prompt")


def format_docs(docs):
    """Concatenate the ``page_content`` of each document, separated by blank lines.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        A single string with the page contents joined by two newlines;
        an empty string when ``docs`` is empty.
    """
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)


# RAG chain: retrieve documents with the multi-query retriever, flatten them
# into one context string, fill the prompt, call the LLM and parse the reply
# into a plain string.
rag_chain = (
    {
        "context": retriever_from_llm | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

# Select the context to be used in feedback. The location of the context is
# app specific, so let TruChain locate it within this chain.
context = TruChain.select_context(rag_chain)

# Define a groundedness feedback function
f_groundedness = (
    Feedback(provider.groundedness_measure_with_cot_reasons)
    .on(context.collect())  # collect context chunks into a list
    .on_output()
)

# Question/answer relevance between overall question and answer.
f_answer_relevance = Feedback(provider.relevance).on_input_output()
# Question/statement relevance between question and each context chunk,
# averaged over all chunks.
f_context_relevance = (
    Feedback(provider.context_relevance_with_cot_reasons)
    .on_input()
    .on(context)
    .aggregate(np.mean)
)

# Wrap the chain so that invocations are recorded and scored by the feedbacks.
tru_recorder = TruChain(
    rag_chain,
    app_name="MultiReg",
    feedbacks=[f_answer_relevance, f_context_relevance, f_groundedness],
)

# Run one question through the chain while capturing its record.
response, tru_record = tru_recorder.with_record(
    rag_chain.invoke, "What is Task Decomposition?"
)

# NOTE: `app_ids` expects TruLens app ids, not app names. In TruLens 1.x the
# id is generated from the app name and version, so filtering on the literal
# name "MultiReg" would match nothing. Use the recorder's own id instead.
recorded_app_ids = [tru_recorder.app_id]
tru.get_records_and_feedback(app_ids=recorded_app_ids)
tru.get_leaderboard(app_ids=recorded_app_ids)

In [None]:
from trulens.dashboard import run_dashboard

# Launch the TruLens dashboard for the session created above so the recorded
# runs and feedback results can be inspected.
run_dashboard(tru)