In [None]:
import os


os.environ['GEMINI_API_KEY'] = os.environ['GEMINI_API_KEY']

os.environ['LANGSMITH_API_KEY']= os.environ['LANGSMITH_API_KEY']
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGSMITH_ENDPOINT']='https://api.smith.langchain.com'
os.environ['LANGSMITH_PROJECT']='RAG-fusion'

from langsmith import traceable
print(os.environ['TASTY_TOAST'])

I am a tasty toast


In [10]:
with open('smallContent.txt', 'r') as f:
    data = f.read()

In [11]:
# splits

from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap=10)
splits = text_splitter.create_documents([data])
print(splits[1])


page_content='He was too much absorbed with his own thoughts to give any immediate
     answer to my remonstrance. He leaned upon his hand, with his untasted
     breakfast before him, and he stared at the slip of paper which he had
     just drawn from its envelope. Then he took the envelope itself, held
     it up to the light, and very carefully studied both the exterior and
     the flap.'


In [12]:
# embed
from langchain_community.embeddings import HuggingFaceEmbeddings

model_name = 'intfloat/e5-large'
embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs={'device': 'cpu'},
    encode_kwargs={'normalize_embeddings': True}
)

  embeddings = HuggingFaceEmbeddings(
2025-11-03 11:05:29.760190: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-11-03 11:05:30.305380: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-11-03 11:05:30.305420: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-03 11:05:30.309108: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-11-03 11:05:30.648422: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
T

In [5]:
# LLM
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    google_api_key=os.environ["GEMINI_API_KEY"],
)

In [6]:
# template
from langchain.prompts import ChatPromptTemplate

template = """You are an AI language model assistant. Your task is to generate five different versions of the given user question to retrieve relevant documents from a vector database. By generating multiple perspectives on the user question, your goal is to help the user overcome some of the limitations of the distance-based similarity search. Provide these alternative questions separated by newlines. Original question: {question}"""

from langchain_core.output_parsers import StrOutputParser

prompt = ChatPromptTemplate.from_template(template)
 
# gives 6 questions based on user input
@traceable 
def gen_queries():
    generate_queries = (
        prompt
        | llm
        | StrOutputParser()
        | (lambda x : x.split("\n"))
    )
    return generate_queries

generate_queries = gen_queries()
print(generate_queries)


first=ChatPromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, template='You are an AI language model assistant. Your task is to generate five different versions of the given user question to retrieve relevant documents from a vector database. By generating multiple perspectives on the user question, your goal is to help the user overcome some of the limitations of the distance-based similarity search. Provide these alternative questions separated by newlines. Original question: {question}'), additional_kwargs={})]) middle=[ChatGoogleGenerativeAI(model='models/gemini-2.5-flash', google_api_key=SecretStr('**********'), temperature=0.0, max_retries=2, client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x7763f4701e40>, default_metadata=(), model_kwargs={}), StrOutputParser(

In [7]:
# retriever
from langchain_community.vectorstores import Chroma

persist_directory = './chroma_e5_db'

vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embeddings,  
    persist_directory=persist_directory
)
retriever = vectorstore.as_retriever()


In [8]:
from langchain_core.load import dumps, load

# parallel process
def get_unique_union(documents: list[list]):
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]

    unique_docs = list(set(flattened_docs))
    return [load(docs) for docs in unique_docs]

question = "I don't understand the context. Give me summary."

@traceable
def get_docs_retrieval_chain():
    retrieval_chain = generate_queries | retriever.map() | get_unique_union

    docs = retrieval_chain.invoke({"question": question})

    print(len(docs))
    print(docs)
    return retrieval_chain

retrieval_chain = get_docs_retrieval_chain()

4
['{"lc": 1, "type": "constructor", "id": ["langchain", "schema", "document", "Document"], "kwargs": {"page_content": "\\"It is Porlock\'s writing,\\" said he thoughtfully. \\"I can hardly doubt\\n     that it is Porlock\'s writing, though I have seen it only twice\\n     before. The Greek e with the peculiar top flourish is distinctive.\\n     But if it is Porlock, then it must be something of the very first\\n     importance.\\"\\n\\n     He was speaking to himself rather than to me; but my vexation\\n     disappeared in the interest which the words awakened.\\n\\n     \\"Who then is Porlock?\\" I asked.", "type": "Document"}}', '{"lc": 1, "type": "constructor", "id": ["langchain", "schema", "document", "Document"], "kwargs": {"page_content": "He was too much absorbed with his own thoughts to give any immediate\\n     answer to my remonstrance. He leaned upon his hand, with his untasted\\n     breakfast before him, and he stared at the slip of paper which he had\\n     just drawn fr

  return [load(docs) for docs in unique_docs]


In [9]:
# Final RAG
from operator import itemgetter

@traceable
def final_query():
    template = """Your job is to give a concise answer. Answer the question based on the context:
    {context},
    Question: {question}
    """
    prompt = ChatPromptTemplate.from_template(template)
    final_rag_chain = (
        {"context": retrieval_chain,
        "question": itemgetter("question")}
        | prompt 
        | llm
        | StrOutputParser()
    )

    response = final_rag_chain.invoke({"question": question})
    return response

response = final_query()
print(response)

The context describes a character (likely Sherlock Holmes) examining a mysterious, important message from someone named Porlock, whose distinctive writing he recognizes. He is deeply absorbed in thought about the message. The conversation then shifts to the formidable and sinister Professor Moriarty, a mathematical genius, suggesting a connection between Porlock's message and Moriarty.
