In [10]:
from dotenv import load_dotenv

# Load key/value pairs from a .env file into the process environment before any
# client reads OPENAI_API_KEY. override=True asks python-dotenv to replace
# variables that are already set in the environment with the .env values.
load_dotenv(override=True)

True

In [11]:
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
import os
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

In [12]:


def load_vector_store(vector_store_dir: str, embedding_model: str = "text-embedding-ada-002"):
    """Load a FAISS vector store that was previously saved to disk.

    Args:
        vector_store_dir: Directory that holds the saved FAISS index files.
        embedding_model: Name of the OpenAI embedding model used to embed
            incoming queries. It should be the same model the index was built
            with, otherwise query vectors and stored vectors are not comparable.
            Defaults to the model this notebook originally hard-coded.

    Returns:
        The vector store object returned by ``FAISS.load_local``.
    """
    # NOTE(review): OpenAIEmbeddings is imported from ``langchain.embeddings``
    # in this notebook, which is a deprecated import path; the maintained class
    # lives in ``langchain_openai`` (already a dependency of this notebook).
    embeddings = OpenAIEmbeddings(
        model=embedding_model,
        openai_api_key=os.getenv("OPENAI_API_KEY"),
    )

    # SECURITY: allow_dangerous_deserialization=True lets FAISS unpickle the
    # stored document store. Unpickling can execute arbitrary code, so only
    # point this at directories you created yourself or otherwise trust.
    return FAISS.load_local(
        vector_store_dir,
        embeddings,
        allow_dangerous_deserialization=True,
    )


def create_qa_chain(vector_store, model_name: str = "gpt-4.1", temperature: float = 0.0, top_k: int = 3):
    """Create a conversational question-answering chain over a vector store.

    Args:
        vector_store: Vector store exposing ``as_retriever``; used to fetch the
            context chunks for each question.
        model_name: Chat model used to answer. Defaults to ``"gpt-4.1"``.
        temperature: Sampling temperature passed to the chat model.
            Defaults to ``0.0``.
        top_k: Number of most relevant chunks to retrieve per question.
            Defaults to ``3``.

    Returns:
        The chain built by ``ConversationalRetrievalChain.from_llm``, wired
        with a fresh ``ConversationBufferMemory`` (so each chain returned by
        this function starts with an empty chat history).

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
    """
    if top_k < 1:
        raise ValueError("top_k must be a positive integer")

    # Initialize the LLM
    llm = ChatOpenAI(
        model_name=model_name,
        openai_api_key=os.getenv("OPENAI_API_KEY"),
        temperature=temperature,
    )

    # Create memory for conversation history; the key must match the
    # {chat_history} placeholder used in the prompt below.
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

    # Custom prompt template. The leading indentation inside the literal is
    # part of the prompt text sent to the model and is kept as-is on purpose.
    template = """You are an AI assistant that helps answer questions based on the provided context. 
    Use the following pieces of context to answer the question at the end. 
    If you don't know the answer, just say that you don't know, don't try to make up an answer.
    
    Context: {context}
    
    Chat History: {chat_history}
    Human: {question}
    Assistant:"""

    QA_CHAIN_PROMPT = PromptTemplate(
        input_variables=["context", "chat_history", "question"], template=template
    )

    # Create the chain
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vector_store.as_retriever(
            search_kwargs={"k": top_k}  # Retrieve the top_k most relevant chunks
        ),
        memory=memory,
        combine_docs_chain_kwargs={"prompt": QA_CHAIN_PROMPT},
    )

    return qa_chain

In [13]:
from langchain.tools import tool

@tool(
        "query_documents",
        description="This tool helps to query the document"
)
def query_documents(question: str) -> str:
    """
    Answer a question from the indexed documents.

    Args:
        question (str): The question to ask in order to retrieve the information

    Returns:
        str: The answer text produced by the QA chain
    """
    # Load the vector store (location is configurable through the environment)
    vector_store_dir: str = os.getenv("RAG_VECTORSTORE_PATH", "vector_store")
    vector_store = load_vector_store(vector_store_dir)

    # Create QA chain. A new chain, and therefore a new empty conversation
    # memory, is built on every call, so no chat history carries over between
    # tool invocations.
    qa_chain = create_qa_chain(vector_store)

    # Get response. Use .invoke(): calling the chain object directly
    # (qa_chain({...})) is deprecated in LangChain.
    response = qa_chain.invoke({"question": question})

    return response["answer"]

In [14]:
from langgraph.prebuilt import create_react_agent
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage, ToolMessage
from langgraph.checkpoint.memory import MemorySaver
# Checkpointer handed to create_react_agent below (checkpointer=memory).
# NOTE(review): MemorySaver presumably keeps checkpoints in process memory
# only, so agent state would not survive a kernel restart — confirm.
memory = MemorySaver()
from langchain_openai import ChatOpenAI

# Short description of the business domain. It is substituted into the
# {semantic_context} placeholder of RAG_AGENT_PROMPT when the agent is built.
# NOTE(review): the text is used as prompt content exactly as written,
# including its spelling mistakes (e.g. "manifacturing"); correct them
# deliberately, since any edit changes what the model sees.
BUSINESS_CONTEXT = """
Tesla is a manifacturing company that produces electric cars, solar panels, and energy storage systems.
"""

# Prompt template for the agent. {semantic_context} is its only placeholder and
# is filled with str.format, so any literal curly braces added to this text
# must be doubled ({{ }}) or the format call will fail.
# NOTE(review): contains typos ("retrive", "relavant", "disired",
# "inform the to the user") that are part of the runtime prompt.
RAG_AGENT_PROMPT = """
You are a helpful assistant who can answer questions based on the provided context.

You're also given a tool, "query_documents" to retrive the information from relavant document

Take the following steps to provide the answer:
1. write reasoning steps to approach the question.
2. Retrive the relavant information (please feel free to go on multiple iteration until you find disired information)
3. provide the answer to the user

here is the semantic context you can use to understand the business and generate the query:
----*****Semantic Context*****----
{semantic_context}
----*****END OF Semantic Context*****----

Expectation:
- if the information is not present or the result is empty, inform the to the user rather than guessing.
- if you don't have enough information to generate the query, ask for more information.
- please include name whenever possible instead of ids, for example instead of device id provide device name too.
"""

# Chat model that drives the agent; temperature 0.0 keeps sampling randomness
# to a minimum.
model = ChatOpenAI(model="gpt-4.1", temperature=0.0)

# Agent that can call the query_documents tool. Its prompt is the template
# with the business context filled in, and its state is stored through the
# `memory` checkpointer defined earlier in this cell.
rag_agent = create_react_agent(
    model=model,
    tools=[query_documents],
    prompt=RAG_AGENT_PROMPT.format(semantic_context=BUSINESS_CONTEXT),
    checkpointer=memory,
    name="rag_agent",
)


In [32]:
# Run configuration passed to rag_agent.stream: the thread id identifies the
# conversation for the checkpointer, and recursion_limit bounds the run.
thread = {
    "configurable": {"thread_id": "02"},
    "recursion_limit": 20,
}

# The single user question for this run (whitespace inside the literal is
# kept exactly as originally written).
messages = [
    HumanMessage(
        content="""
Research and development expenses during Q2-2024? 
"""
    )
]

In [33]:
# Stream the agent run, echoing every update and remembering the most recent
# non-empty message content as the final answer.
final_message = None
for event in rag_agent.stream({"messages": messages}, thread):
    for node_update in event.values():
        print(node_update)
        latest_content = node_update['messages'][-1].content
        if latest_content:
            final_message = latest_content

{'messages': [AIMessage(content="Reasoning steps:\n1. The user is asking for Tesla's research and development (R&D) expenses during Q2-2024.\n2. I need to look for financial data or quarterly reports that mention Tesla's R&D expenses for Q2-2024.\n3. I will query the documents with a specific question to retrieve this information.\n\nQuerying for relevant information...", additional_kwargs={'tool_calls': [{'id': 'call_38V8mO8hptsmqbA6edCGyhKy', 'function': {'arguments': '{"question":"What were Tesla\'s research and development expenses during Q2-2024?"}', 'name': 'query_documents'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 105, 'prompt_tokens': 673, 'total_tokens': 778, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4.1-2025-04-14', 'system_fingerprint': 'f

In [34]:
# Display the last non-empty message content captured by the streaming loop.
final_message

"Tesla's research and development expenses during Q2-2024 were $1,074 million."