# Building the Knowledge Base

In [1]:
from datasets import load_dataset

data = load_dataset('squad', split='train')
data

Dataset({
    features: ['id', 'title', 'context', 'question', 'answers'],
    num_rows: 87599
})

In [2]:
data = data.to_pandas()
data.head()

Unnamed: 0,id,title,context,question,answers
0,5733be284776f41900661182,University_of_Notre_Dame,"Architecturally, the school has a Catholic cha...",To whom did the Virgin Mary allegedly appear i...,"{'text': ['Saint Bernadette Soubirous'], 'answ..."
1,5733be284776f4190066117f,University_of_Notre_Dame,"Architecturally, the school has a Catholic cha...",What is in front of the Notre Dame Main Building?,"{'text': ['a copper statue of Christ'], 'answe..."
2,5733be284776f41900661180,University_of_Notre_Dame,"Architecturally, the school has a Catholic cha...",The Basilica of the Sacred heart at Notre Dame...,"{'text': ['the Main Building'], 'answer_start'..."
3,5733be284776f41900661181,University_of_Notre_Dame,"Architecturally, the school has a Catholic cha...",What is the Grotto at Notre Dame?,{'text': ['a Marian place of prayer and reflec...
4,5733be284776f4190066117e,University_of_Notre_Dame,"Architecturally, the school has a Catholic cha...",What sits on top of the Main Building at Notre...,{'text': ['a golden statue of the Virgin Mary'...


In [3]:
data.drop_duplicates(subset='context', keep='first', inplace=True)
data.head()

Unnamed: 0,id,title,context,question,answers
0,5733be284776f41900661182,University_of_Notre_Dame,"Architecturally, the school has a Catholic cha...",To whom did the Virgin Mary allegedly appear i...,"{'text': ['Saint Bernadette Soubirous'], 'answ..."
5,5733bf84d058e614000b61be,University_of_Notre_Dame,"As at most other universities, Notre Dame's st...",When did the Scholastic Magazine of Notre dame...,"{'text': ['September 1876'], 'answer_start': [..."
10,5733bed24776f41900661188,University_of_Notre_Dame,The university is the major seat of the Congre...,Where is the headquarters of the Congregation ...,"{'text': ['Rome'], 'answer_start': [119]}"
15,5733a6424776f41900660f51,University_of_Notre_Dame,The College of Engineering was established in ...,How many BS level degrees are offered in the C...,"{'text': ['eight'], 'answer_start': [487]}"
20,5733a70c4776f41900660f64,University_of_Notre_Dame,All of Notre Dame's undergraduate students are...,What entity provides help with the management ...,"{'text': ['Learning Resource Center'], 'answer..."


# Initialize the Embedding Model and Vector DB

In [4]:
from langchain_huggingface import HuggingFaceEmbeddings

prompt = "Represent this sentence for searching relevant passages: "
model_name = "BAAI/bge-small-en"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}
query_encode_kwargs = {"prompt": prompt}

embed = HuggingFaceEmbeddings(
    model_name=model_name, 
    model_kwargs=model_kwargs, 
    encode_kwargs=encode_kwargs,
    query_encode_kwargs=query_encode_kwargs
)

2026-02-21 07:24:01.552547: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
from pinecone import Pinecone

pc = Pinecone()

In [6]:
from pinecone import ServerlessSpec

spec = ServerlessSpec(
    cloud="aws", region="us-east-1"
)

In [7]:
import time

index_name = "langchain-retrieval-agent"
existing_indexes = [
    index_info["name"] for index_info in pc.list_indexes()
]

# check if index already exists (it shouldn't if this is first time)
if index_name not in existing_indexes:
    # if does not exist, create index
    pc.create_index(
        index_name,
        dimension=384,  # dimensionality of BAAI/bge-small-en
        metric='dotproduct',
        spec=spec
    )
    # wait for index to be initialized
    while not pc.describe_index(index_name).status['ready']:
        time.sleep(1)

# connect to index
index = pc.Index(index_name)
time.sleep(1)
# view index stats
index.describe_index_stats()

{'dimension': 384,
 'index_fullness': 0.0,
 'metric': 'dotproduct',
 'namespaces': {'': {'vector_count': 5000}},
 'total_vector_count': 5000,
 'vector_type': 'dense'}

# Indexing

In [8]:
from tqdm.auto import tqdm

batch_size = 100
data = data[0:5000]  # for demo purposes, only use 5000 records

texts = []
metadatas = []

for i in tqdm(range(0, len(data), batch_size)):
    # get end of batch
    i_end = min(len(data), i+batch_size)
    batch = data.iloc[i:i_end]
    # first get metadata fields for this record
    metadatas = [{
        'title': record['title'],
        'text': record['context']
    } for j, record in batch.iterrows()]
    # get the list of contexts / documents
    documents = batch['context']
    # create document embeddings
    embeds = embed.embed_documents(documents)
    # get IDs
    ids = batch['id']
    # add everything to pinecone
    index.upsert(vectors=zip(ids, embeds, metadatas))

  0%|          | 0/50 [00:00<?, ?it/s]

# Creating a Vector Store and Querying

In [9]:
from langchain_pinecone import PineconeVectorStore

# initialize the vector store object
vectorstore = PineconeVectorStore(index=index, embedding=embed)

In [10]:
query = "when was the college of engineering in the University of Notre Dame established?"

vectorstore.similarity_search(
    query,  # our search query
    k=3  # return 3 most relevant docs
)

[Document(id='5733a55a4776f41900660f3a', metadata={'title': 'University_of_Notre_Dame'}, page_content='The School of Architecture was established in 1899, although degrees in architecture were first awarded by the university in 1898. Today the school, housed in Bond Hall, offers a five-year undergraduate program leading to the Bachelor of Architecture degree. All undergraduate students study the third year of the program in Rome. The university is globally recognized for its Notre Dame School of Architecture, a faculty that teaches (pre-modernist) traditional and classical architecture and urban planning (e.g. following the principles of New Urbanism and New Classical Architecture). It also awards the renowned annual Driehaus Architecture Prize.'),
 Document(id='5733a6424776f41900660f51', metadata={'title': 'University_of_Notre_Dame'}, page_content='The College of Engineering was established in 1920, however, early courses in civil and mechanical engineering were a part of the College 

# Conversational Agent

## RAG chatbot

In [11]:
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.runnables import RunnablePassthrough
from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_core.output_parsers import StrOutputParser

# ── 1. LLM setup ────────────────────────────────────────────────────────────────
llm = HuggingFaceEndpoint(
    repo_id="meta-llama/Llama-3.1-8B-Instruct",
    task="text-generation",
    max_new_tokens=512,
    temperature=0.1,
)
chat_llm = ChatHuggingFace(llm=llm)

# ── 2. Session history store ─────────────────────────────────────────────────────
store = {}
K_WINDOW = 5  # number of turns to keep, change this to adjust window size

def get_session_history(session_id: str) -> InMemoryChatMessageHistory:
    if session_id not in store:
        store[session_id] = InMemoryChatMessageHistory()
    
    history = store[session_id]
    
    # Window here — chain doesn't need to know about windowing at all
    if len(history.messages) > K_WINDOW * 2:
        trimmed = history.messages[-(K_WINDOW * 2):]
        history.clear()
        for msg in trimmed:
            history.add_message(msg)
    
    return history

# ── 4. Retriever ─────────────────────────────────────────────────────────────────
retriever = vectorstore.as_retriever(
    search_kwargs={"k": 3}  # top 3 most relevant chunks
)

def format_docs(docs) -> str:
    return "\n\n".join(doc.page_content for doc in docs)

# ── 5. Prompt ────────────────────────────────────────────────────────────────────
prompt = ChatPromptTemplate.from_messages([
    ("system", """You are a helpful assistant that answers questions
     based on the provided context and conversation history.

     Use the following retrieved context to answer the question.
     If you cannot find the answer in the context, say so clearly
     rather than making up an answer.

     Context:
     {context}
     """),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])

# ── 6. Core LCEL chain ─────────────────────────────
chain = (
    RunnablePassthrough.assign(
        context=lambda x: format_docs(retriever.invoke(x["input"])),
    )
    | prompt
    | chat_llm
    | StrOutputParser()
)


qa = RunnableWithMessageHistory(
    chain,
    # Pass a lambda that returns only the last K_WINDOW turns instead of full history
    get_session_history=get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
)

response = qa.invoke(
    {"input": query},
    config={"configurable": {"session_id": "user_123"}}
)
print(response)

The College of Engineering at the University of Notre Dame was established in 1920. However, early courses in civil and mechanical engineering were a part of the College of Science since the 1870s.


## RAG Agent

In [12]:
from langchain_core.tools import tool

@tool
def knowledge_base(query: str) -> str:
    """Use this tool when answering general knowledge queries to get
    more information about the topic."""
    docs = retriever.invoke(query)
    return format_docs(docs)

@tool
def final_answer(answer: str, tools_used: list[str]) -> str:
    """Use this tool to deliver your final response to the user.
    Always use this tool to send your answer.
    Args:
        answer: Your complete response in natural language
        tools_used: List of tool names used, empty list if none used
    """
    return {"answer": answer, "tools_used": tools_used}

tools = [knowledge_base, final_answer]
name2tool = {tool.name: tool.func for tool in tools}

In [13]:
agent_prompt = ChatPromptTemplate.from_messages([
    ("system", """You are a helpful conversational assistant with access 
     to a knowledge base tool.

     When answering questions:
     - Use the knowledge_base tool to retrieve relevant information
       before answering any factual or domain-specific questions
     - After retrieving information, synthesize it into a clear,
       conversational response
     - For simple conversational exchanges (greetings, thanks, etc.)
       you do NOT need to use the tool
     - Always base your answers on retrieved information rather than
       assumptions
     - If the knowledge base does not contain relevant information,
       say so clearly

     You must call the final_answer tool to deliver your response
     to the user.
     """),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
    MessagesPlaceholder(variable_name="agent_scratchpad"),
])

# Model compatible with tool use - automatic routing based on availability
llm = HuggingFaceEndpoint(
    repo_id="Qwen/Qwen2.5-14B-Instruct",
    task="text-generation",
    max_new_tokens=512,
    temperature=0.1,
)

chat_llm = ChatHuggingFace(llm=llm)

agent_runnable = (
    {
        "input": lambda x: x["input"],
        "chat_history": lambda x: x["chat_history"],
        "agent_scratchpad": lambda x: x.get("agent_scratchpad", [])
    }
    | agent_prompt
    | chat_llm.bind_tools(tools)
)

In [14]:
from langchain_core.messages import ToolMessage

class RAGAgentExecutor:
    def __init__(
        self,
        max_iterations: int = 3,
        verbose: bool = False
    ):
        self.max_iterations = max_iterations
        self.verbose = verbose
        self.agent = agent_runnable
        self.name2tool = name2tool

    def invoke(self, input: str, session_id: str = "default") -> dict:
        count = 0
        agent_scratchpad = []
        final_response = None

        # ── Fetch windowed history directly via get_session_history ──────
        chat_history = get_session_history(session_id).messages

        if self.verbose:
            print(f"\n{'='*60}")
            print(f"Input: {input}")
            print(f"Session: {session_id}")
            print(f"History turns: {len(chat_history) // 2}")
            print(f"{'='*60}\n")

        while count < self.max_iterations:
            try:
                response = self.agent.invoke({
                    "input": input,
                    "chat_history": chat_history,
                    "agent_scratchpad": agent_scratchpad
                })
            except Exception as e:
                print(f"Agent error at iteration {count}: {e}")
                final_response = "I encountered an error processing your request."
                break

            agent_scratchpad.append(response)

            if self.verbose:
                print(f"Iteration {count} response: {response}")

            # ── Case 1: No tool calls → direct answer ────────────────────
            if not response.tool_calls:
                final_response = response.content
                if self.verbose:
                    print(f"\nDirect answer (no tool call): {final_response}")
                break

            # ── Case 2: Process all tool calls ───────────────────────────
            for tool_call_obj in response.tool_calls:
                tool_name    = tool_call_obj["name"]
                tool_args    = tool_call_obj["args"]
                tool_call_id = tool_call_obj["id"]

                if tool_name not in self.name2tool:
                    if self.verbose:
                        print(f"Unknown tool: {tool_name}, skipping...")
                    continue

                try:
                    tool_output = self.name2tool[tool_name](**tool_args)
                except Exception as e:
                    tool_output = f"Tool execution error: {e}"

                agent_scratchpad.append(
                    ToolMessage(
                        content=str(tool_output),
                        tool_call_id=tool_call_id
                    )
                )

                if self.verbose:
                    print(f"{count}: {tool_name}({tool_args}) -> {tool_output}")

            count += 1

            # ── Check if final_answer was among the tool calls ────────────
            if any(tc["name"] == "final_answer" for tc in response.tool_calls):
                final_tc = next(
                    tc for tc in response.tool_calls
                    if tc["name"] == "final_answer"
                )
                final_response = final_tc["args"]["answer"]
                break

        # ── early_stopping_method='generate' equivalent ──────────────────
        if final_response is None:
            if self.verbose:
                print("\nMax iterations reached — generating best effort response")

            tool_results = [
                msg.content for msg in agent_scratchpad
                if isinstance(msg, ToolMessage)
            ]

            final_response = (
                f"Based on the available information: {tool_results[-1]}"
                if tool_results else
                "I was unable to find sufficient information to answer "
                "your question within the allowed iterations."
            )

        # ── Save clean turn to session history via get_session_history ────
        session_history = get_session_history(session_id)
        session_history.add_user_message(input)
        session_history.add_ai_message(final_response)

        if self.verbose:
            print(f"\n{'='*60}")
            print(f"Final Answer: {final_response}")
            print(f"{'='*60}\n")

        return {
            "input": input,
            "output": final_response,
            "session_id": session_id
        }

# Initialize — no session_manager needed
agent_executor = RAGAgentExecutor(
    max_iterations=3,
    verbose=True
)

In [15]:
response = agent_executor.invoke(
    query,
    session_id="user_124"
)


Input: when was the college of engineering in the University of Notre Dame established?
Session: user_124
History turns: 0

Iteration 0 response: content='' additional_kwargs={'tool_calls': [{'function': {'arguments': '{"query":"establishment date of College of Engineering at University of Notre Dame"}', 'name': 'knowledge_base', 'description': None}, 'id': 'qwen-0-1771639316902', 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 30, 'prompt_tokens': 435, 'total_tokens': 465}, 'model_name': 'Qwen/Qwen2.5-14B-Instruct', 'system_fingerprint': '', 'finish_reason': 'tool_calls', 'logprobs': None} id='lc_run--019c7dee-8ba3-7983-8950-8c20565df847-0' tool_calls=[{'name': 'knowledge_base', 'args': {'query': 'establishment date of College of Engineering at University of Notre Dame'}, 'id': 'qwen-0-1771639316902', 'type': 'tool_call'}] invalid_tool_calls=[] usage_metadata={'input_tokens': 435, 'output_tokens': 30, 'total_tokens': 465}
0: knowledge_base({'query': 'est

In [16]:
query = "what is 2 * 7?"
response = agent_executor.invoke(
    query,
    session_id="user_124"
)


Input: what is 2 * 7?
Session: user_124
History turns: 1

Iteration 0 response: content='旵\n{"name": "final_answer", "arguments": {"answer": "2 * 7 equals 14.", "tools_used": []}}\n</tool_call>' additional_kwargs={} response_metadata={'token_usage': {'completion_tokens': 32, 'prompt_tokens': 497, 'total_tokens': 529}, 'model_name': 'Qwen/Qwen2.5-14B-Instruct', 'system_fingerprint': '', 'finish_reason': 'stop', 'logprobs': None} id='lc_run--019c7def-0b49-7f10-b3c8-122cfcb102ca-0' tool_calls=[] invalid_tool_calls=[] usage_metadata={'input_tokens': 497, 'output_tokens': 32, 'total_tokens': 529}

Direct answer (no tool call): 旵
{"name": "final_answer", "arguments": {"answer": "2 * 7 equals 14.", "tools_used": []}}
</tool_call>

Final Answer: 旵
{"name": "final_answer", "arguments": {"answer": "2 * 7 equals 14.", "tools_used": []}}
</tool_call>



In [17]:
query = "can you tell me some facts about the University of Notre Dame?"
response = agent_executor.invoke(
    query,
    session_id="user_124"
)


Input: can you tell me some facts about the University of Notre Dame?
Session: user_124
History turns: 2

Iteration 0 response: content="Certainly! Here are some interesting facts about the University of Notre Dame:\n\n- The university was founded in 1842 by Father Edward Sorin, a priest of the Congregation of Holy Cross.\n- It is located in Notre Dame, Indiana, and is known for its beautiful campus, which includes many historic buildings.\n- The Fighting Irish is the nickname of the university's athletic teams, which compete in the NCAA Division I as members of the Atlantic Coast Conference.\n- The university is renowned for its strong programs in engineering, business, and science, among others.\n- The Grotto of Our Lady of Lourdes, a replica of the shrine in Lourdes, France, is a popular site on campus where students often pray and leave offerings.\n\nWould you like to know more about any specific aspect of the university?\n" additional_kwargs={'tool_calls': [{'function': {'argumen

In [18]:
query = "can you summarize these facts in two short sentences"
response = agent_executor.invoke(
    query,
    session_id="user_124"
)


Input: can you summarize these facts in two short sentences
Session: user_124
History turns: 3

Iteration 0 response: content="Certainly! The University of Notre Dame, founded in 1842, is located in Notre Dame, Indiana, and is renowned for its strong academic programs and 'Fighting Irish' athletic teams competing in NCAA Division I.\n" additional_kwargs={'tool_calls': [{'function': {'arguments': '{"answer":"The University of Notre Dame, founded in 1842, is located in Notre Dame, Indiana, and is renowned for its strong academic programs and \'Fighting Irish\' athletic teams competing in NCAA Division I.","tools_used":[]}', 'name': 'final_answer', 'description': None}, 'id': 'qwen-0-1771639385082', 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 113, 'prompt_tokens': 692, 'total_tokens': 805}, 'model_name': 'Qwen/Qwen2.5-14B-Instruct', 'system_fingerprint': '', 'finish_reason': 'tool_calls', 'logprobs': None} id='lc_run--019c7def-7fab-77f1-97f5-412702c4994b

In [19]:
pc.delete_index(index_name)