### Custom MBA courses RAG project

In [1]:
# Go one level up in the directories hierarchy to access src directory and codes
import sys
import os
# Add project root to Python path
project_root = os.path.abspath("..")  # go one level up from notebooks/
sys.path.append(project_root)

In [2]:
# Setup necessary models for routing, chatting and embedding
from llama_index.llms.google_genai import GoogleGenAI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from core.config.config import Config
from google.genai import types

router_llm = GoogleGenAI(
    model = Config.ROUTER_LLM,
    api_key = Config.GOOGLE_API_KEY,
    generation_config = types.GenerateContentConfig(
        thinking_config = types.ThinkingConfig(thinking_budget = 0),
        temperature = Config.ROUTER_LLM_TEMPERATURE,
    ),
    max_tokens = Config.ROUTER_LLM_MAX_TOKENS
)

embed_model = HuggingFaceEmbedding(
    model_name = Config.EMBEDDING_MODEL
)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load the collections name and description from my custom JSON file
import json

docs_path = "../documents"
collections_mba_json = docs_path + "/collections_mba.json"

with open(collections_mba_json, "r", encoding = "utf-8") as file:
    COLLECTIONS_MBA = json.load(file)

for collections_name, collection_description in COLLECTIONS_MBA.items():
    COLLECTIONS_MBA[collections_name] = (" \n ").join([line.strip() for line in collection_description.splitlines()[1:-2]])

In [4]:
# Let's build our own Router Retriever based on the my personal pdf database for my MBA courses
# Here we perform ingestion step along with building multiple indices, retrievers on top of indices
# and creating a RouterRetriever from those retrievers (which are wrapped inside RetrieverTool objects)
from core.config.constants import RagConstants
from core.config.config import Config

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.tools import RetrieverTool
from llama_index.core.retrievers import RouterRetriever
from llama_index.core.selectors import LLMMultiSelector


# Initialize the retriever_tools list to create a list of RetrieverTool objects that we will later
# pass into the LLMMultiSelector for selecting an appropriate retriever
retriever_tools = []

# I manually wrote a dictionary for each course and its decription inside the previously loaded JSON file. 
# 'collection_name' matches the name of the course folder inside the documents folder
for collection_name, collection_description in COLLECTIONS_MBA.items():

    collection_path = docs_path + '/' + collection_name

    # 1) Read documents and create list of 'Document' objects, that has id_, metadata, text attributes.
    #    Document class (generic container for any data source) is a subclass of the TextNode class
    collection_documents = SimpleDirectoryReader(input_dir = collection_path).load_data()

    # 2) Read each of this document objects and create index from it
    #    Document objects are parsed into Node objects that have different attributes such as text, embeddings, metadata, relationships.
    #    Document objects are split into multiple nodes (relationships between these nodes are recorded in Node objects as attributes).
    collection_index = VectorStoreIndex.from_documents(
        documents = collection_documents,
        embed_model = embed_model,
        show_progress = True
    )

    # 3) Then we create a retriever from each of those indices that were built on top of those collections of Document objects
    #    To do it, we just call the as_retriever method of the VectorStoreIndex object
    #    We also indicate the similarity_top
    collection_retriever = collection_index.as_retriever(similarity_top_k = Config.SIMILARITY_TOP_K)

    # 4) We wrap those collection retrievers inside the RetrieverTool so that the MultiSelector will be able to select an
    #    appropriate retriever based on its decription
    collection_retriever_tool = RetrieverTool.from_defaults(
        retriever = collection_retriever,
        description = collection_description
    )

    # 5) Append created RetrieverTool for each collection to the list initialized before this loop
    retriever_tools.append(collection_retriever_tool)

# Create a router from that list of RetrieverTool objects using an LLMMultiSelector for selecting relevant retrievers 
# based on a prompt
router = RouterRetriever(
    selector = LLMMultiSelector.from_defaults(
        prompt_template_str = RagConstants.LLM_MULTI_SELECTOR_PROMPT,
        # Maximum number of retrievers to retain - each retriever retrieves nodes from each corresponding colleciton
        max_outputs = Config.ROUTER_RETRIEVER_MAX_OUTPUTS,
        llm = router_llm
    ),
    llm = router_llm,
    retriever_tools = retriever_tools
)

Parsing nodes: 100%|██████████| 337/337 [00:00<00:00, 3596.71it/s]
Generating embeddings: 100%|██████████| 337/337 [00:42<00:00,  7.92it/s]
Parsing nodes: 100%|██████████| 205/205 [00:03<00:00, 68.26it/s]
Generating embeddings: 100%|██████████| 217/217 [00:49<00:00,  4.43it/s]
Parsing nodes: 100%|██████████| 203/203 [00:00<00:00, 2958.59it/s]
Generating embeddings: 100%|██████████| 209/209 [00:20<00:00, 10.12it/s]
Parsing nodes: 100%|██████████| 306/306 [00:00<00:00, 3766.69it/s]
Generating embeddings: 100%|██████████| 306/306 [00:30<00:00,  9.97it/s]
Parsing nodes: 100%|██████████| 292/292 [00:00<00:00, 4171.01it/s]
Generating embeddings: 100%|██████████| 292/292 [00:18<00:00, 15.55it/s]
Parsing nodes: 100%|██████████| 365/365 [00:00<00:00, 3499.15it/s]
Generating embeddings: 100%|██████████| 365/365 [00:43<00:00,  8.46it/s]


In [5]:
from core.helpers.logger import logger
import nest_asyncio
nest_asyncio.apply()

# Now let's try prompting our RAG to see the retrieved nodes
# According to our settings, we should expect no more than 15 nodes

user_name = "Nigga"
user_query = "Are there any rules when it comes to Financial Accounting?"

try:
    retrieved_nodes = router.retrieve(user_query)
except ValueError as e:
    logger.info(f"{e}: knowledge based does not contain relevant info.")
    retrieved_nodes = []

2025-12-14 19:01:33,170 - INFO - AFC is enabled with max remote calls: 10.
2025-12-14 19:01:34,345 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-lite:generateContent "HTTP/1.1 200 OK"
2025-12-14 19:01:34,348 - INFO - Selecting retriever 0: This choice explicitly mentions 'generally accepted accounting principles in the US (US GAAP) and around the world (IFRS)', which are the rules governing financial accounting..


In [6]:
# Now we need to check the relevance of the retrieved nodes and only derive those nodes 
# that are relevant to the user's query. 
# Are there better ways that just passing it to the LLM? Maybe thresholding? Or just pass it all as a context
# and ask LLM to see the relevance in the chat time?

# Create retrieved_nodes str for LLM to easier make a choice
retrieved_nodes_str = ""
for i in range(len(retrieved_nodes)):
    if i == len(retrieved_nodes) - 1: 
        str_to_add = "\"" + retrieved_nodes[i].id_ + "\": \"\"\"" + retrieved_nodes[i].text + "\"\"\""
    else:
        str_to_add = "\"" + retrieved_nodes[i].id_ + "\": \"\"\""  + retrieved_nodes[i].text + "\"\"\"\n"
    retrieved_nodes_str += str_to_add

print(retrieved_nodes_str)

# Check the relevance using the LLM call

relevance_check_prompt = RagConstants.LLM_RELEVANCE_CHECK_PROMPT.format(
    question = user_query,
    context = retrieved_nodes_str
)

response = await router_llm.acomplete(relevance_check_prompt)
print(response)

2025-12-14 19:01:49,565 - INFO - AFC is enabled with max remote calls: 10.


"57065ba6-c85c-4634-a7cb-1e2d6d721f54": """3
(1) (2)
 (3)
What has this to do with accounting?
“It’s about story-telling”– Is it true for financial accounting?
Movie
(4)"""
"4f1f3e98-4b39-41c8-a7ba-f8092090bd73": """Financial accounting produces reports that are prepared under rules to inform external parties.
What Is Financial Accounting?“It is the (1) information system that (2) measures business activities, (3) processes the information into reports, and (4) communicates the results to decision makers.”
(1) Information system: Generally Accepted Accounting Principles (GAAP) (e.g., US-GAAP, Indian-GAAP etc.)(2) Measuring business activities: In monetary measures –e.g., Tenge, USD, etc.(3) Reports: Financial statements (e.g., Balance sheet, 10K-Form, etc.)(4) Decision makers: Shareholders, creditors, taxing authorities, labour unions, employees, etc.
18
Chapter1"""
"e56c9551-4078-4b5d-9308-3254b809aa07": """Two types of accounting exist: financial and managerial accounting.
What Types

In [7]:
# Now, we derive the relevant context from the LLM response and construct the final
# context string that we will later use in the final LLM call to generate an answer to the user query
import json

# Define function to get the JSON array of relevant node_ids
def extract_json_array(text: str):
    start = text.find("[")
    end = text.rfind("]")
    if start == -1 or end == -1 or end < start:
        raise ValueError("No JSON array found")

    candidate = text[start:end + 1]
    data = json.loads(candidate)

    if not isinstance(data, list):
        raise ValueError("JSON is not a list")

    return data

retrieved_nodes_dict = dict([(node.id_, node.text) for node in retrieved_nodes])

relevant_node_ids = extract_json_array(response.text.strip())

context = "\n\n".join(
    [retrieved_nodes_dict[node_id] for node_id in relevant_node_ids]
)
print(context)

Financial accounting produces reports that are prepared under rules to inform external parties.
What Is Financial Accounting?“It is the (1) information system that (2) measures business activities, (3) processes the information into reports, and (4) communicates the results to decision makers.”
(1) Information system: Generally Accepted Accounting Principles (GAAP) (e.g., US-GAAP, Indian-GAAP etc.)(2) Measuring business activities: In monetary measures –e.g., Tenge, USD, etc.(3) Reports: Financial statements (e.g., Balance sheet, 10K-Form, etc.)(4) Decision makers: Shareholders, creditors, taxing authorities, labour unions, employees, etc.
18
Chapter1

Two types of accounting exist: financial and managerial accounting.
What Types of Accounting Exist?A) Financial accounting: •Prepared underexternal rules (IFRS, US-GAAP)•Audited (KPMG, EY, Deloitte, PwC, etc.)•Shareholders, creditors, tax authorities, labour unions, employees•Monetary units (e.g., Tenge, USD, etc.)•“External” accounting


In [8]:
# Now we check the relevance and output the prompt
final_response = ""

if not context:
    final_response = f"{user_name}, I am sorry, but it seems that I don't have an answer to your question in my knowledge base, or it might be irrelevant."
else:
    final_response = await router_llm.acomplete(
        prompt = RagConstants.SYSTEM_PROMPT.format(
            user_name = user_name,
            question = user_query,
            context = context
        )
    )

print(final_response)

2025-12-14 19:02:12,781 - INFO - AFC is enabled with max remote calls: 10.


Yes, Nigga, financial accounting produces reports that are prepared under rules to inform external parties. These rules are referred to as Generally Accepted Accounting Principles (GAAP), with examples including US-GAAP and Indian-GAAP. Financial accounting is also prepared under external rules such as IFRS and US-GAAP.


In [None]:
from llama_index.core.chat_engine.simple import SimpleChatEngine
from llama_index.core.memory import Memory

chat_memory_buff = Memory.from_defaults(
    session_id = "Nigga_session_id",
    token_limit = 2000,
    chat_history_token_ratio = 1.0
)

chat_engine = SimpleChatEngine.from_defaults(
    llm = router_llm,
    memory = chat_memory_buff
)

In [17]:
prompt = RagConstants.SYSTEM_PROMPT.format(
            user_name = user_name,
            question = user_query,
            context = context
        )
chat_response = await chat_engine.achat(prompt)
chat_response.response

2025-12-14 19:04:23,160 - INFO - AFC is enabled with max remote calls: 10.


'Yes, Nigga, financial accounting produces reports that are prepared under rules to inform external parties. These rules are referred to as Generally Accepted Accounting Principles (GAAP), with examples including US-GAAP and Indian-GAAP. Financial accounting is also prepared under external rules such as IFRS and US-GAAP.'

In [18]:
chat_response2 = await chat_engine.achat("What else can you say?")
chat_response2.response

2025-12-14 19:04:26,238 - INFO - AFC is enabled with max remote calls: 10.


'Nigga, financial accounting is an information system that measures business activities, processes the information into reports, and communicates the results to decision makers. These decision makers include shareholders, creditors, taxing authorities, labor unions, and employees. The reports generated are financial statements, such as the Balance Sheet and the 10K-Form. Financial accounting is considered "external" accounting.'

In [23]:
chat_engine.chat_history

[ChatMessage(role=<MessageRole.USER: 'user'>, additional_kwargs={}, blocks=[TextBlock(block_type='text', text="\n            You are a helpful academic assistant specialized in MBA courses for Master of Engineering Management (MEM) students.\n\n            Address the user by their name: Nigga.\n\n            Your task is to answer the user's question using ONLY the information provided in the context below.\n            The context consists of excerpts from official course materials such as lecture slides, course descriptions, or academic notes.\n\n            Rules:\n            - Answer the question clearly, accurately, and concisely.\n            - Use ONLY the provided context to construct your answer.\n            - Do NOT use outside knowledge, assumptions, or general world knowledge.\n            - Do NOT invent facts or fill in missing information.\n            - If the context does not contain enough information to answer the question, explicitly state that the information is