In [2]:
import os
import json
from src.dialogue_tools import create_rag, generate_answers, generate_follow_up_questions, cross_agent_verification, aggregate_final_response, organize_into_json, display_from_json
from src.vectorstore import VectorstoreHandler
from src.models import init_llm

########################################
# Configuration
########################################
# Root folder scanned for knowledge sources; each immediate subfolder becomes
# one RAG agent.
SOURCES_DIR = "sources"
# Destination file for the serialized conversation.
JSON_PATH = "multi_source_conversation.json"
# Model name handed to init_llm() for the aggregation LLM.
AGGREGATION_LLM_NAME = "Llama3.2-3b"

# Dynamically create one RAG config per subfolder under sources.
# os.listdir() returns entries in arbitrary order, so the names are sorted:
# the resulting RAG order decides which agent verifies which in the
# round-robin cross-check, and must not vary between runs or machines.
subfolders = sorted(
    d for d in os.listdir(SOURCES_DIR)
    if os.path.isdir(os.path.join(SOURCES_DIR, d))
)
RAG_CONFIG = {}
for sub in subfolders:
    RAG_CONFIG[sub] = {
        "subfolder": sub,
        "llm_name": "Llama3.2-3b",
        "emb_name": "hf-minilm-l6-v2",
        "k": 10,  # presumably the number of retrieved documents — TODO confirm in create_rag
    }

# A single vectorstore handler is shared by every RAG built below.
handler = VectorstoreHandler(SOURCES_DIR, force_rebuild=False)

# Map each configured source name to its own RAG instance.
rags = dict(
    (rag_name, create_rag(SOURCES_DIR, rag_cfg, handler))
    for rag_name, rag_cfg in RAG_CONFIG.items()
)

# Separate LLM used for the aggregation steps of the dialogue.
aggregation_llm = init_llm(AGGREGATION_LLM_NAME)


  from .autonotebook import tqdm as notebook_tqdm


Loading existing vectorstore for sources/book...
Loading existing vectorstore for sources/lectures...


In [3]:
# Example question put to every RAG agent
original_prompt = (
    "Explain how the kidneys regulate fluid and electrolyte balance. "
    "Include the processes of filtration, reabsorption, secretion, and excretion"
)

########################################
# 1) Ask each RAG for initial answer
########################################
# Result is keyed by RAG name; each entry exposes at least an "answer" field.
initial_responses = generate_answers(original_prompt, rags)

########################################
# 2) Aggregation LLM -> follow-up question for each RAG
########################################
# Only the answer text of each agent is forwarded to the aggregation LLM.
agent_answers = {
    name: response["answer"]
    for name, response in initial_responses.items()
}
follow_up_questions = generate_follow_up_questions(
    aggregation_llm,
    original_prompt,
    agent_answers,
)

########################################
# 3) Each RAG answers its follow-up question
########################################
follow_up_responses = {}
for target, follow_up in follow_up_questions.items():
    # Restrict the query to the single agent the question was written for by
    # passing a one-entry RAG mapping.
    solo_rag = {target: rags[target]}
    follow_up_responses[target] = generate_answers(follow_up, solo_rag)[target]

########################################
# 4) Cross-reference: pick one other RAG to verify each answer
########################################
# Round-robin: the follow-up answer of agent i is verified by agent
# (i+1) mod len(rags).

rag_names = list(rags.keys())

# cross_input maps target agent -> {verifier agent -> material to verify}.
cross_input = {}
# With fewer than two RAGs the round-robin would select the agent itself,
# which is not a cross-check, so verification is skipped entirely.
if len(rag_names) > 1:
    for i, agent_name in enumerate(rag_names):
        target_answer = follow_up_responses.get(agent_name)
        if target_answer is None:
            # No follow-up answer exists for this agent, so there is nothing
            # to verify; the final aggregation tolerates the same gap.
            continue
        # choose a verifying agent
        verifier_name = rag_names[(i + 1) % len(rag_names)]
        cross_input[agent_name] = {
            verifier_name: {
                "prompt": original_prompt,
                "follow_up": follow_up_questions[agent_name],
                "answer": target_answer["answer"],
                "docs": target_answer["docs"],
            }
        }

# cross_agent_responses maps target agent -> {verifier agent -> verification}.
cross_agent_responses = {}
for target_rag, verifiers_dict in cross_input.items():
    cross_agent_responses[target_rag] = {}
    for verifier_rag, content in verifiers_dict.items():
        # cross_agent_verification expects {rag_name: answer_dict}, so build
        # minimal one-entry structures for the verifier and the target.
        verification = cross_agent_verification(
            {verifier_rag: rags[verifier_rag]},
            {target_rag: content},
            content["prompt"],
            {target_rag: content["follow_up"]},
        )
        # Fall back to an empty dict when the result has no entry for the target.
        cross_agent_responses[target_rag][verifier_rag] = verification.get(target_rag, {})

########################################
# 5) Final aggregated answer
########################################
# Pair every agent's initial answer with its follow-up answer; agents without
# a follow-up response contribute an empty string instead.
all_responses = {
    name: {
        "initial": initial_responses[name]["answer"],
        "follow_up": (
            follow_up_responses[name]["answer"]
            if name in follow_up_responses
            else ""
        ),
    }
    for name in rags
}

final_answer = aggregate_final_response(
    aggregation_llm,
    all_responses,
    original_prompt,
)

########################################
# 6) Organize results in JSON and display
########################################
conversation_json = organize_into_json(
    original_prompt,
    initial_responses,
    follow_up_questions,
    follow_up_responses,
    final_answer
)
# Keep only the answer text of each verification, grouped as
# {target agent: {verifier agent: answer}}; "No answer" marks a verification
# result that carries no "answer" field.
conversation_json["cross_agent_responses"] = {
    target_rag: {
        verifier: resp.get("answer", "No answer")
        for verifier, resp in verifications.items()
    }
    for target_rag, verifications in cross_agent_responses.items()
}

#display_from_json(conversation_json)
# Write through a context manager so the file is flushed and closed
# deterministically instead of whenever the handle is garbage-collected.
with open(JSON_PATH, "w") as json_file:
    json.dump(conversation_json, json_file, indent=2)



Querying all RAGs for prompt: 'Explain how the kidneys regulate fluid and electrolyte balance. Include the processes of filtration, reabsorption, secretion, and excretion'
--- Querying RAG: book ---
--- Querying RAG: lectures ---

Querying all RAGs for prompt: 'The original question was:
Explain how the kidneys regulate fluid and electrolyte balance. Include the processes of filtration, reabsorption, secretion, and excretion

Here's the follow-up question:
How do variations in ADH levels and individual differences in renal function affect the kidneys' ability to regulate fluid and electrolyte balance, and what are the consequences of dysregulation in these mechanisms?'
--- Querying RAG: book ---

Querying all RAGs for prompt: 'The original question was:
Explain how the kidneys regulate fluid and electrolyte balance. Include the processes of filtration, reabsorption, secretion, and excretion

Here's the follow-up question:
How do the kidneys utilize active transport mechanisms and ion 