In [19]:
import os
from typing import List
from pydantic import BaseModel
from langchain_openai import OpenAIEmbeddings
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_community.document_loaders import TextLoader,WebBaseLoader
from langgraph.graph import StateGraph, END

In [20]:
import os
from langchain.chat_models import init_chat_model
from dotenv import load_dotenv
os.environ["OPENAI_API_KEY"]=os.getenv("OPENAI_API_KEY")
llm=init_chat_model("openai:gpt-4o")

In [21]:
# Load and Embed Documents\n",
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2024-04-12-diffusion-video/"
    ]
docs = []
for url in urls:
    docs.extend(WebBaseLoader(url).load())
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = splitter.split_documents(docs)
embedding = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(chunks, embedding)
retriever = vectorstore.as_retriever()

In [22]:
# State Schema\n",
class RAGState(BaseModel):
    question: str 
    sub_questions: List[str] = []
    retrieved_docs: List[Document] = []
    answer: str = ""

In [23]:
# Nodes\n",
## a. Query Planner: splits input question
def plan_query(state: RAGState) -> RAGState:
    prompt = f"""
    Break the following complex question into 2-3 sub-questions:
    Question: {state.question}
    Sub-questions:
    """
    result = llm.invoke(prompt)
    sub_questions = [line.strip("- ") for line in result.content.splitlines() if line.strip()]
    return RAGState(question=state.question, sub_questions=sub_questions)
## b. Retrieve documents for each sub-question\n",
def retrieve_for_each(state: RAGState) -> RAGState:
    all_docs = []
    for sub in state.sub_questions:
        docs = retriever.invoke(sub)
        all_docs.extend(docs)
    return RAGState(question=state.question, sub_questions=state.sub_questions, retrieved_docs=all_docs)
## c. Generate final answer
def generate_final_answer(state: RAGState) -> RAGState:
    context = "".join([doc.page_content for doc in state.retrieved_docs])
    prompt = f"""
    Use the context below to answer the question.\n",
    Context:
    {context}
    Question: {state.question}
    """
    answer = llm.invoke(prompt).content
    return RAGState(question=state.question, sub_questions=state.sub_questions, retrieved_docs=state.retrieved_docs, answer=answer)

In [24]:
#Build LangGraph
builder = StateGraph(RAGState)
builder.add_node("planner", plan_query)
builder.add_node("retriever", retrieve_for_each)
builder.add_node("responder", generate_final_answer)
builder.set_entry_point("planner")
builder.add_edge("planner", "retriever")
builder.add_edge("retriever", "responder")
builder.add_edge("responder", END)
graph = builder.compile()

In [25]:
# Run the pipeline
if __name__ == "__main__":
    user_query = "Explain how agent loops work and what are the challenges in diffusion video generation?"
    initial_state = RAGState(question=user_query)
    final_state = graph.invoke(initial_state)
    print(final_state)
    print("🔍 Sub-questions:")
    for q in final_state['sub_questions']:
          print("-", q)
    print("✅ Final Answer:", final_state['answer'])

{'question': 'Explain how agent loops work and what are the challenges in diffusion video generation?', 'sub_questions': ['1. What is an agent loop, and how does it function in the context of computational systems or artificial intelligence?', '2. What are the primary challenges faced in the process of diffusion video generation, and how do these challenges affect the overall quality and efficiency of the generated videos?'], 'retrieved_docs': [Document(id='0505d5e5-3a79-48ac-bdba-8aedefdfa87f', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': "LLM Powered Autonomous Agents | Lil'Log", 'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAge