In [None]:
!pip install --upgrade langchain-text-splitters langchain-community langgraph
!pip install -qU "langchain[google-genai]"
!pip install -U langchain-openai

In [None]:
!pip install -U langchain_core

In [65]:
import bs4
from dotenv import load_dotenv
from langchain.chat_models import init_chat_model
from langchain_openai import OpenAIEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing import List, TypedDict, Literal, Annotated

In [None]:
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this supplies the API keys used by the Gemini chat
# model and the OpenAI clients created in the following cells — confirm.
load_dotenv()

In [37]:
# Chat model shared by every graph node in this notebook (answer generation and
# structured query analysis).
llm = init_chat_model("gemini-2.5-flash", model_provider="google_genai")

In [11]:
# Embedding model handed to the in-memory vector store below.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

In [None]:
# Sanity check: list the model ids available to this account whose id contains
# 'embedding'.
from openai import OpenAI
client = OpenAI()

for model_id in (m.id for m in client.models.list().data if 'embedding' in m.id):
    print(model_id)
        

In [26]:
# Start with an empty in-memory store bound to the embedding model.
# NOTE: `vector_store` is rebound further down by
# InMemoryVectorStore.from_documents(chunks, embeddings), which is the store
# the retrieval nodes actually need.
vector_store = InMemoryVectorStore(embeddings)

In [93]:
# Fetch the blog post, keeping only the title, header and body elements.
post_strainer = bs4.SoupStrainer(class_=("post-content", "post-title", "post-header"))
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": post_strainer},
)
docs = loader.load()

# Split the page into overlapping chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_documents(docs)
total_chunks = len(chunks)
third = total_chunks // 3

# Label every chunk with the third of the chunk list it falls in. Any remainder
# from the integer division ends up in the "end" section.
for i, doc in enumerate(chunks):
    if i >= 2 * third:
        doc.metadata["section"] = "end"
    elif i >= third:
        doc.metadata["section"] = "middle"
    else:
        doc.metadata["section"] = "beginning"

sections = {d.metadata["section"] for d in chunks}

print(f"From {len(docs)} docs created {len(chunks)} chunks.")
print(sections)

From 1 docs created 63 chunks.
{'end', 'beginning', 'middle'}


In [94]:
# Embed the section-tagged chunks and index them in a fresh in-memory store
# (this replaces any previously created `vector_store`).
vector_store = InMemoryVectorStore.from_documents(chunks, embeddings)
indexed_count = len(vector_store.store)
print(f"Created vector store with {indexed_count} items.")

Created vector store with 63 items.


In [34]:
# Pull the shared "rlm/rag-prompt" template from the hub. As the printed repr
# shows, it expects two input variables: `question` and `context`.
prompt = hub.pull("rlm/rag-prompt")
print(prompt)

input_variables=['context', 'question'] input_types={} partial_variables={} metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'} messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})]


In [42]:
class State(TypedDict):
    """State passed between the nodes of the basic retrieve -> generate graph."""

    # The user's question; supplied when the graph is invoked.
    question: str
    # Documents returned by the `retrieve` node.
    context: List[Document]
    # Final answer text written by the `generate` node.
    answer: str

def retrieve(state: State):
    """Look up documents similar to the question in the vector store.

    Reads ``state["question"]`` and returns a ``{"context": [...]}`` update
    holding the documents found by ``vector_store.similarity_search``.
    """
    question = state["question"]
    return {"context": vector_store.similarity_search(question)}


def generate(state: State):
    """Answer the question using the retrieved documents as context.

    Args:
        state: Graph state; ``state["question"]`` and ``state["context"]``
            are read.

    Returns:
        dict: ``{"answer": ...}`` holding the content of the LLM response.
    """
    # Flatten the retrieved documents to plain text. The prompt must receive
    # the joined page contents, not the raw list of Document objects (which
    # would be rendered via its repr, metadata and all).
    docs_content = "\n\n".join(doc.page_content for doc in state["context"])
    messages = prompt.invoke({"question": state["question"], "context": docs_content})
    response = llm.invoke(messages)
    return {"answer": response.content}

# Wire the two nodes in order (retrieve -> generate), enter the graph at
# "retrieve", and compile it into a runnable.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()
    

In [54]:
# Run the graph synchronously for a single question and show only the answer.
response = graph.invoke({"question": "What is RAG?"})
print(response["answer"])

I don't know the answer. The provided context does not contain information about RAG.


In [56]:
# Same question through the async entry point. The top-level `await` only works
# in an environment that supports it, such as the notebook kernel.
response = await graph.ainvoke({"question": "What is RAG?"})
print(response["answer"])

I don't know the answer based on the provided context. The retrieved documents discuss Generative Agents, ReAct, Reflexion, and AutoGPT, but do not define RAG.


In [None]:
# Inspect what was retrieved for the last response: the number of documents,
# then each document's text preceded by a blank line.
retrieved = response["context"]
print(len(retrieved))
for d in retrieved:
    print(f"\n{d.page_content}")

In [86]:
# Stream per-node state updates so each step of the graph can be inspected.
# NOTE(review): the saved output of this cell contains an 'analyze_query' step,
# but analyze_query is only defined in a later cell — this cell appears to have
# been executed out of order against a newer `graph`. Re-run the notebook top
# to bottom to refresh the output.
for step in graph.stream(
    {"question": "What this article is about"}, stream_mode="updates"
):
    print(f"{step}\n\n------------------\n")

{'analyze_query': {'query': {'section': 'beginning', 'query': 'what this article is about'}}}

------------------

{'retrieve': {'context': []}}

------------------

{'generate': {'answer': 'I cannot tell you what this article is about because no context was provided. Please provide the article or relevant text for me to summarize it.'}}

------------------



In [62]:
async for step in graph.astream(
       {"question": "What this article is about"}, stream_mode="updates"
):
    print(f"{step.keys()}\n\n------------------\n") 

dict_keys(['retrieve'])

------------------

dict_keys(['generate'])

------------------



In [64]:
# Scratch check: `...` is the built-in Ellipsis object. It is used below as the
# second element of the Annotated[...] metadata in the Search schema.
print(type(...))

<class 'ellipsis'>


In [67]:
# Schema passed to llm.with_structured_output() in analyze_query.
# Documented with `#` comments rather than a class docstring on purpose:
# NOTE(review): a docstring may be picked up as the schema description sent to
# the model, which would change behavior — confirm before adding one.
class Search(TypedDict):
    # Each field is Annotated[type, ..., description]; the trailing string is
    # the field description.
    query: Annotated[str, ..., "Search query to run."]
    # Must be one of the labels written to chunk metadata["section"].
    section: Annotated[Literal["beginning", "middle", "end"], ..., "Section to query."]
    

In [95]:
class State(TypedDict):
    """State for the analyze_query -> retrieve -> generate graph.

    Redefines the earlier ``State`` with an additional ``query`` field.
    """

    # The user's question; supplied when the graph is invoked.
    question: str
    # Structured search (query text + section) produced by `analyze_query`.
    query: Search
    # Documents returned by the `retrieve` node.
    context: List[Document]
    # Final answer text written by the `generate` node.
    answer: str

def analyze_query(state: State):
    """Turn the raw question into a structured ``Search``.

    Asks the LLM, constrained to the ``Search`` schema, to interpret
    ``state["question"]`` and returns the result as a ``{"query": ...}`` update.
    """
    search_extractor = llm.with_structured_output(Search)
    return {"query": search_extractor.invoke(state["question"])}

def retrieve(state: State):
    """Similarity-search the vector store, restricted to the requested section.

    Reads the structured search from ``state["query"]``, keeps only documents
    whose ``metadata["section"]`` equals the requested section, prints how many
    documents were found, and returns them as a ``{"context": [...]}`` update.
    """
    search = state["query"]

    def in_requested_section(doc):
        # Documents without a "section" entry yield None and never match.
        return doc.metadata.get("section") == search["section"]

    matches = vector_store.similarity_search(
        search["query"],
        filter=in_requested_section,
    )
    print(len(matches))
    return {"context": matches}

def generate(state: State):
    """Produce the final answer from the question and the retrieved documents.

    Joins the page contents of ``state["context"]`` with blank lines, fills the
    RAG prompt with that text and ``state["question"]``, and returns the LLM
    response content as an ``{"answer": ...}`` update.
    """
    page_texts = [d.page_content for d in state["context"]]
    rag_input = {"question": state["question"], "context": "\n\n".join(page_texts)}
    answer_message = llm.invoke(prompt.invoke(rag_input))
    return {"answer": answer_message.content}

In [96]:
# Three-step pipeline: analyze_query -> retrieve -> generate, entered at
# "analyze_query" and compiled into a runnable graph.
graph_builder = StateGraph(State)
graph_builder.add_sequence([analyze_query, retrieve, generate])
graph_builder.add_edge(START, "analyze_query")
graph = graph_builder.compile()

In [97]:
# Ask a question that names a section of the post and print each node's update.
section_question = {"question": "What does the end of the post say about Task Decomposition?"}
for step in graph.stream(section_question, stream_mode="updates"):
    print(f"{step}-------------------------\n")
    

{'analyze_query': {'query': {'query': 'Task Decomposition', 'section': 'end'}}}-------------------------

4
{'retrieve': {'context': [Document(id='bded1680-1997-47b2-aeab-34affa8a2af7', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'section': 'end'}, page_content='You will get instructions for code to write.\nYou will write a very long answer. Make sure that every detail of the architecture is, in the end, implemented as code.\nMake sure that every detail of the architecture is, in the end, implemented as code.\nThink step by step and reason yourself to the right decisions to make sure we get it right.\nYou will first lay out the names of the core classes, functions, methods that will be necessary, as well as a quick comment on their purpose.\nThen you will output the content of each file including ALL code.\nEach file must strictly follow a markdown code block format, where the following tokens must be replaced such that\nFILENAME is the lowercase file na