# RAG w/ LangGraph
`12_langgraph_rag.ipynb`

- https://python.langchain.com/docs/tutorials/rag/

In [None]:
from dotenv import load_dotenv

# Load settings from a .env file before any client is created
# (presumably the API keys used by the OpenAI clients below — confirm).
load_dotenv()

In [None]:
from pprint import pprint

# 1. Loader (web document)
from langchain_community.document_loaders import WebBaseLoader
from bs4.filter import SoupStrainer  # pip install beautifulsoup4

loader = WebBaseLoader(
    # Source URL of the document
    web_paths=('https://lilianweng.github.io/posts/2023-06-23-agent/', ),
    # Keep only the needed part of the page: elements with class 'post-content'
    bs_kwargs={
        'parse_only': SoupStrainer(class_=['post-content']) 
    }
    # header_template={}
)
docs = loader.load()

# 2. Splitter
from langchain_text_splitters import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splitted_docs = splitter.split_documents(docs)
# Number of chunks produced by the splitter
print(len(splitted_docs))

# 3. Embedding Model
from langchain_openai import OpenAIEmbeddings
embedding = OpenAIEmbeddings(model='text-embedding-3-small')  # small <-> large

# 4. Vectorstore (FAISS for now -> cloud-hosted Pinecone later)
from langchain_community.vectorstores import FAISS

vectorstore = FAISS.from_documents(splitted_docs, embedding=embedding)

In [None]:
from langchain import hub

# Pull the 'rlm/rag-prompt' prompt from the hub; the `generate` nodes below
# invoke it with 'question' and 'context'.
prompt = hub.pull('rlm/rag-prompt')

# Print every message template in the prompt for inspection
for m in prompt.messages:
    m.pretty_print()

In [None]:
# LLM: chat model used by the answer-generating node below
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model='gpt-4.1', temperature=0)

# State
from langchain_core.documents import Document
from typing_extensions import TypedDict, List

class State(TypedDict):
    """State schema of the basic RAG graph (retrieve -> generate)."""
    # User question; read by both `retrieve` and `generate`
    question: str
    # Documents written by `retrieve` and read by `generate`
    context: List[Document]
    # Answer text written by `generate`
    answer: str


# Node
# 검색 노드
def retrieve(state: State):
    """Search node: look up the chunks most similar to the question.

    Runs a top-4 similarity search on the module-level `vectorstore` with
    `state['question']`.

    Returns:
        A partial state update holding only `context` (the retrieved
        Document list). Per the original author's note, keys that are not
        returned (`question`, `answer`) are carried over unchanged.
    """
    question = state['question']
    top_matches = vectorstore.similarity_search(question, k=4)
    return {'context': top_matches}


# 답변 생성노드
def generate(state: State):
    """Answer node: fill the RAG prompt with the retrieved text and ask the LLM.

    Only each Document's `page_content` is forwarded; dropping the rest of
    the Document object saves tokens.

    Returns:
        A partial state update holding only `answer` (the response content).
    """
    separator = '\n------------------------\n'
    # Every chunk, including the last one, is followed by the separator.
    context_str = ''.join(
        chunk.page_content + separator for chunk in state['context']
    )

    rag_input = prompt.invoke(
        {'question': state['question'], 'context': context_str}
    )
    reply = llm.invoke(rag_input)
    return {'answer': reply.content}

# Graph
from langgraph.graph import StateGraph, START, END
# Graph builder typed with the State schema
builder = StateGraph(State)

# Register the two node functions under explicit names
builder.add_node('retrieve', retrieve)
builder.add_node('generate', generate)

# Linear flow: START -> retrieve -> generate -> END
builder.add_edge(START, 'retrieve')
builder.add_edge('retrieve', 'generate')
builder.add_edge('generate', END)

graph = builder.compile()

# 출력
# from IPython.display import Image, display

# display(Image(graph.get_graph().draw_mermaid_png()))

In [None]:
# Run the graph once; only 'question' is supplied in the input state
final_state = graph.invoke({'question': '에이전트 시스템에 대해 알려줘!'})

# Last expression of the cell: the generated answer
final_state['answer']

*메시지 스트리밍*

In [None]:
# With stream_mode="messages" each streamed item is unpacked as a
# (message, metadata) pair; the content is printed without newlines so the
# pieces read as one continuous text. `metadata` is not used here.
for message, metadata in graph.stream(
    {"question": "What is Task Decomposition? 한국어로 답해줘"}, stream_mode="messages"
):
    print(message.content, end="")

## RAG +a
- Metadata 편집
- Query 분석 - 보완

In [None]:
# Edit metadata

# Index of the one-third point of the chunk list
# (the original note says there were 63 chunks — depends on the splitter run)
third = len(splitted_docs) // 3

# Add a 'section' label to each chunk's metadata purely by position
# (the label itself carries no functional meaning).
for idx, doc in enumerate(splitted_docs):
    if idx < third:
        doc.metadata['section'] = 'beginning'
    elif idx < third * 2:
        doc.metadata['section'] = 'middle'
    else:
        # Everything from the two-thirds point on, including any remainder
        doc.metadata['section'] = 'end'

# Rebuild the FAISS store from the chunks that now carry the 'section' label
vectorstore = FAISS.from_documents(splitted_docs, embedding=embedding)

In [None]:
# State를 더 빡빡하게 정의하기 위해, 위에 따로 정의한 클래스 Search

from typing import Literal  # 말그대로
from typing_extensions import Annotated  # 할말이 좀 더 있다


class Search(TypedDict):  # Schema passed to with_structured_output.
    """Vectorstore Search Query"""
    # Annotated arguments: 1. type, 2. `...` -> required (NOT NULL), 3. description (for the AI)
    # NOTE: the docstring and descriptions are presumably sent to the model as
    # part of the schema, so they are left verbatim — confirm before rewording.
    query: Annotated[str, ..., 'Search query to run']
    section: Annotated[
        Literal['beginning', 'middle', 'end'],
        ..., 
        'Section to query'
    ]


class MyState(TypedDict):
    """State schema of the RAG graph that adds a query-analysis step."""
    # Original user question; read by `analyze_query` and `generate`
    question: str
    # Structured search request ({query, section}) written by `analyze_query`
    query: Search  
    # Documents written by `retrieve`
    context: List[Document]
    # Answer text written by `generate`
    answer: str

In [None]:
# Node
def analyze_query(state: MyState):
    """Convert the user's question into a structured `Search` request.

    The LLM is wrapped with `with_structured_output(Search)`, so its reply
    follows the `{query, section}` shape declared by `Search`.

    Returns:
        A partial state update holding only `query`.
    """
    structured_llm = llm.with_structured_output(Search)
    search_request = structured_llm.invoke(state['question'])
    return {'query': search_request}

def retrieve(state: MyState):
    """Search node: similarity search restricted to the section the LLM chose.

    Reads the structured request from `state['query']` and searches the
    module-level `vectorstore` with its `query` text.

    Returns:
        A partial state update holding only `context`.
    """
    search = state['query']

    def in_requested_section(metadata):
        # Accept a chunk only when its 'section' label equals the section
        # picked by the LLM.
        return metadata.get('section') == search['section']

    matches = vectorstore.similarity_search(
        search['query'],
        filter=in_requested_section,
    )
    return {'context': matches}

def generate(state: MyState):
    """Answer node: build the RAG prompt from the retrieved text and ask the LLM.

    Only `page_content` of each Document is sent, to save tokens.

    Returns:
        A partial state update holding only `answer`.
    """
    divider = '\n=====================\n'
    # Every chunk, including the last one, is followed by the divider.
    doc_str = ''.join(chunk.page_content + divider for chunk in state['context'])

    rag_input = prompt.invoke({'question': state['question'], 'context': doc_str})
    reply = llm.invoke(rag_input)
    return {'answer': reply.content}


# Graph builder typed with the MyState schema
builder = StateGraph(MyState)
builder.add_node('analyze_query', analyze_query)
builder.add_node('retrieve', retrieve)
builder.add_node('generate', generate)

# Linear flow: START -> analyze_query -> retrieve -> generate -> END
builder.add_edge(START, 'analyze_query')
builder.add_edge('analyze_query', 'retrieve')
builder.add_edge('retrieve', 'generate')
builder.add_edge('generate', END)

# Rebinds `graph`, replacing the graph compiled in the earlier cell
graph = builder.compile()


In [None]:
# Run the query-analysis graph; as the cell's last expression, the returned state is displayed
graph.invoke({'question': '작업분배 뭐냐'})

## 25-08-09 대화형 RAG


In [None]:
from dotenv import load_dotenv
# Load settings from a .env file again for this section
# (presumably the OpenAI and Pinecone API keys — confirm).
load_dotenv()

In [None]:
# Langchain + Pinecone
%pip install -q langchain-pinecone

In [None]:
from langchain_community.document_loaders import WebBaseLoader
from bs4.filter import SoupStrainer  # pip install beautifulsoup4
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore

# Load the same blog post, keeping only elements with class 'post-content'
loader = WebBaseLoader(
    web_paths=('https://lilianweng.github.io/posts/2023-06-23-agent/', ),
    bs_kwargs={
        'parse_only': SoupStrainer(class_=['post-content']) 
    }
)
docs = loader.load()

splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splitted_docs = splitter.split_documents(docs)

embedding = OpenAIEmbeddings(model='text-embedding-3-small')  # small <-> large

# Name of the Pinecone index used by this section
index_name = 'gaida-1st'

# Run this once only: it writes the data into the index, where it is stored permanently.
# vectorstore = PineconeVectorStore.from_documents(
#     splitted_docs, 
#     index_name=index_name, 
#     embedding=embedding
# )

In [None]:
# Connect to an index that already exists (no documents are uploaded here)
index_name = 'gaida-1st'
vectorstore = PineconeVectorStore.from_existing_index(index_name=index_name, embedding=embedding)

In [None]:
from langchain_core.tools import tool

@tool(response_format='content_and_artifact')  # the tool returns two values: (content, artifact)
def retrieve(query: str):
    """Retrieve information related to a query
    Args:
        query : Query to search
    """
    # NOTE(review): the docstring above is presumably what the model sees as
    # the tool description, so it is kept verbatim — confirm before rewording.

    # Artifact: the raw Document list from a top-3 similarity search
    hits = vectorstore.similarity_search(query, k=3)

    # Content: one "Source / Content" section per document, separated by a
    # blank line
    sections = [
        f'Source: {hit.metadata}\nContent: {hit.page_content}'
        for hit in hits
    ]
    return '\n\n'.join(sections), hits

In [66]:
from langchain_openai import ChatOpenAI
from langgraph.graph import MessagesState
from langchain_core.messages import SystemMessage
from langgraph.prebuilt import ToolNode, tools_condition
from langgraph.graph import START, END

# Chat model shared by `query_or_respond` and `generate` below
llm = ChatOpenAI(model='gpt-4.1', temperature=0)

# Node
def query_or_respond(state: MessagesState):
    """Either request a tool call or give the final reply directly.

    The LLM is bound to the `retrieve` tool and invoked with a guiding
    system message followed by the conversation so far. The system message
    (in Korean) tells the model to use `retrieve` for questions about LLMs
    or agent systems.

    Returns:
        A state update whose `messages` list holds the single LLM reply.
    """
    guide = SystemMessage(
        content="""
        넌 AI 어시스턴트야. 만약 사용자가 LLM이나 Agent System과 관련된 질문을 하면
        `retrieve` Tool을 사용해야해.
        """
    )
    # The guide goes first, ahead of the stored conversation.
    llm_input = [guide] + state['messages']

    tool_aware_llm = llm.bind_tools([retrieve])
    reply = tool_aware_llm.invoke(llm_input)
    return {'messages': [reply]}

# Graph node wrapping the `retrieve` tool (later wired in under the name 'tools')
tools = ToolNode([retrieve])

def generate(state: MessagesState):
    """Generate the answer from the most recent tool results.

    Steps:
      1. Take the unbroken run of tool messages at the end of the history,
         in chronological order.
      2. Append their contents to the question-answering system prompt.
      3. Send that prompt plus the human/system messages and the AI
         messages without tool calls to the LLM.

    Returns:
        A state update whose `messages` list holds the single LLM response.
    """
    history = state['messages']

    # Step back from the end while the preceding message is a tool message;
    # `first_tool` ends up at the start of the trailing tool-message run.
    first_tool = len(history)
    while first_tool > 0 and history[first_tool - 1].type == 'tool':
        first_tool -= 1
    docs_content = '\n\n'.join(
        tool_msg.content for tool_msg in history[first_tool:]
    )

    instructions = (
        "You are an assistant for question-answering tasks. "
        "Use the following pieces of retrieved context to answer "
        "the question. If you don't know the answer, say that you "
        "don't know. Use three sentences maximum and keep the "
        "answer concise."
    )
    system_message_content = instructions + "\n\n" + docs_content

    def belongs_to_conversation(message):
        # Tool messages and AI messages that only carry tool calls are
        # left out of the conversation sent to the model.
        if message.type in ("human", "system"):
            return True
        return message.type == "ai" and not message.tool_calls

    conversation = [msg for msg in history if belongs_to_conversation(msg)]

    # Run
    response = llm.invoke([SystemMessage(system_message_content)] + conversation)
    return {"messages": [response]}

In [67]:
from langgraph.graph import MessagesState, StateGraph

builder = StateGraph(MessagesState) # state has a 'messages' key

# builder.add_node('query_or_respond', query_or_respond)  # same result as the line below
builder.add_node(query_or_respond)
builder.add_node(tools)
builder.add_node(generate)

# builder.set_entry_point('query_or_respond')  # equivalent to the line below
builder.add_edge(START, 'query_or_respond')
# After query_or_respond, tools_condition picks the next step: END or 'tools'
builder.add_conditional_edges(
    'query_or_respond',
    tools_condition,
    {END: END, 'tools': 'tools'}  # explicit mapping from each routing result to the next node
)
builder.add_edge('tools', 'generate')
builder.add_edge('generate', END)

graph = builder.compile()

In [None]:
from IPython.display import Image, display

# Draw the compiled graph as a Mermaid PNG and show it in the notebook
display(Image(graph.get_graph().draw_mermaid_png()))

In [None]:
# How to run the graph above: pass a state whose 'messages' holds the user message
input_state = {
    'messages': [
        {'role': 'user', 'content': 'Task Decomposition 이 뭐야?'}
    ]
}
res = graph.invoke(input_state)

# Print every message in the resulting state
for msg in res['messages']:
    msg.pretty_print()

## 대화 기록 저장하기 (LangGraph Memory)

In [70]:
from langgraph.checkpoint.memory import MemorySaver

# Checkpointer passed to compile() so the conversation history is kept between calls
memory = MemorySaver()

# Reuse the builder defined above, this time compiled with the checkpointer
graph = builder.compile(checkpointer=memory)

In [None]:
# Run configuration carrying a thread_id
# (presumably the key under which the checkpointer stores this conversation — confirm)
config = {
    'configurable': {'thread_id': '123'}
}

input_state = {
    'messages': [
        {'role': 'user', 'content': 'LLM Agent는 어떤 경우에 만드는게 좋아?'}
    ]
}

res = graph.invoke(input_state, config)

# Print every message in the resulting state
for msg in res['messages']:
    msg.pretty_print()


LLM Agent System에 대해 알려줘
Tool Calls:
  retrieve (call_W3Hxp28bCQv6epW6Szn7XMHB)
 Call ID: call_W3Hxp28bCQv6epW6Szn7XMHB
  Args:
    query: LLM Agent System
Name: retrieve

Source: {'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}
Content: Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.
Agent System Overview#
In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:

Planning

Subgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.
Reflection and refinement: The agent can do self-criticism and self-reflection over past actions, 

: 