In [None]:
#### 전체 계획 다시 잡기

파싱 -> 청킹 진행
벡터스토어 + 임베딩 + 리트리버 생성
생성기(+LLM) 생성
langgraph 연결
query expension 만들기
reranker 만들기
augmenter 만들기
filter 만들기
promptmaker 만들기 (?)

In [None]:
# ingestion.py

import getpass
import os
from langchain_openai import OpenAIEmbeddings
from pinecone import Pinecone
from langchain_pinecone import PineconeVectorStore

if not os.environ.get("OPENAI_API_KEY"):
  os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

pinecone_api = os.environ["PINECONE_API_KEY"]

# vectorstore load
pc = Pinecone(api_key=pinecone_api)

index_name = "ragtest"
index = pc.Index(index_name)

embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

vector_store = PineconeVectorStore(embedding=embeddings, index=index)

retriever = vector_store.as_retriever(
  search_type="similarity", search_kwargs={"k": 5},
)

In [3]:
from typing import Annotated, TypedDict
from langgraph.graph.message import add_messages

# GraphState 상태 정의
class GraphState(TypedDict):
    question: Annotated[str, "Question"]
    context: Annotated[str, "Context"]
    answer: Annotated[str, "Answer"]
    message: Annotated[list, add_messages]

In [34]:
# retrieve.py
def retrieve_document(state: GraphState) -> GraphState:
    print("---RETRIEVE---")
    questions = state["question"]
    documents = retriever.invoke(questions)
    print(documents)
    return {"context": documents, "question": questions}

In [5]:
# generation.py
# 답변 생성 체인
from dotenv import load_dotenv
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

load_dotenv()

llm = ChatOpenAI(temperature=0)
prompt = hub.pull("rlm/rag-prompt")

generation_chain = prompt | llm | StrOutputParser()



In [40]:
# generate.py
# 답변 실행 역할

from typing import Any, Dict

# from generation import generation_chain
# from state import GraphState

def generate(state: GraphState) -> Dict[str, Any]:
    print("---GENERATE---")
    question = state["question"]
    context = state["context"]

    generation = generation_chain.invoke({"context": context, "question": question})
    # print(f"question:{question}")
    # print(f"context:{context}")
    # print(f"generation:{generation}")
    message = [{"role": "user", "content": question},{"role":"assistant", "content":generation}]
    return {"context": context, "question": question, "answer": generation, "message": message}

In [41]:
# consts.py
RETRIEVE = "retrieve"
GRADE_DOCUMENTS = "grade_documents"
GENERATE = "generate"
WEBSEARCH = "websearch"

# graph.py
from dotenv import load_dotenv
from langgraph.graph import END, StateGraph

# from consts import RETRIEVE, GRADE_DOCUMENTS, GENERATE, WEBSEARCH

# from nodes import generate, grade_documents, retrieve, web_search
# from state import GraphState

load_dotenv()

# def decide_to_generate(state):
#     print("--ASSESS GRADED DOCUMENTS---")

#     if state["web_search"]:
#         print(
#             "---DESISION: NOT ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, INCLUDE WEB SEARCH---"
#         )
#         return WEBSEARCH
#     else:
#         print("---DECISION: GENERATE---")
#         return GENERATE

workflow = StateGraph(GraphState)

workflow.add_node("retrieve", retrieve_document)
workflow.add_node("generate", generate)

workflow.set_entry_point("retrieve")

workflow.add_edge("retrieve", "generate")
workflow.add_edge("generate", END)

app = workflow.compile()

In [42]:
app.get_graph().draw_mermaid_png(output_file_path='./graph.png')

b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00n\x00\x00\x01M\x08\x02\x00\x00\x00nv\xcd\x80\x00\x00\x00\x01sRGB\x00\xae\xce\x1c\xe9\x00\x00\x1f\x9dIDATx\x9c\xed\x9dw\\T\xc7\xda\xc7\xe7l\xdf=\xdb\x97]z\x13\xac\x08\x8aB\xa2\x06TlQD\x05b\x90\x18{\xbcIn\xaco\xae1D11y/\xc4\xe8M.Qb\xc95\xe0\x8d%\x1a\x8d\xbdk\x125\x88\x02b\xc3^A:la\x97\xed\xf5\xfdcy\x89\xd1e\x0b\xce\xc2\x1e9\xdf?\xfc\xb8\xe7\xcc\xcc>\xfbc\xce\xcc\x9c\x99g\xe6A,\x16\x0b\xc0\x81\x01\xa1\xb3\rxy\xc0\xa5\x84\x06.%4p)\xa1\x81K\t\r\\Jh\x90\xa0\x94R\xffD\xabV\x98\xd4\xcdF\xa3\xc1\xa2\xd3\x98\xa1\x94\xe9n\xa8t\x02\x99B`\xb0\x89\x0c\x16Q\x14H{\xf1\x02_H\xca\xfbW\x9b\x1f\x95\xa9\x1e\xdfP\x05\xf7a\x98\x0c\x16\x06\x8b\xc4\xf7\xa6\x00\x8c\x8cS\xcdFP[\xa3Q+L\x14:\xa1\xf2\x8e:\xa4/\xda-\x12\xed\xd6\x97\xd9\xee\x02\x91\xf6\r\xd1o\x17+\n\x0fI\x02{\xd2Cz\xa3\xa1}Q2\x15\xdb\r\x85Fe*\xbf\xa1\xaa~\xa8\xaey\xa4\x1d2\xc1+\xbc_{\x04uYJ\xb9\xd8prk\x1d\xcf\x9b2d\x82\x80\xc1\x82\xd3>x\x0er\xb1\xa1\xf0\x90\xd8h\xb0\x8c\x99\xeeM\xa5\x13]\xca\xeb\x9a\x9

In [43]:
print(app.invoke(input={"question": "iso 설정 방법에 대해 알려줘"}))

---RETRIEVE---
[Document(id='6d7d3a49-66a3-416f-b9bd-013496fb7306', metadata={'author': '', 'creationDate': "D:20201113144933+09'00'", 'creator': 'AH CSS Formatter V6.6 MR8 for Windows (x64) : 6.6.10.40521 (2019-09-24T09:58+09)', 'encryption': '', 'format': 'PDF 1.6', 'keywords': '', 'modDate': "D:20201113145327+09'00'", 'page': 591.0, 'producer': 'Antenna House PDF Output Library 6.6.1502 (Windows (x64))', 'source': 'Camera/EOS_M50_Mark_II_Advanced_User_Guide_K.pdf', 'subject': '', 'title': '', 'total_pages': 704.0, 'trapped': ''}, page_content='## 설정\n\n이 장에서는 설정 ([ ]) 탭의 메뉴 설정에 대해 설명합니다.\n\n페이지 제목 우측의 은 해당 기능이 크리에이티브 존 모드 (< >/< >/< >/\n\n< >)에서만 사용 가능함을 나타냅니다.\n\n - 탭 메뉴: 설정\n\n- 폴더 선택하기\n\n- 파일 번호\n\n- 포맷하기\n\n- 자동 회전\n\n- 동영상에 방향 정보 추가하기\n\n- 날짜/시간/지역\n\n- 언어\n\n- 비디오 형식\n\n- 표시음\n\n- 절전\n\n- 에코 모드\n\n- 표시 설정\n\n- 화면 밝기\n\n- 뷰파인더 밝기\n\n- 메뉴 화면 확대\n\n- HDMI 해상도\n\n- 터치 제어\n\n- 센서 클리닝\n\n- 카메라 설정 초기화\n\n - 사용자 정의 기능 (C.Fn)\n\n- 저작권 정보\n\n- 기타 정보\n\n591\n\n\n-----\n\n'), Document(id

In [44]:
inputs = {"question": "iso 설정 방법에 대해 알려줘"}

for chunk_msg, metadata in app.stream(inputs, stream_mode="messages"):
        print(chunk_msg.content, end="", flush=True)

---RETRIEVE---
[Document(id='6d7d3a49-66a3-416f-b9bd-013496fb7306', metadata={'author': '', 'creationDate': "D:20201113144933+09'00'", 'creator': 'AH CSS Formatter V6.6 MR8 for Windows (x64) : 6.6.10.40521 (2019-09-24T09:58+09)', 'encryption': '', 'format': 'PDF 1.6', 'keywords': '', 'modDate': "D:20201113145327+09'00'", 'page': 591.0, 'producer': 'Antenna House PDF Output Library 6.6.1502 (Windows (x64))', 'source': 'Camera/EOS_M50_Mark_II_Advanced_User_Guide_K.pdf', 'subject': '', 'title': '', 'total_pages': 704.0, 'trapped': ''}, page_content='## 설정\n\n이 장에서는 설정 ([ ]) 탭의 메뉴 설정에 대해 설명합니다.\n\n페이지 제목 우측의 은 해당 기능이 크리에이티브 존 모드 (< >/< >/< >/\n\n< >)에서만 사용 가능함을 나타냅니다.\n\n - 탭 메뉴: 설정\n\n- 폴더 선택하기\n\n- 파일 번호\n\n- 포맷하기\n\n- 자동 회전\n\n- 동영상에 방향 정보 추가하기\n\n- 날짜/시간/지역\n\n- 언어\n\n- 비디오 형식\n\n- 표시음\n\n- 절전\n\n- 에코 모드\n\n- 표시 설정\n\n- 화면 밝기\n\n- 뷰파인더 밝기\n\n- 메뉴 화면 확대\n\n- HDMI 해상도\n\n- 터치 제어\n\n- 센서 클리닝\n\n- 카메라 설정 초기화\n\n - 사용자 정의 기능 (C.Fn)\n\n- 저작권 정보\n\n- 기타 정보\n\n591\n\n\n-----\n\n'), Document(id

### 추후 참고

In [None]:
# 추후 사용
# retrieval_grader.py
# 문서가 실제로 질몬과 관련이 있는지 판단하는 내용

from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field

llm = ChatOpenAI(temperature=0)


class GradeDocuments(BaseModel):
    """Binary score for relevance score on retrieved documents."""

    binary_score: str = Field(
        description="Documents are relevant to the question, 'yes' or 'no'"
    )


structured_llm_grader = llm.with_structured_output(GradeDocuments)

system = """You are a grade accessing relevance of a retrieved document to a user question. \n
If the document contains keywors(s) or semantic meaning related to the question, grade it as relevant. \n
Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."""

grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
    ]
)

retrieval_grader = grade_prompt | structured_llm_grader


In [None]:
## grade_documents.py
# 모든 문서를평가하는 노드

from typing import Any, Dict

from graphs.chains.retrieval_grader import retrieval_grader
from graphs.state import GraphState


def grade_documents(state: GraphState) -> Dict[str, Any]:
    """
    Determines whether the retrieved documents are relevant to the question
    If any document is not relevant, we will set a flag to run web search

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Filtered out irrelevant documents and updated web_search state
    """

    print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
    question = state["question"]
    documents = state["documents"]

    filtered_docs = []
    web_search = False
    for d in documents:
        score = retrieval_grader.invoke(
            {"question": question, "document": d.page_content}
        )
        grade = score.binary_score
        if grade.lower() == "yes":
            print("---GRADE: DOCUMENT RELEVANT---")
            filtered_docs.append(d)
        else:
            print("---GRADE: DOCUMENT NOT RELEVANT---")

    if len(filtered_docs) == 0:
        web_search = True

    return {"documents": filtered_docs, "question": question, "web_search": web_search}
