In [None]:
from langchain import hub
from langchain.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables.history import BaseChatMessageHistory, RunnableWithMessageHistory
import os
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.history_aware_retriever import create_history_aware_retriever
from langchain.chains.retrieval import create_retrieval_chain
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

In [None]:
# Register the OpenAI API key without hardcoding it in the notebook.
# A key already present in the environment is left untouched; otherwise the
# user is prompted, so the secret never ends up in the saved file.
from getpass import getpass

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# Load the constitution PDF.
# load() is used rather than load_and_split() so that the text is split exactly
# once, by the splitter configured below.
loader = PyPDFLoader(r"헌법.pdf")
pages = loader.load()

# Split the loaded pages into chunks of at most 1000 characters (100 overlap).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=100
)
docs = text_splitter.split_documents(pages)

# Store the chunks in ChromaDB as vector embeddings (OpenAI embedding model).
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
persist_directory = "RAG_EX/Chroma_DB"

if os.path.isdir(persist_directory) and os.listdir(persist_directory):
    # A persisted index already exists: reuse it. Calling from_documents()
    # again would append the same chunks a second time and re-bill the
    # embedding calls. Delete the directory to force a rebuild.
    vectorstore = Chroma(
        persist_directory=persist_directory,
        embedding_function=embeddings,
    )
else:
    vectorstore = Chroma.from_documents(
        docs, embeddings, persist_directory=persist_directory
    )
retriever = vectorstore.as_retriever()

# Declare the GPT-4o-mini chat model.
llm = ChatOpenAI(model="gpt-4o-mini")

In [None]:
# System prompt that asks the model to rewrite the latest user question into a
# standalone question, using the chat history only to resolve references.
# Written as adjacent string literals so no accidental newline or typo leaks
# into the text sent to the model.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, formulate a standalone question "
    "which can be understood without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

# Message layout: system instructions, then the prior conversation, then the
# new user input.
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

# Retriever built from the LLM, the base retriever and the rewrite prompt.
history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_q_prompt
)

In [None]:
# System prompt for the answering step: it lets the chain handle the retrieved
# context together with the user's question. The {context} placeholder is
# left for the documents chain to fill.
qa_system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, just say that you don't know. "
    "Use three sentences maximum and keep the answer concise. "
    "\n{context}"
)

# Message layout: system instructions, prior conversation, new user input.
qa_messages = [
    ("system", qa_system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
]
qa_prompt = ChatPromptTemplate.from_messages(qa_messages)

# Combine the documents chain with the history-aware retriever into one chain.
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
rag_chain = create_retrieval_chain(
    history_aware_retriever,
    question_answer_chain,
)

In [None]:
# Dictionary holding the chat history of each session, keyed by session_id.
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history stored for ``session_id``.

    An empty ``ChatMessageHistory`` is created and registered in ``store`` the
    first time a given ``session_id`` is requested.
    """
    try:
        return store[session_id]
    except KeyError:
        history = store[session_id] = ChatMessageHistory()
        return history


# Wrap rag_chain in RunnableWithMessageHistory, using get_session_history to
# look up the history for each session. The keys below name the input field,
# the history placeholder and the output field.
history_keys = {
    "input_messages_key": "input",
    "history_messages_key": "chat_history",
    "output_messages_key": "answer",
}
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history=get_session_history,
    **history_keys,
)

In [None]:
# Smoke test: ask two questions under the same session_id; the second question
# is elliptical, so the chain is invoked with the same session as the first.
first_response = conversational_rag_chain.invoke(
    {"input": "대통령의 임기는 몇 년이야?"},
    config={"configurable": {"session_id": "1234"}},
)
result = first_response['answer']

second_response = conversational_rag_chain.invoke(
    {"input": "국회의원은?"},
    config={"configurable": {"session_id": "1234"}},
)
result2 = second_response['answer']

# Show both answers, separated by blank lines.
for chunk in (result, "\n\n", result2):
    print(chunk)