In [None]:
from langchain import hub
from langchain.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
import os


In [None]:
# Register the OpenAI API key.
# A key that is already exported in the environment is kept; the placeholder
# below is only a fallback and must be replaced with a real key before running.
os.environ.setdefault('OPENAI_API_KEY', "your_key")

# Load the constitution PDF.
# load() is used instead of load_and_split(): the latter already runs the
# documents through a default text splitter, which would chunk the text once
# before the explicitly configured splitter below chunks it again.
loader = PyPDFLoader(r"헌법.pdf")
pages = loader.load()

# Split the loaded documents into chunks of at most 1000 characters,
# with a 100-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=100
)
docs = text_splitter.split_documents(pages)

# Embed every chunk with the OpenAI embedding model and store the vectors in a
# Chroma collection persisted under RAG_EX/Chroma_DB.
vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=OpenAIEmbeddings(model="text-embedding-3-small"),
    persist_directory="RAG_EX/Chroma_DB",
)
# Retriever over the vector store, created with its default settings.
retriever = vectorstore.as_retriever()

# Fetch the RAG prompt from LangChain Hub.
prompt = hub.pull("rlm/rag-prompt")

# Declare the GPT-4o-mini chat model.
llm = ChatOpenAI(model="gpt-4o-mini")

# Build the chain.
def _format_docs(retrieved_docs):
    """Join the text of the retrieved documents, separated by blank lines.

    Without this step the prompt's ``context`` slot receives the raw list of
    Document objects, so the model sees the list's repr (metadata included)
    rather than clean passage text.
    """
    return "\n\n".join(doc.page_content for doc in retrieved_docs)


# The input question is fanned out: it is sent to the retriever (whose results
# are flattened into plain text) and passed through unchanged as "question".
rag_chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

###################################### Test
# Ask a sample question through the chain and show the generated answer.
answer = rag_chain.invoke("국회의원의 의무는 뭐야?")
print(answer)

# print_ascii() writes the diagram to stdout itself and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None" line.
rag_chain.get_graph().print_ascii()