In [None]:
!pip install langchain_postgres psycopg[binary]

In [1]:
from dotenv import load_dotenv

# Load settings from a local .env file before any API client is constructed.
load_dotenv()

from langchain_openai import ChatOpenAI

# Chat model handle shared by later cells; temperature is pinned to 0.
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0,
)

In [40]:
import os
from urllib.parse import quote_plus

from langchain_openai import OpenAIEmbeddings
from langchain_postgres import PGVector

# os.environ[...] raises a KeyError naming the missing variable, instead of
# silently building a URL that contains the literal text "None".
# User and password are URL-escaped so characters such as "@", ":" or "/"
# cannot corrupt the connection string.
pg_user = quote_plus(os.environ["POSTGRES_USER"])
pg_password = quote_plus(os.environ["POSTGRES_PASSWORD"])
pg_host = os.environ["PGVECTOR_HOST"]
pg_port = os.environ["PGVECTOR_PORT"]
pg_database = os.environ["POSTGRES_DB"]

embeddings = OpenAIEmbeddings()

# "+psycopg" selects the psycopg 3 driver, which is the one this notebook
# installs; a bare "postgresql://" URL makes SQLAlchemy look for psycopg2.
vectorstore = PGVector(
    embeddings=embeddings,
    collection_name="chapter6_collection",
    connection=f"postgresql+psycopg://{pg_user}:{pg_password}@{pg_host}:{pg_port}/{pg_database}",
    use_jsonb=True,
)

In [41]:
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.output_parsers import StrOutputParser

# Step 1: load the source PDF
loader = PyMuPDFLoader("data/bccard.pdf")
docs = loader.load()

# Step 2: split the loaded documents into chunks (chunk_size=500, chunk_overlap=50)
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)
split_documents = text_splitter.split_documents(docs)

In [None]:
# Optional (disabled): load, split, and index a second PDF into the same vector store.
# Uncomment the lines below to run it.
# loader = PyMuPDFLoader('data/baro_rewardsplus_card.pdf')
# docs = loader.load()
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
# split_documents = text_splitter.split_documents(docs)
# vectorstore.add_documents(split_documents)


In [None]:
# Step 3: add the chunks to the vector store
import hashlib

# Derive a stable id for every chunk from its source path, its position in the
# split list, and its text. Re-running this cell then reuses the same ids
# rather than inserting a second copy of every chunk into the persistent store.
# The position is part of the key so two chunks with identical text still get
# distinct ids within one run.
document_ids = [
    hashlib.sha256(
        f"{doc.metadata.get('source', '')}|{index}|{doc.page_content}".encode("utf-8")
    ).hexdigest()
    for index, doc in enumerate(split_documents)
]

vectorstore.add_documents(split_documents, ids=document_ids)
print(f"{len(split_documents)}개의 문서가 벡터스토어에 추가되었습니다.")


In [43]:
def format_documents(documents):
    """Concatenate the ``page_content`` of each document into one string.

    Args:
        documents: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The page contents joined with a newline between consecutive
        documents; an empty string when ``documents`` is empty.
    """
    page_texts = (document.page_content for document in documents)
    return "\n".join(page_texts)

In [44]:
# Retriever over the vector store, configured with k=10 results per query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 10})

# Step 4: initialise the chat model
model = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Step 5: build the prompt template
# {context} receives the formatted retrieved text; {question} receives the user query.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "문서에서 다음 질문에 대한 답변을 찾아주세요."),
        ("system", "문서내용 : {context}"),
        ("user", "{question}")
    ]
)

# Step 6: create the output parser
output_parser = StrOutputParser()

# Step 7: assemble the chain
# The input question is sent both to the retriever (whose documents are merged
# by format_documents) and, unchanged, to the prompt's {question} slot.
# The chain uses `model` from Step 4 so this cell does not rely on an `llm`
# variable created in a different cell.
chain = (
    {
        "context": retriever | format_documents,
        "question": RunnablePassthrough(),
    }
    | prompt
    | model
    | output_parser
)

In [53]:
# The number of retrieved chunks (k) is fixed when the retriever is created with
# as_retriever(search_kwargs=...); chain.invoke only needs the question itself.
response = chain.invoke("2024년도 비씨카드 이사회 구성원")

In [None]:
# Display the chain output currently stored in `response`.
print(response)


In [None]:
# Inspect the raw chunks the retriever returns for this query.
# They are kept under their own name so `response` is not rebound from the
# chain's text answer to a list of documents.
retrieved_docs = retriever.invoke("2024년 비씨카드 이사회 구성원")
for doc in retrieved_docs:
    print(doc.page_content)
    print("-" * 100)


In [None]:
# Run the same question through the retriever alone and then through the full
# chain, so the retrieved chunks can be compared with the generated answer.
question = "연회비 반환조건"

retrieved_docs = retriever.invoke(question)
for doc in retrieved_docs:
    print(doc.page_content)
    print("-" * 100)

response = chain.invoke(question)
print(response)

