In [None]:
from langchain_openai import ChatOpenAI
from langchain_classic.document_loaders import UnstructuredFileLoader
from langchain_classic.text_splitter import CharacterTextSplitter
from langchain_classic.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain_classic.vectorstores import FAISS
from langchain_classic.storage import LocalFileStore
from langchain_classic.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_classic.memory import ConversationSummaryBufferMemory
from langchain_classic.schema.runnable import RunnablePassthrough
from langchain_classic.schema import HumanMessage

## Set up LLM and Memory

# Chat model shared by the answering chain and the conversation memory.
llm = ChatOpenAI(temperature=0.1)

# Conversation memory that is handed the same model instance.
# return_messages=True: history is consumed downstream as a list of messages.
memory = ConversationSummaryBufferMemory(llm=llm, return_messages=True)

## Set up Document Loader, Splitter, Embeddings, and Vector Store

In [None]:
# File-backed byte store that holds previously computed embeddings.
cache_dir = LocalFileStore("./.cache/")

# Split on newlines into token-counted chunks of 600 with 100 of overlap.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)

loader = UnstructuredFileLoader("./files/document.txt")

docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

# Namespace the cache by embedding model so that vectors produced by a
# different model for the same text never collide in the shared store.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
    namespace=embeddings.model,
)

vectorstore = FAISS.from_documents(docs, cached_embeddings)

retriever = vectorstore.as_retriever()

## Set up Prompt Template

In [None]:
# The instructions and the retrieved context go in a system message so the
# model treats them as guidance rather than as one of its own earlier replies.
# The "history" placeholder is filled with the conversation messages.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant that answers questions based on the context provided. If you don't know the answer just say you don't know, don't make it up:\n\n{context}",
        ),
        MessagesPlaceholder(variable_name="history"),
    ]
)

## Set up Chain

In [None]:
def load_and_add_memory(input_dict):
    """Build the message list for the prompt's "history" slot.

    Reads the "history" entry from the module-level ``memory`` and appends
    the incoming question wrapped in a ``HumanMessage``.

    Args:
        input_dict: Chain input; must contain a "question" key.

    Returns:
        The stored history followed by the new human message.
    """
    stored_messages = memory.load_memory_variables({})["history"]
    question_message = HumanMessage(content=input_dict["question"])
    return stored_messages + [question_message]

def format_docs(docs):
    """Join retrieved documents into a single context string.

    Each document contributes its ``page_content`` text. Items that have no
    ``page_content`` attribute fall back to ``str(item)`` so that plain
    strings (or other objects) are still accepted.

    Args:
        docs: Iterable of document-like objects.

    Returns:
        The document texts separated by blank lines; "" for an empty input.
    """
    # Use the text itself rather than str(doc): stringifying a whole document
    # object would also put its repr formatting and metadata into the prompt.
    return "\n\n".join(
        doc.page_content if hasattr(doc, "page_content") else str(doc)
        for doc in docs
    )

# The chain is invoked with a dict of the form {"question": ...}.
# The retriever needs the bare question string as its query, so the question
# is pulled out of the input dict before retrieval; piping the dict straight
# into the retriever would pass the whole dict as the search query.
chain = (
    {
        "context": (lambda input_dict: input_dict["question"])
        | retriever
        | format_docs,
        "history": load_and_add_memory,
    }
    | prompt
    | llm
)

def invoke_chain(question):
    """Ask one question, store the exchange in memory, and print the reply.

    Args:
        question: The user's question text.
    """
    response = chain.invoke({"question": question})
    # Record the turn so later questions can refer back to it.
    memory.save_context(
        {"input": question},
        {"output": response.content},
    )
    print(response)

In [None]:
# Ask the questions in order; later questions rely on earlier turns
# (e.g. "he" in the second question).
for question in (
    "Is Aaronson guilty?",
    "What message did he write in the table?",
    "Who is Julia?",
):
    invoke_chain(question)