# challenge4
- ConversationBufferMemory: 이전 대화 내용을 그대로 저장
- Stuff Documents chain: docs > 하나의 doc > answer

In [8]:
'''
[stuff]
1. 실행구조
    human: Question
    ai: invoke(question, retriever, history(memory)) > result > save(memory)

2. retriever산출(stuff)
    1) load_file(o)
    2) text_split(o)
    3) embeddings & cache(o)
    4) vectorstore > retriever(o)
'''

'\n[stuff]\n1. 실행구조\n    human: Question\n    ai: invoke(question, retriever, history(memory)) > result > save(memory)\n\n2. retriever산출(stuff)\n    1) load_file\n    2) text_split\n    3) embeddings & cache\n    4) vectorstore > retriever\n'

In [2]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain.vectorstores import Chroma
from langchain_community.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
from langchain.memory import ConversationBufferMemory



#---------- function
# retriever format
def format_docs(docs):
    """Merge documents into a single context string.

    The ``page_content`` of every item in ``docs`` is taken in order and the
    pieces are joined with one blank line between them.
    """
    contents = [doc.page_content for doc in docs]
    return '\n\n'.join(contents)


#load memory
def load_memory(_):
    """Return the conversation history held by the module-level ``memory``.

    Args:
        _: Ignored. The parameter exists so the function can be used as a
            step in the chain's input mapping, which passes it the chain
            input.

    Returns:
        The value stored under ``"history"`` in the result of
        ``memory.load_memory_variables({})``.
    """
    # Load once and reuse: the memory is not queried twice, and the value
    # printed for debugging is exactly the value handed to the prompt.
    history = memory.load_memory_variables({})["history"]
    print(history)
    return history


def embed_file(file):
    """Build a retriever over the text file at ``file``.

    The file is loaded, split into chunks, embedded through a file-backed
    embedding cache under ``./.cache/embeddings/<file name>``, indexed in a
    Chroma vector store, and exposed as a retriever.

    Args:
        file: Path to the text file to index.

    Returns:
        The retriever returned by ``vectorstore.as_retriever()``.

    Side effects:
        Prints the number of chunks and the first five chunks.
    """
    # Local import so this function does not depend on edits elsewhere.
    from pathlib import Path

    # load file
    loader = TextLoader(file)

    # split file: chunk sizes are measured with the tiktoken encoder
    splitter = CharacterTextSplitter.from_tiktoken_encoder(
        separator="\n",
        chunk_size=600,
        chunk_overlap=100,
    )
    docs = loader.load_and_split(text_splitter=splitter)
    print(len(docs))
    print(docs[:5])

    # embeddings & cache
    embeddings = OpenAIEmbeddings()
    # Path(...).name understands the platform's path separators (and path
    # objects), whereas splitting on '/' only handles POSIX-style strings.
    file_name = Path(file).name
    cache_store = LocalFileStore(f"./.cache/embeddings/{file_name}")
    # NOTE(review): the LangChain docs recommend passing ``namespace=`` here
    # so that different embedding models do not share cache entries; not
    # added because it would change the keys of any existing cache — confirm.
    cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_store)

    # vectorstore > retriever
    vectorstore = Chroma.from_documents(docs, cached_embeddings)
    return vectorstore.as_retriever()


def invoke_chain(question):
    """Ask ``question`` through the module-level ``chain`` and record the turn.

    The chain's result is printed, and the question together with the
    result's ``content`` is saved into the module-level ``memory``. Nothing
    is returned.
    """
    answer = chain.invoke(question)

    # Store this exchange so later calls see it as part of the history.
    turn_input = {'input': question}
    turn_output = {'output': answer.content}
    memory.save_context(turn_input, turn_output)

    print(answer)



#--------- retriever
# Text file to index; a relative path, so it is resolved against the current
# working directory.
file_path = r"../files/chapter3.txt"
# Built once at module level and reused by the chain for every question.
# embed_file also prints the chunk count and the first chunks as it runs.
retriever = embed_file(file_path)


#---------- prompt & invoke
llm = ChatOpenAI(temperature=0.1) # chat model that produces the answer
memory = ConversationBufferMemory(return_messages=True) # conversation history; read by load_memory, written by invoke_chain
# Prompt layout: a system message carrying the instructions and the retrieved
# {context}, then the earlier conversation turns ('history'), then the current
# question. The system text (Korean) tells the model to answer from the given
# document and to say it does not know when the answer is not in the document.
prompt = ChatPromptTemplate.from_messages([
    (
        'system',
        """
            너는 도움을 주는 훌륭한 조수야. 주어지는 문서에서 묻는 말에 대한 답을 찾아줘. 만약 문서안에 답이 없다면 모르겠다고 대답해.\n\n{context}
        """,
    ),
    MessagesPlaceholder(variable_name='history'),
    ('human', '{question}')
])
# The mapping produces the three prompt variables from the single question
# string that invoke_chain passes to chain.invoke(); its output feeds the
# prompt, whose output feeds the model.
chain = (
    {
        # retriever output joined into one string by format_docs
        "context": retriever | RunnableLambda(format_docs),
        # the question itself
        "question": RunnablePassthrough(),
        # history loaded from memory; load_memory ignores its argument
        "history": load_memory
    }
    | prompt
    | llm
)

34
[Document(metadata={'source': '../files/chapter3.txt'}, page_content='Chapter 3\n\'There are three stages in your reintegration,\' said O\'Brien. \'There is\nlearning, there is understanding, and there is acceptance. It is time for\nyou to enter upon the second stage.\'\nAs always, Winston was lying flat on his back. But of late his bonds were\nlooser. They still held him to the bed, but he could move his knees a\nlittle and could turn his head from side to side and raise his arms from\nthe elbow. The dial, also, had grown to be less of a terror. He could\nevade its pangs if he was quick-witted enough: it was chiefly when he\nshowed stupidity that O\'Brien pulled the lever. Sometimes they got through\na whole session without use of the dial. He could not remember how many\nsessions there had been. The whole process seemed to stretch out over a\nlong, indefinite time--weeks, possibly--and the intervals between the\nsessions might sometimes have been days, sometimes only an hour or tw

  warn_deprecated(


In [3]:
invoke_chain('Is Aaronson guilty?')


[]
content='Yes, according to the text, Jones, Aaronson, and Rutherford were guilty of the crimes they were charged with.' response_metadata={'token_usage': {'completion_tokens': 25, 'prompt_tokens': 2435, 'total_tokens': 2460}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-4d2932b8-606c-474c-9452-551412ea3619-0'


In [4]:
invoke_chain('What message did he write in the table?')


[HumanMessage(content='Is Aaronson guilty?'), AIMessage(content='Yes, according to the text, Jones, Aaronson, and Rutherford were guilty of the crimes they were charged with.')]
content='The message he wrote on the table was: "FREEDOM IS SLAVERY" and "TWO AND TWO MAKE FIVE".' response_metadata={'token_usage': {'completion_tokens': 29, 'prompt_tokens': 2448, 'total_tokens': 2477}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-1889cfbe-f382-4a91-9569-44f8af70f345-0'


In [5]:
invoke_chain('Who is Julia?')

[HumanMessage(content='Is Aaronson guilty?'), AIMessage(content='Yes, according to the text, Jones, Aaronson, and Rutherford were guilty of the crimes they were charged with.'), HumanMessage(content='What message did he write in the table?'), AIMessage(content='The message he wrote on the table was: "FREEDOM IS SLAVERY" and "TWO AND TWO MAKE FIVE".')]
content='Julia is a character in the text who is involved in a romantic relationship with Winston, the protagonist.' response_metadata={'token_usage': {'completion_tokens': 21, 'prompt_tokens': 2479, 'total_tokens': 2500}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-326a6a06-c61f-4806-a1bf-8681dd1112ad-0'


In [6]:
invoke_chain('Aaronson 은 유죄인가요?')

[HumanMessage(content='Is Aaronson guilty?'), AIMessage(content='Yes, according to the text, Jones, Aaronson, and Rutherford were guilty of the crimes they were charged with.'), HumanMessage(content='What message did he write in the table?'), AIMessage(content='The message he wrote on the table was: "FREEDOM IS SLAVERY" and "TWO AND TWO MAKE FIVE".'), HumanMessage(content='Who is Julia?'), AIMessage(content='Julia is a character in the text who is involved in a romantic relationship with Winston, the protagonist.')]
content='죄를 지은 것으로 기록되어 있습니다.' response_metadata={'token_usage': {'completion_tokens': 15, 'prompt_tokens': 2537, 'total_tokens': 2552}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-21ce8060-4770-40d3-9e63-1343457c8c84-0'


In [7]:
invoke_chain('그가 테이블에 어떤 메시지를 썼나요?')

[HumanMessage(content='Is Aaronson guilty?'), AIMessage(content='Yes, according to the text, Jones, Aaronson, and Rutherford were guilty of the crimes they were charged with.'), HumanMessage(content='What message did he write in the table?'), AIMessage(content='The message he wrote on the table was: "FREEDOM IS SLAVERY" and "TWO AND TWO MAKE FIVE".'), HumanMessage(content='Who is Julia?'), AIMessage(content='Julia is a character in the text who is involved in a romantic relationship with Winston, the protagonist.'), HumanMessage(content='Aaronson 은 유죄인가요?'), AIMessage(content='죄를 지은 것으로 기록되어 있습니다.')]
content='그가 테이블에 쓴 메시지는 "자유는 노예다"와 "2와 2는 5다" 입니다.' response_metadata={'token_usage': {'completion_tokens': 41, 'prompt_tokens': 2584, 'total_tokens': 2625}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-5af81eb2-7c52-49c6-9072-f110fc65ca8b-0'


In [8]:
invoke_chain('Julia 는 누구인가요?')

[HumanMessage(content='Is Aaronson guilty?'), AIMessage(content='Yes, according to the text, Jones, Aaronson, and Rutherford were guilty of the crimes they were charged with.'), HumanMessage(content='What message did he write in the table?'), AIMessage(content='The message he wrote on the table was: "FREEDOM IS SLAVERY" and "TWO AND TWO MAKE FIVE".'), HumanMessage(content='Who is Julia?'), AIMessage(content='Julia is a character in the text who is involved in a romantic relationship with Winston, the protagonist.'), HumanMessage(content='Aaronson 은 유죄인가요?'), AIMessage(content='죄를 지은 것으로 기록되어 있습니다.'), HumanMessage(content='그가 테이블에 어떤 메시지를 썼나요?'), AIMessage(content='그가 테이블에 쓴 메시지는 "자유는 노예다"와 "2와 2는 5다" 입니다.')]
content='주인공인 윈스턴과 로맨틱한 관계에 있는 캐릭터입니다.' response_metadata={'token_usage': {'completion_tokens': 29, 'prompt_tokens': 2628, 'total_tokens': 2657}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-9f02caa3-062b-4b52-9a77-c1