In [95]:
from operator import itemgetter
from pathlib import Path
from typing import List

from langchain.chains.conversation.base import ConversationChain
from langchain.embeddings import CacheBackedEmbeddings
from langchain.memory import ConversationBufferWindowMemory, ConversationSummaryBufferMemory
from langchain.schema import runnable
from langchain.storage import LocalFileStore
from langchain_community.chat_message_histories.in_memory import ChatMessageHistory
from langchain_community.document_loaders.unstructured import UnstructuredFileLoader
from langchain_community.vectorstores.faiss import FAISS
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate, MessagesPlaceholder
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter
from dotenv import load_dotenv


# Read environment variables from a local .env file (python-dotenv).
# NOTE(review): presumably this supplies the OpenAI credentials used by the
# embedding and chat models further down - confirm against the .env contents.
load_dotenv()

# Loader for the source text; nothing is read here - the file is actually
# loaded when `loader.load_and_split(...)` is called in the next cell.
loader = UnstructuredFileLoader(file_path="./files/source_text.txt")


def format_docs(docs: List[Document]) -> str:
    """Concatenate the text of several documents into one string.

    Args:
        docs: Documents whose ``page_content`` should be combined, in order.

    Returns:
        The ``page_content`` of every document, separated by a blank line
        (``"\\n\\n"``). An empty list yields an empty string.
    """
    separator = "\n\n"
    return separator.join(document.page_content for document in docs)

In [96]:
# Recursive splitter built via the tiktoken-based constructor.
# NOTE(review): with this constructor chunk_size / chunk_overlap are presumably
# measured in tokens rather than characters - confirm against the splitter docs.
splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=500,
    chunk_overlap=50,
)

# Load the source file and cut it into chunks in one step.
docs = loader.load_and_split(
    text_splitter=splitter,
)


In [97]:
# On-disk byte store used as the embedding cache; lives in ./.cache relative to
# the directory the kernel was started from.
cache_dir = LocalFileStore(Path.cwd() / ".cache")

embedding = OpenAIEmbeddings(model="text-embedding-3-large")

# Wrap the embedder so document embeddings are looked up in / written to the
# local store instead of being recomputed on every run.
# `namespace` is set to the embedding model name so that vectors produced by
# different models never share cache keys in the same directory. Entries that
# were cached earlier without a namespace are simply not reused (they are
# recomputed once under the new namespace).
cache_embedding = CacheBackedEmbeddings.from_bytes_store(
    underlying_embeddings=embedding,
    document_embedding_cache=cache_dir,
    namespace=embedding.model,
)

# Build the in-memory FAISS index over the chunked documents.
vector_search = FAISS.from_documents(documents=docs, embedding=cache_embedding)

In [98]:
# Chat model created with default constructor arguments (the recorded outputs
# further down report model_name 'gpt-3.5-turbo').
model = ChatOpenAI()
# Retriever over the FAISS index built in the previous cell, default settings.
retriever = vector_search.as_retriever()
# Conversation memory shared by load_memory() (read) and invoke_chain() (write).
# return_messages=True goes with the MessagesPlaceholder in the prompt below.
# NOTE(review): presumably `llm` is used to summarise older turns once the
# buffer exceeds max_token_limit tokens - confirm against the memory class docs.
memory = ConversationSummaryBufferMemory(
    llm=model,
    max_token_limit=1024,
    return_messages=True,
)

# Prompt layout: a system message carrying the retrieved {context}, then the
# prior conversation under "history", then the user's {question}.
# The system text is a triple-quoted literal, so its line breaks and leading
# indentation are part of the string handed to the template.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """Based on the following conversation history and context, please answer the question. If you don't know the answer, don't make it up; simply say that you don't know.

            Context: {context}
            """
        ),
        MessagesPlaceholder(variable_name="history"),
        ("human", "{question}"),
    ]
)

def load_memory(_):
    """Return the conversation history held by the module-level ``memory``.

    The single positional argument is ignored; it exists only so the function
    can be wrapped in a ``RunnableLambda`` and called with the chain input.

    Returns:
        Whatever ``memory.load_memory_variables({})`` stores under the
        ``"history"`` key.
    """
    memory_variables = memory.load_memory_variables({})
    return memory_variables["history"]


# Retrieval-augmented chat chain. It is invoked with {"question": <str>} and
# every branch of the mapping below receives that same input dict:
#   - "context":  question text -> retriever -> retrieved chunks joined as text
#   - "question": the question text itself. It must be extracted with
#                 itemgetter; a bare RunnablePassthrough() would forward the
#                 whole input dict, so the human message would be rendered as
#                 "{'question': '...'}" instead of the question.
#   - "history":  prior turns read from the shared memory object
# The resulting mapping fills the prompt, which is then sent to the chat model.
chain = (
        {
            "context": itemgetter("question") | retriever | format_docs,
            "question": itemgetter("question"),
            "history": RunnableLambda(load_memory),
        }
        | prompt
        | model
)

def invoke_chain(question):
    """Run one conversational turn and remember it.

    Sends ``question`` through the module-level ``chain``, stores the
    question/answer pair in the module-level ``memory`` so later turns can see
    it, and prints the full response object.

    Args:
        question: The user's question as plain text.

    Returns:
        None. The response is printed, not returned.
    """
    response = chain.invoke({"question": question})

    # Record both sides of the exchange; only the text of the reply is saved.
    user_turn = {"input": question}
    model_turn = {"output": response.content}
    memory.save_context(user_turn, model_turn)

    print(response)




In [99]:
# First turn: nothing has been saved to memory by invoke_chain yet in this run.
invoke_chain("Is Aaronson guilty?")

content='Yes, according to the conversation history provided, Jones, Aaronson, and Rutherford were guilty of the crimes they were charged with.' response_metadata={'token_usage': {'completion_tokens': 27, 'prompt_tokens': 1664, 'total_tokens': 1691}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_3bc1b5746c', 'finish_reason': 'stop', 'logprobs': None}


In [100]:
# Follow-up turn: "he" is not named in this message, so resolving it depends on
# the history saved by the previous invoke_chain call.
invoke_chain("What message did he write in the table?")

content='He wrote the following message on the table:\n\nFREEDOM IS SLAVERY\n\nTWO AND TWO MAKE FIVE\n\nGOD IS POWER' response_metadata={'token_usage': {'completion_tokens': 30, 'prompt_tokens': 1793, 'total_tokens': 1823}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_3bc1b5746c', 'finish_reason': 'stop', 'logprobs': None}


In [101]:
# Third turn: a question about a different character, asked with the two
# earlier exchanges already stored in memory.
invoke_chain("Who is Julia?")

content='Julia is a character in the novel, "1984" by George Orwell. She is Winston\'s lover and a fellow member of the Party who rebels against the oppressive regime.' response_metadata={'token_usage': {'completion_tokens': 36, 'prompt_tokens': 1854, 'total_tokens': 1890}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_3bc1b5746c', 'finish_reason': 'stop', 'logprobs': None}
