In [1]:
from langchain_openai import ChatOpenAI
from langchain_classic.document_loaders import UnstructuredFileLoader
from langchain_classic.text_splitter import CharacterTextSplitter
from langchain_classic.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain_classic.vectorstores import FAISS
from langchain_classic.storage import LocalFileStore
from langchain_classic.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_classic.memory import ConversationBufferMemory
from langchain_classic.schema.runnable import RunnablePassthrough
from langchain_classic.schema import HumanMessage

## Set up LLM and Memory

llm = ChatOpenAI(
    temperature=0.1,
)

memory = ConversationBufferMemory(
    return_messages=True,
)

  memory = ConversationBufferMemory(


## Set up Document Loader, Splitter, Embeddings, and Vector Store

In [2]:
cache_dir = LocalFileStore("./.cache/")

splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)

loader = UnstructuredFileLoader("./files/document.txt")

docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

vectorstore = FAISS.from_documents(docs, cached_embeddings)

retriever = vectorstore.as_retriever()

  loader = UnstructuredFileLoader("./files/document.txt")
libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.
  embeddings = OpenAIEmbeddings()
  _warn_about_sha1_encoder()


## Set up Prompt Template

In [3]:
prompt = ChatPromptTemplate.from_messages(
    [
        ("ai", "You are a helpful assistant that answers questions based on the context provided. If you don't know the answer just say you don't know, don't make it up:\n\n{context}"),
        MessagesPlaceholder(variable_name="history"),
    ]
)

## Set up Chain

In [4]:
def load_and_add_memory(question):
    past_history = memory.load_memory_variables({})["history"]
    current_message = HumanMessage(content=question)
    return past_history + [current_message]

def format_docs(docs):
    # If docs is already a string, return it; otherwise join Document.page_content if available.
    if isinstance(docs, str):
        return docs
    try:
        return "\n\n".join(getattr(doc, "page_content", str(doc)) for doc in docs)
    except TypeError:
        # Fallback to string conversion
        return str(docs)

chain = (
    {
        "context": retriever | format_docs,
        "history": load_and_add_memory,
    }
    | prompt
    | llm
)

def invoke_chain(question):
    result = chain.invoke(question)
    memory.save_context({"input": question}, {"output": result.content})
    print(result)

## Questions

In [5]:
invoke_chain("Is Aaronson guilty?")
invoke_chain("What message did he write in the table?")
invoke_chain("Who is Julia?")

content='In the passage you provided, it is stated that Jones, Aaronson, and Rutherford were guilty of the crimes they were charged with. However, the protagonist, Winston, later reflects on the idea that the past is alterable and that he had never seen the photograph that disproved their guilt. This ambiguity suggests that the truth of their guilt may be manipulated by the Party, and Winston himself questions the validity of the information he has been given. Ultimately, the true guilt or innocence of Aaronson and the others remains uncertain in the context of the novel.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 112, 'prompt_tokens': 2055, 'total_tokens': 2167, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-3.5-turbo-0125', 'system_

## Context Memory Verification Test 

In [6]:

invoke_chain("What is his job title?")

content="In the passage you provided, the protagonist, Winston Smith, is described as being a member of the Ministry of Truth. His specific job title within the Ministry is not explicitly mentioned in the excerpt, but it is indicated that he is involved in some form of work related to the compilation of the Eleventh Edition of the Newspeak Dictionary. Winston's role involves participating in a sub-committee tasked with addressing minor difficulties that arise during the compilation process." additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 89, 'prompt_tokens': 2419, 'total_tokens': 2508, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'id': 'chatcmpl-Cf12etoeJjT8AGgvg2bV4b2PnWptJ', 'service_tier': 