In [2]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
# from langchain.document_loaders import UnstructuredFileLoader
# from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.text_splitter import CharacterTextSplitter

from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import Chroma
from langchain.storage import LocalFileStore

# On-disk byte store that holds computed embedding vectors, so re-running the
# notebook can reuse them instead of calling the embedding API again.
cache_dir = LocalFileStore("./.cache")

# Split on blank lines; chunk size and overlap are measured with the tiktoken
# encoder rather than in raw characters.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n\n",
    chunk_size=600,
    chunk_overlap=100,
)
loader = TextLoader("./files/animal_farm.txt")

# Load the text file and split it into chunk documents in one step.
docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

# Namespace the cache by embedding model: without it, vectors produced by
# different models for the same text share one key and a model change would
# silently reuse stale vectors.
# NOTE(review): entries cached before the namespace was introduced are
# presumably stored under different keys and will be re-embedded once.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
    namespace=embeddings.model,
)

# Embed every chunk (going through the cache) and index it in Chroma.
vector_store = Chroma.from_documents(docs, cached_embeddings)


In [4]:
# Ask the vector store for the chunks whose embeddings are closest to the
# question text.
results = vector_store.similarity_search(
    query="Who is the leader of the farm?",
)

# Bare last expression so the notebook renders the matched documents.
results

[Document(page_content='"But is this simply part of the order of nature? Is it because this land\nof ours is so poor that it cannot afford a decent life to those who dwell\nupon it? No, comrades, a thousand times no! The soil of England is\nfertile, its climate is good, it is capable of affording food in abundance\nto an enormously greater number of animals than now inhabit it. This\nsingle farm of ours would support a dozen horses, twenty cows, hundreds of\nsheep--and all of them living in a comfort and a dignity that are now\nalmost beyond our imagining. Why then do we continue in this miserable\ncondition? Because nearly the whole of the produce of our labour is stolen\nfrom us by human beings. There, comrades, is the answer to all our\nproblems. It is summed up in a single word--Man. Man is the only real\nenemy we have. Remove Man from the scene, and the root cause of hunger and\noverwork is abolished for ever.\n\n"Man is the only creature that consumes without producing. He does n