In [2]:
# Embedding

from langchain.embeddings import OpenAIEmbeddings
from langchain.embeddings import CacheBackedEmbeddings
from langchain.storage import LocalFileStore

# Base embedding model, constructed with its default settings.
embedder = OpenAIEmbeddings()

# Demo calls against the raw (uncached) embedder: one single-query embedding
# and one batch embedding. The returned vectors are neither stored nor
# displayed, because neither call is the last expression of the cell.
embedder.embed_query("Hi")
embedder.embed_documents(["Hi", "how", "are", "you"])

# File-backed byte store used to hold cached embeddings (relative path).
cache_dir = LocalFileStore("../../.cache")

# Wrap the embedder so document embeddings are looked up in / written to
# `cache_dir`. The namespace is set to the model name so vectors cached for
# one embedding model are never returned for another model sharing this store.
# NOTE(review): entries previously cached with the default empty namespace
# will presumably not be reused and will be re-embedded once - confirm that
# this one-off cost is acceptable.
cached_embedder = CacheBackedEmbeddings.from_bytes_store(
    embedder,
    cache_dir,
    namespace=embedder.model,
)

In [3]:
# Vector Store

from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

# Source text to index.
loader = UnstructuredFileLoader("../../files/1984.txt")

# Newline-separated chunks sized through the tiktoken encoder:
# chunk size 500 with an overlap of 50 between neighbouring chunks.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=500,
    chunk_overlap=50,
)

# Load the file, then cut the loaded document(s) into chunks.
docs = splitter.split_documents(loader.load())

# Build the Chroma index from the chunks, embedding them through the
# cache-backed embedder created in the previous cell.
vectorstore = Chroma.from_documents(documents=docs, embedding=cached_embedder)

# Last expression of the cell, so the matching documents are displayed.
vectorstore.similarity_search(query="Where does winston live?")

[Document(page_content='It was a bright cold day in April, and the clocks were striking thirteen. Winston Smith, his chin nuzzled into his breast in an effort to escape the vile wind, slipped quickly through the glass doors of Victory Mansions, though not quickly enough to prevent a swirl of gritty dust from entering along with him.\nThe hallway smelt of boiled cabbage and old rag mats. At one end of it a coloured poster, too large for indoor display, had been tacked to the wall. It depicted simply an enormous face, more than a metre wide: the face of a man of about forty-five, with a heavy black moustache and ruggedly handsome features. Winston made for the stairs. It was no use trying the lift. Even at the best of times it was seldom working, and at present the electric current was cut off during daylight hours. It was part of the economy drive in preparation for Hate Week. The flat was seven flights up, and Winston, who was thirty-nine and had a varicose ulcer above his right ankle,