In [1]:
import os
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import Chroma

In [2]:
# Google Generative AI API key.
# Never commit credentials to a notebook: the key is read from the GOOGLE_API_KEY
# environment variable instead. Any key that was previously hardcoded in this cell
# must be treated as leaked and revoked in the Google Cloud console.
api = os.environ.get("GOOGLE_API_KEY")
if not api:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Export it in the environment that launches "
        "the Jupyter kernel before running this notebook."
    )

In [3]:
# Resolve the source text and the on-disk Chroma store relative to the notebook's
# working directory. The previous expression, os.path.dirname(os.path.abspath("__file__")),
# passed the *string* "__file__" and therefore also evaluated to the working directory;
# os.getcwd() says so directly.
current_dir = os.getcwd()
file_path = os.path.join(current_dir, "books", "odyssey.txt")
persistent_directory = os.path.join(current_dir, "db", "chroma_db")

# Create the embedding client unconditionally. A later cell reopens the persisted
# store with `embeddings`, so the name must exist even when the store was already
# built by an earlier run and the branch below is skipped.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api)

# Build the Chroma vector store only if it has not been persisted yet.
# NOTE: `docs` is bound only on this build path; cells that read `docs` need the
# store to have been built in the current kernel session.
if not os.path.exists(persistent_directory):
    print("Persistent directory does not exist. Initializing vector store...")

    # Fail early with a clear message if the source text is missing
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"The file {file_path} does not exist. Please check the path.")

    # Read the book as UTF-8 text
    loader = TextLoader(file_path, encoding='utf-8')
    documents = loader.load()

    # Split the document into chunks.
    # NOTE: the splitter warns "Created a chunk of size N, which is longer than the
    # specified 700" for many chunks, i.e. chunk_size is not a hard upper bound here.
    text_splitter = CharacterTextSplitter(chunk_size=700, chunk_overlap=0)
    docs = text_splitter.split_documents(documents)

    # Display information about the split documents
    print("\n--- Document Chunks Information ---")
    print(f"Number of document chunks: {len(docs)}")
    print("\n--- Finished creating embeddings ---")

    # Index the chunks with the embedding client and persist the store to disk
    db = Chroma.from_documents(docs, embeddings, persist_directory=persistent_directory)
    print("\n--- Finished creating vector store ---")

Created a chunk of size 1141, which is longer than the specified 700
Created a chunk of size 2086, which is longer than the specified 700
Created a chunk of size 1121, which is longer than the specified 700
Created a chunk of size 914, which is longer than the specified 700
Created a chunk of size 1366, which is longer than the specified 700
Created a chunk of size 827, which is longer than the specified 700
Created a chunk of size 816, which is longer than the specified 700
Created a chunk of size 979, which is longer than the specified 700
Created a chunk of size 862, which is longer than the specified 700
Created a chunk of size 1011, which is longer than the specified 700
Created a chunk of size 1639, which is longer than the specified 700
Created a chunk of size 1219, which is longer than the specified 700
Created a chunk of size 1875, which is longer than the specified 700
Created a chunk of size 713, which is longer than the specified 700
Created a chunk of size 1307, which is l

Persistent directory does not exist. Initializing vector store...


Created a chunk of size 1315, which is longer than the specified 700
Created a chunk of size 1627, which is longer than the specified 700
Created a chunk of size 958, which is longer than the specified 700
Created a chunk of size 1063, which is longer than the specified 700
Created a chunk of size 941, which is longer than the specified 700
Created a chunk of size 701, which is longer than the specified 700
Created a chunk of size 775, which is longer than the specified 700
Created a chunk of size 978, which is longer than the specified 700
Created a chunk of size 799, which is longer than the specified 700
Created a chunk of size 740, which is longer than the specified 700
Created a chunk of size 1051, which is longer than the specified 700
Created a chunk of size 896, which is longer than the specified 700
Created a chunk of size 1265, which is longer than the specified 700
Created a chunk of size 1202, which is longer than the specified 700
Created a chunk of size 1509, which is lon


--- Document Chunks Information ---
Number of document chunks: 1024

--- Finished creating embeddings ---

--- Finished creating vector store ---


In [7]:
# Show one chunk (index 12) as a sanity check of the splitting step
sample_text = f"Sample chunk:\n{docs[12].page_content}\n"
print(sample_text)

Sample chunk:
Other difficulties will also disappear as soon as the development of
the poem in the writer’s mind is understood. I have dealt with this at
some length in pp. 251-261 of “The Authoress of the Odyssey”. Briefly,
the “Odyssey” consists of two distinct poems: (1) The Return of
Ulysses, which alone the Muse is asked to sing in the opening lines of
the poem. This poem includes the Phaeacian episode, and the account of
Ulysses’ adventures as told by himself in Books ix.-xii. It consists of
lines 1-79 (roughly) of Book i., of line 28 of Book v., and thence
without intermission to the middle of line 187 of Book xiii., at which
point the original scheme was abandoned.



In [8]:
# Reopen the vector store persisted at `persistent_directory`, using the same
# embedding client for query-time embedding
db = Chroma(
    embedding_function=embeddings,
    persist_directory=persistent_directory,
)

In [19]:
# Retrieval: similarity search with a score threshold (k=3, score_threshold=0.5).
# The variable name `retieval` is kept as-is because a later cell refers to it.
retriever_options = {"k": 3, "score_threshold": 0.5}
retieval = db.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs=retriever_options,
)

In [20]:
# Question posed to the retriever
query: str = "Who is Odysseus' wife?"

In [26]:
# Run the query through the retriever and keep the matching chunks
relevant_docs = retieval.invoke(input=query)

In [27]:
# Print each retrieved chunk (numbered from 1), followed by its source when the
# chunk carries any metadata
for i, doc in enumerate(relevant_docs, start=1):
    print(f"Document {i}:\n{doc.page_content}\n")
    if not doc.metadata:
        continue
    print(f"Source: {doc.metadata.get('source', 'Unknown')}\n")

Document 1:
Now all the rest, as many as fled from sheer destruction, were at
    home, and had escaped both war and sea, but Odysseus only, craving
    for his wife and for his homeward path, the lady nymph Calypso
    held, that fair goddess, in her hollow caves, longing to have him
    for her lord. But when now the year had come in the courses of the
    seasons, wherein the gods had ordained that he should return home
    to Ithaca, not even there was he quit of labours, not even among
    his own; but all the gods had pity on him save Poseidon, who raged
    continually against godlike Odysseus, till he came to his own
    country. Howbeit Poseidon had now departed for the distant
    Ethiopians, the Ethiopians that are sundered in twain, the
    uttermost of men, abiding some where Hyperion sinks and some where
    he rises. There he looked to receive his hecatomb of bulls and
    rams, there he made merry sitting at the feast, but the other gods
    were gathered in the halls o