In [1]:
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv
from langchain.vectorstores import Chroma

In [2]:
# Load variables from a local .env file into the process environment.
# No API key is passed to OpenAIEmbeddings() anywhere in this notebook, so it is
# presumably read from the environment -- confirm OPENAI_API_KEY is set in .env.
load_dotenv()

True

In [3]:
from langchain.schema import Document

In [5]:
# Five short, unrelated passages used as the demo corpus. Each Document carries
# a single "topic" metadata field describing its subject.
doc1 = Document(
    page_content=(
        "Large language models like GPT-4 are capable of understanding and "
        "generating human-like text. They are trained on diverse datasets and "
        "can perform tasks such as summarization, question answering, and "
        "translation."
    ),
    metadata={"topic": "AI and Language Models"},
)

doc2 = Document(
    page_content=(
        "Climate change refers to long-term shifts in temperatures and weather "
        "patterns. These changes may be natural, but human activities have been "
        "the main driver of climate change since the 1800s, primarily due to "
        "burning fossil fuels."
    ),
    metadata={"topic": "Climate Change"},
)

doc3 = Document(
    page_content=(
        "Our solar system consists of the sun and everything bound to it by "
        "gravity — the planets, moons, asteroids, comets, and more. Earth is "
        "the third planet from the sun and the only one known to support life."
    ),
    metadata={"topic": "The Solar System"},
)

doc4 = Document(
    page_content=(
        "Regular physical activity improves brain health, helps manage weight, "
        "reduces the risk of disease, and strengthens bones and muscles. It can "
        "also improve mood and mental health."
    ),
    metadata={"topic": "Benefits of Exercise"},
)

doc5 = Document(
    page_content=(
        "The internet originated from research commissioned by the U.S. "
        "Department of Defense in the 1960s. It evolved from ARPANET and grew "
        "into the global network we use today for communication, commerce, and "
        "information sharing."
    ),
    metadata={"topic": "History of the Internet"},
)


In [7]:
# The full corpus, in definition order, ready to be handed to the vector store.
docs = [
    doc1,
    doc2,
    doc3,
    doc4,
    doc5,
]

In [8]:
# Chroma collection named "database", stored under the "my_db" directory.
# Texts are embedded with OpenAIEmbeddings() using its default settings.
vector_store = Chroma(
    collection_name="database",
    persist_directory="my_db",
    embedding_function=OpenAIEmbeddings(),
)

  vector_store = Chroma(


In [9]:
# Insert the corpus under explicit, stable ids (one per entry of `docs`, in order).
#
# Why not let the store generate ids: the collection is persisted to "my_db",
# so auto-generated UUIDs would differ on every run and each re-run of this
# cell would append five more copies of the same passages. With fixed ids,
# re-adding is expected to overwrite the existing entries instead of
# duplicating them (Chroma upsert semantics -- verify against the installed
# version). The values are the ids recorded in this notebook's saved outputs,
# so any id-based cell keeps pointing at the same documents after
# Restart & Run All.
doc_ids = [
    "9d5d1713-f6c6-472f-8a61-22fa6f57ed70",  # doc1
    "ca443c18-18d2-4afe-85ea-10269d769113",  # doc2
    "9c4a3acc-0840-4e17-b5a9-27256374a79f",  # doc3
    "8bde7f71-1cc8-4ded-a54a-aacb0c87a792",  # doc4
    "9570f497-8e20-43ad-8745-8abe60484886",  # doc5
]

# Last expression of the cell: displays the list of ids that were written.
vector_store.add_documents(docs, ids=doc_ids)

['9d5d1713-f6c6-472f-8a61-22fa6f57ed70',
 'ca443c18-18d2-4afe-85ea-10269d769113',
 '9c4a3acc-0840-4e17-b5a9-27256374a79f',
 '8bde7f71-1cc8-4ded-a54a-aacb0c87a792',
 '9570f497-8e20-43ad-8745-8abe60484886']

In [10]:
# Inspect what is stored in the collection: ask for the embedding vectors, the
# raw texts and the metadata of every entry.
vector_store.get(
    include=[
        "embeddings",
        "documents",
        "metadatas",
    ],
)

{'ids': ['9d5d1713-f6c6-472f-8a61-22fa6f57ed70',
  'ca443c18-18d2-4afe-85ea-10269d769113',
  '9c4a3acc-0840-4e17-b5a9-27256374a79f',
  '8bde7f71-1cc8-4ded-a54a-aacb0c87a792',
  '9570f497-8e20-43ad-8745-8abe60484886'],
 'embeddings': array([[-0.0124462 , -0.0086051 ,  0.00064425, ..., -0.01183526,
         -0.02155175, -0.01691123],
        [ 0.01494504, -0.02435577,  0.01298895, ...,  0.00910658,
         -0.01808195, -0.04687472],
        [ 0.02906025,  0.00743286, -0.01383407, ..., -0.0123301 ,
         -0.02930884, -0.02429974],
        [ 0.00049892, -0.01098975,  0.02316791, ..., -0.00683633,
         -0.00133246, -0.03762085],
        [ 0.01404411, -0.00625209,  0.00463209, ..., -0.01661887,
         -0.00997871,  0.00134666]], shape=(5, 1536)),
 'documents': ['Large language models like GPT-4 are capable of understanding and generating human-like text. They are trained on diverse datasets and can perform tasks such as summarization, question answering, and translation.',
  'Clima

In [11]:
# Retrieve the two stored documents closest to the question.
vector_store.similarity_search("What are Large Language Models?", k=2)

[Document(metadata={'topic': 'AI and Language Models'}, page_content='Large language models like GPT-4 are capable of understanding and generating human-like text. They are trained on diverse datasets and can perform tasks such as summarization, question answering, and translation.'),
 Document(metadata={'topic': 'Climate Change'}, page_content='Climate change refers to long-term shifts in temperatures and weather patterns. These changes may be natural, but human activities have been the main driver of climate change since the 1800s, primarily due to burning fossil fuels.')]

In [12]:
# Same kind of lookup, but each hit is returned as a (Document, score) pair.
# NOTE(review): the score looks like a distance (smaller = closer match) --
# confirm against the Chroma / LangChain documentation.
vector_store.similarity_search_with_score("What is solar System?", k=2)

[(Document(metadata={'topic': 'The Solar System'}, page_content='Our solar system consists of the sun and everything bound to it by gravity — the planets, moons, asteroids, comets, and more. Earth is the third planet from the sun and the only one known to support life.'),
  0.25423794984817505),
 (Document(metadata={'topic': 'History of the Internet'}, page_content='The internet originated from research commissioned by the U.S. Department of Defense in the 1960s. It evolved from ARPANET and grew into the global network we use today for communication, commerce, and information sharing.'),
  0.5108059644699097)]

In [13]:
# Replacement content for the language-model passage: new text and a new topic.
updated_doc1 = Document(
    page_content=(
        "Large language models like Llama's are capable of understanding and "
        "generating human-like text. They are developed by the Meta."
    ),
    metadata={"topic": "Large Language Models"},
)

In [17]:
# Replace the stored language-model passage with `updated_doc1`.
#
# The id is resolved from the store rather than pasted from an earlier cell's
# output: ids that were auto-generated in a previous run do not exist after
# Restart & Run All, so a hard-coded UUID would make this update target
# nothing.
llm_doc_ids = vector_store.get(where={"topic": doc1.metadata["topic"]})["ids"]

# Fail loudly rather than continue with an update that cannot match anything.
if not llm_doc_ids:
    raise LookupError(
        f"No stored document with topic {doc1.metadata['topic']!r}; "
        "run the cell that adds `docs` to the vector store first."
    )

# If earlier runs left several copies, only the first match is updated.
vector_store.update_document(document_id=llm_doc_ids[0], document=updated_doc1)

In [18]:
# Re-run the language-model query to see what the store returns after the update.
vector_store.similarity_search("What are Large Language Models?", k=2)

[Document(metadata={'topic': 'Large Language Models'}, page_content="Large language models like Llama's are capable of understanding and generating human-like text. They are developed by the Meta."),
 Document(metadata={'topic': 'Climate Change'}, page_content='Climate change refers to long-term shifts in temperatures and weather patterns. These changes may be natural, but human activities have been the main driver of climate change since the 1800s, primarily due to burning fossil fuels.')]

In [19]:
# Remove the (updated) language-model passage from the collection.
#
# The ids are looked up by the updated document's topic instead of using a UUID
# copied from a previous run, and are passed as a list, which is the form
# `delete(ids=...)` is documented to take.
stale_ids = vector_store.get(where={"topic": updated_doc1.metadata["topic"]})["ids"]

# Skip the call when nothing matches: an empty id list is not a meaningful
# delete request (and may be rejected by the backend -- TODO confirm).
if stale_ids:
    vector_store.delete(ids=stale_ids)

In [20]:
# Final check: run the language-model query once more, now that the delete has run.
vector_store.similarity_search("What are Large Language Models?", k=2)

[Document(metadata={'topic': 'Climate Change'}, page_content='Climate change refers to long-term shifts in temperatures and weather patterns. These changes may be natural, but human activities have been the main driver of climate change since the 1800s, primarily due to burning fossil fuels.'),
 Document(metadata={'topic': 'History of the Internet'}, page_content='The internet originated from research commissioned by the U.S. Department of Defense in the 1960s. It evolved from ARPANET and grew into the global network we use today for communication, commerce, and information sharing.')]