In [1]:
!pip install chromadb



In [32]:
import chromadb

client = chromadb.PersistentClient(path="./chroma_db")

In [33]:
collection_name = "collection2"

# Delete the collection if it already exists
try:
    client.delete_collection(name=collection_name)
except:
    pass

collection = client.create_collection(name=collection_name)

collection.add(
    ids=["1","2","3"],
    embeddings=[[0.1, 0.2, 0.3], [0.2, 0.3, 0.4], [0.3, 0.4, 0.5]],
    metadatas=[{"name": "ravi"},{"name": "adani"},{"name":"ragav"}]
)

In [34]:
print(client.list_collections())

[Collection(name=collection2)]


In [35]:
print(collection.get(ids=["2"],include=["embeddings","metadatas"]))



{'ids': ['2'], 'embeddings': array([[0.2       , 0.30000001, 0.40000001]]), 'documents': None, 'uris': None, 'included': ['embeddings', 'metadatas'], 'data': None, 'metadatas': [{'name': 'adani'}]}


In [36]:
collection.add(
    ids=["4"],
    embeddings=[[0.4, 0.5, 0.6]],
    documents=["ragav is a software engineer"]
)

In [37]:
collection.query(
    query_embeddings=[[0.2, 0.3, 0.4]],
    n_results=2,
    include=["documents"]
)


{'ids': [['2', '3']],
 'embeddings': None,
 'documents': [[None, None]],
 'uris': None,
 'included': ['documents'],
 'data': None,
 'metadatas': None,
 'distances': None}

In [38]:
collection.update(
    ids=["2"],
    embeddings=[[0.2, 0.3, 0.4]],
    documents=["ragav is a software engineer updated"]
)

In [39]:
print(collection.get(ids=["2"],include=["embeddings","documents"]))



{'ids': ['2'], 'embeddings': array([[0.2       , 0.30000001, 0.40000001]]), 'documents': ['ragav is a software engineer updated'], 'uris': None, 'included': ['embeddings', 'documents'], 'data': None, 'metadatas': None}


In [40]:
collection.delete(ids=["4"]) # we can delete like this

In [41]:
for collection in client.list_collections():
      client.delete_collection(collection.name)

In [42]:
print(client.list_collections())

[]


## **CRUD operations on chromabd is completed**

In [4]:
!pip install python-dotenv openai



In [43]:
from dotenv import load_dotenv
from google.colab import userdata


load_dotenv()  # Load environment variables from .env file

import os
# api_key = os.getenv('OPENAI_API_KEY')  # Retrieve the key

# api_key = userdata.get('OPENAI_API_KEY')



# api_key = userdata.get('OPENAI_API_KEY')
# os.environ['OPENAI_API_KEY'] = api_key

api_key = userdata.get('gen_ai')


In [44]:
from google import genai

import numpy as np


collection_name = "collection2"



collection = client.create_collection(name=collection_name)


client = genai.Client(api_key=api_key)

In [45]:
# import openai

def get_embeddings(text):

  responce = client.models.embed_content(model="gemini-embedding-001",contents=[text])

  return responce.embeddings

In [46]:
documents = [

    "The Eiffel Tower is located in Paris.",
    "The Colosseum is in Rome, Italy.",
    "The Taj Mahal is a famous monument in India.",
    "Mount Everest is the highest mountain in the world.",
    "Python is a popular programming language."

]

embeddings = [np.array(get_embeddings(doc)[0].values) for doc in documents]

print(embeddings)

collection.add(
    ids=[str(i) for i in range(len(documents))],
    embeddings = embeddings,
    documents = documents
)



[array([-0.02436587,  0.020168  ,  0.01794457, ..., -0.00755434,
       -0.00661988,  0.00179243]), array([-0.03304885, -0.00805092,  0.00644503, ..., -0.00272787,
        0.01590721,  0.00357121]), array([-0.01745653,  0.02275   , -0.00326398, ...,  0.01432696,
        0.00852369, -0.00263366]), array([-1.1742769e-02, -8.3420690e-03,  9.0263510e-05, ...,
        1.9048631e-02,  3.1855071e-03,  6.5310660e-03])]


In [48]:
query_text = "where is Eiffle tower is located ?"
query_embeddings = np.array(get_embeddings(query_text)[0].values)

results = collection.query(
    query_embeddings=[query_embeddings],
    n_results=2,
    include=["documents","distances"]
)



print(query_text)
print(results["documents"][0])
print(results["distances"][0])

where is Eiffle tower is located ?
['The Eiffel Tower is located in Paris.The Colosseum is in Rome, Italy.', 'The Taj Mahal is a famous monument in India.']
[0.5390791296958923, 0.8940302133560181]


In [None]:
collection.delete(ids=[""]) # to delete

# gemini api connection, embeddings, chromadb complete

In [1]:
!pip install langchain_community langchainhub chromadb langchain



In [4]:
!pip install -qU langchain-google-genai

In [5]:

from langchain.document_loaders import WebBaseLoader

from langchain.text_splitter import RecursiveCharacterTextSplitter

loader = WebBaseLoader(
    web_paths = ["https://www.educosys.com/course/genai"]
)

docs = loader.load()



In [6]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# documents = text_splitter.split_documents(docs)

spilts = text_splitter.split_documents(docs)

In [12]:
from langchain_community.vectorstores import Chroma

from langchain_google_genai import GoogleGenerativeAIEmbeddings

import os

from google.colab import userdata

os.environ["GOOGLE_API_KEY"] = userdata.get('gen_ai')

vectorstore = Chroma.from_documents(documents=spilts, embedding=GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001"))

In [13]:
retriever = vectorstore.as_retriever()



In [14]:
from langchain import hub
# Prompt
prompt = hub.pull("rlm/rag-prompt") # pulls a predefined RAG prompt template from LangChain Hub

In [15]:
from langchain_google_genai import ChatGoogleGenerativeAI



llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0,)


In [16]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser



In [17]:
def format_docs(docs):

  return "/n/n".join(doc.page_content for doc in docs)



In [20]:
rag_chain = ({"context": retriever | format_docs, "question": RunnablePassthrough()}
             | prompt
             | llm
             | StrOutputParser()
             )

In [22]:
rag_chain.invoke("Give me the curriculum for week 1 for genai course")

'For Week 1 of the Generative AI course, the curriculum covers Foundations of Generative AI, including an Introduction to AI, Mathematical Foundations for AI (Probability, Statistics, and Linear Algebra), and Basics of Neural Networks (Gradient Descent, Optimization, and Architectures like Feedforward, RNN, and CNN). It also includes two mini-projects: building a simple neural network using TensorFlow and training an Autoencoder on the MNIST Dataset.'

In [None]:
from langchain_core.runnables import RunnableLambda

def print_text(prompt_text):
  # wirte commands here
  return prompt_text

rag_chain = ({"context" : retriever | format_docs, "question": RunnablePassthrough()}
             | prompt
             | RunnableLambda(print_prompt)
             | llm
             | StrOutputParser())