In [3]:
import vertexai
from vertexai.language_models import TextEmbeddingInput
from vertexai.language_models import TextEmbeddingModel
import numpy as np
from dotenv import load_dotenv
import os
import chromadb

In [4]:
load_dotenv()
Project = os.getenv('GOOGLE_VERTEX_PROJECT')
Region = os.getenv('REGION_GOOGLE')
vertexai.init(project=Project, location=Region)

In [5]:
#same text as last time
textToSearch = [
    "John really likes his pizza",
    "The sky is blue",
    "John's pizza is a circle",
    "John has severe issues",
    "John needs to escape the horrible situtation he is in",
    "John must run until he can escape",
    "Spiralling spiralling spiralling",
    "Oh look a squirell",
    "John dies a brutal death none could have foreseen",
    "Water is important to drink make sure you get enough"
]
model = TextEmbeddingModel.from_pretrained('textembedding-gecko@003')
searchInput = TextEmbeddingInput(textToSearch, 'RETRIVAL_DOCUMENT')


In [22]:
class EmbeddingFunction(chromadb.EmbeddingFunction):
    isQuery = False
    model = TextEmbeddingModel.from_pretrained('textembedding-gecko@003')

    def __call__(self, input: chromadb.Documents) -> chromadb.Embeddings:
        if self.isQuery:
            task = 'RETRIVAL_DOCUMENT'
        else:
            task = 'RETRIVAL_QUERY'
        

        return [embedding.values for embedding in self.model.get_embeddings(input)][0]


In [23]:
chromaClient = chromadb.Client()
chromaClient.delete_collection("basicRAG")
embed_funct = EmbeddingFunction()
embed_funct.isQuery = False

db = chromaClient.create_collection(name="basicRAG", embedding_function=embed_funct)

In [None]:
db.add(embeddings=[embedding.values for embedding in model.get_embeddings(textToSearch)], ids=[str(i) for i in range(len(textToSearch))])

print(db.peek(1))

Insert of existing embedding ID: 0
Insert of existing embedding ID: 1
Insert of existing embedding ID: 2
Insert of existing embedding ID: 3
Insert of existing embedding ID: 4
Insert of existing embedding ID: 5
Insert of existing embedding ID: 6
Insert of existing embedding ID: 7
Insert of existing embedding ID: 8
Insert of existing embedding ID: 9
Add of existing embedding ID: 0
Add of existing embedding ID: 1
Add of existing embedding ID: 2
Add of existing embedding ID: 3
Add of existing embedding ID: 4
Add of existing embedding ID: 5
Add of existing embedding ID: 6
Add of existing embedding ID: 7
Add of existing embedding ID: 8
Add of existing embedding ID: 9


{'ids': ['0'], 'embeddings': array([[ 1.65364128e-02,  2.44827811e-02, -3.88693735e-02,
        -2.84202136e-02,  2.04493124e-02, -1.71391126e-02,
         3.11659686e-02, -4.52025011e-02,  3.56891155e-02,
        -5.68566087e-04,  9.69562773e-03,  2.84164436e-02,
        -4.29993793e-02, -1.45395529e-02, -3.51296254e-02,
        -2.20473278e-02,  1.31591409e-02,  1.07965367e-02,
        -7.93996826e-03, -8.96787122e-02, -1.24726584e-02,
         4.65983013e-03, -1.58549659e-02, -4.75064898e-03,
         1.29675558e-02, -1.64196249e-02,  1.69451889e-02,
        -6.59091324e-02, -1.60754356e-03,  1.87165271e-02,
        -9.29248556e-02,  8.70947726e-03, -4.76368517e-02,
         4.36788090e-02, -3.16439942e-02, -4.41407003e-02,
        -1.45840673e-02,  4.22155224e-02,  4.03991230e-02,
         3.03589590e-02,  3.87703371e-03, -3.84497544e-04,
        -3.35862152e-02, -2.47564595e-02,  8.39487314e-02,
        -4.78018075e-03, -1.49872405e-02,  9.95223317e-03,
        -3.06536686e-02, -7

In [35]:
db.count()

10

In [39]:
query = "What was the shape of John's pizza"
embedding = [model.get_embeddings([query])[0].values]
result = db.query(query_embeddings=embedding)
[relevantEmbeddings] = result['ids']
print("texts in order of relevance")
for item in relevantEmbeddings:
    print(textToSearch[int(item)])

texts in order of relevance
John's pizza is a circle
John really likes his pizza
John dies a brutal death none could have foreseen
John must run until he can escape
John has severe issues
John needs to escape the horrible situtation he is in
Spiralling spiralling spiralling
The sky is blue
Oh look a squirell
Water is important to drink make sure you get enough
