# Samples for Vertex AI Vector Search

### Chat bot scenario - Using User questions to determine service category

reference https://github.com/GoogleCloudPlatform/generative-ai/blob/main/embeddings/intro-textemb-vectorsearch.ipynb

#### Generate test data
List of json formatted input 

```
{ 'qn':'I would like to change my delivery date', 'cat': '1'},
{ 'qn': 'change delivery address', 'cat': '1'},
{ 'qn': 'I would like to delay the delivery as I will be away', 'cat': '1'},
{ 'qn': 'Can I check the status of order #12345? it has been a while', 'cat': '2'},
{ 'qn': 'How long will order #483746 take? it has been 3 weeks since i ordered', 'cat': '2'},
{ 'qn': 'I would like to check my order status, the order number is 234555', 'cat': '2'},
{ 'qn': 'Can i get a refund , the product is defective', 'cat': '3'},
{ 'qn': 'What is the return process for a spoilt item?', 'cat': '3'},
{ 'qn': 'I got a wrong order, how do i do an exchange?', 'cat': '3'},
{ 'qn': 'The items i received is totally not working, please give me a refund', 'cat': '3'}
```


In [7]:
import pandas as pd
from typing import List, Optional


In [8]:
# load jsonl file 
jsonObj = pd.read_json(path_or_buf="questions-categories.jsonl", lines=True)
#print(jsonObj.qn[0])
#print(jsonObj.qn.to_string(index=False))
texts: List[str] = [jsonObj.qn.tolist]
print(texts)



[<bound method IndexOpsMixin.tolist of 0              I would like to change my delivery date
1                              change delivery address
2    I would like to delay the delivery as I will b...
3    Can I check the status of order #12345? it has...
4    How long will order #483746 take? it has been ...
5    I would like to check my order status, the ord...
6        Can i get a refund , the product is defective
7        What is the return process for a spoilt item?
8        I got a wrong order, how do i do an exchange?
9    The items i received is totally not working, p...
Name: qn, dtype: object>]


In [11]:
# sandbox-project-1673797795
# init the vertexai package
import vertexai
PROJECT_ID="sandbox-project-1673797795"
LOCATION="asia-southeast1"
vertexai.init(project=PROJECT_ID, location=LOCATION, service_account="953640930424-compute@developer.gserviceaccount.com")


In [18]:
# @title { run: "auto" }
MODEL = "text-embedding-004"  # @param ["text-embedding-004", "text-multilingual-embedding-002","text-embedding-preview-0409", "text-multilingual-embedding-preview-0409", "textembedding-gecko@003", "textembedding-gecko-multilingual@001"]
TASK = "RETRIEVAL_DOCUMENT"  # @param ["RETRIEVAL_QUERY", "RETRIEVAL_DOCUMENT", "SEMANTIC_SIMILARITY", "CLASSIFICATION", "CLUSTERING", "QUESTION_ANSWERING", "FACT_VERIFICATION"]
TEXT = "Banana Muffin?"  # @param {type:"string"}
TEXT = texts
TITLE = ""  # @param {type:"string"}
OUTPUT_DIMENSIONALITY = 256  # @param [1, 768, "None"] {type:"raw", allow-input:true}

if not MODEL:
    raise ValueError("MODEL must be specified.")
if not TEXT:
    raise ValueError("TEXT must be specified.")
if TITLE and TASK != "RETRIEVAL_DOCUMENT":
    raise ValueError("TITLE can only be specified for TASK 'RETRIEVAL_DOCUMENT'")
if OUTPUT_DIMENSIONALITY is not None and MODEL not in [
    "text-embedding-004",
    "text-multilingual-embedding-002",
    "text-embedding-preview-0409",
    "text-multilingual-embedding-preview-0409",
]:
    raise ValueError(f"OUTPUT_DIMENTIONALITY cannot be specified for model '{MODEL}'.")
if TASK in ["QUESTION_ANSWERING", "FACT_VERIFICATION"] and MODEL not in [
    "text-embedding-004",
    "text-multilingual-embedding-002",
    "text-embedding-preview-0409",
    "text-multilingual-embedding-preview-0409",
]:
    raise ValueError(f"TASK '{TASK}' is not valid for model '{MODEL}'.")

In [26]:
from vertexai.language_models import TextEmbeddingInput, TextEmbeddingModel


def embed_text(
    model_name: str,
    task_type: str,
    text: str,
    title: str = "",
    output_dimensionality=None,
) -> list:
    """Generates a text embedding with a Large Language Model."""
    model = TextEmbeddingModel.from_pretrained(model_name)
    text_embedding_input = TextEmbeddingInput(
        task_type=task_type, title=title, text=text
    )
    kwargs = (
        dict(output_dimensionality=output_dimensionality)
        if output_dimensionality
        else {}
    )
    embeddings = model.get_embeddings([text_embedding_input], **kwargs)
    return embeddings[0].values


# Get a text embedding for a downstream task.
print(TEXT)
t = 'I would like to change my delivery date'
print(t)
embedding = embed_text(
    model_name=MODEL,
    task_type=TASK,
    text=t,
    title=TITLE,
    output_dimensionality=OUTPUT_DIMENSIONALITY,
)
print(embedding)
print(len(embedding))  # Expected value: {OUTPUT_DIMENSIONALITY}.

[<bound method IndexOpsMixin.tolist of 0              I would like to change my delivery date
1                              change delivery address
2    I would like to delay the delivery as I will b...
3    Can I check the status of order #12345? it has...
4    How long will order #483746 take? it has been ...
5    I would like to check my order status, the ord...
6        Can i get a refund , the product is defective
7        What is the return process for a spoilt item?
8        I got a wrong order, how do i do an exchange?
9    The items i received is totally not working, p...
Name: qn, dtype: object>]
I would like to change my delivery date
[0.022888263687491417, 0.002359802136197686, 0.0019464048091322184, -0.011447086930274963, 0.004758581519126892, -0.016411660239100456, -0.01941780373454094, 0.0016531210858374834, 0.013646667823195457, 0.008857974782586098, 0.00420465087518096, 0.04343574121594429, 0.07413917779922485, 0.013336907140910625, -0.04980216547846794, -0.0445653833