# Azure AI Search: vector search, step by step

## Setup API client


In [20]:
import os

import azure.identity
import dotenv
from azure.search.documents.indexes import SearchIndexClient
from azure.core.credentials import AzureKeyCredential
import openai
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    SearchIndex, SimpleField, SearchableField, SearchFieldDataType
)
from azure.core.credentials import AzureKeyCredential
from azure.core.exceptions import ResourceExistsError



In [None]:
search_api_key = ""

dotenv.load_dotenv()

AZURE_SEARCH_SERVICE = "genai-azureaisearch"
search_endpoint = f"https://{AZURE_SEARCH_SERVICE}.search.windows.net"

index_client = SearchIndexClient(endpoint=search_endpoint, credential=AzureKeyCredential(search_api_key))

In [None]:


# Function to generate embeddings using Azure OpenAI
def generate_embeddings(text):
    response = openai.Embedding.create(input=text, engine="text-embedding-ada-002")
    return response['data'][0]['embedding']

## Search a tiny index

### Create index

In [6]:
from azure.search.documents.indexes.models import (
    HnswAlgorithmConfiguration,
    HnswParameters,
    SearchField,
    SearchFieldDataType,
    SearchIndex,
    SimpleField,
    VectorSearch,
    VectorSearchAlgorithmKind,
    VectorSearchProfile,
)

AZURE_SEARCH_TINY_INDEX = "teeenytinyindex"

index = SearchIndex(
    name=AZURE_SEARCH_TINY_INDEX, 
    fields=[
        SimpleField(name="id", type=SearchFieldDataType.String, key=True),
        SearchField(name="embedding", 
                    type=SearchFieldDataType.Collection(SearchFieldDataType.Single), 
                    searchable=True, 
                    vector_search_dimensions=3,
                    vector_search_profile_name="embedding_profile")
    ],
    vector_search=VectorSearch(
        algorithms=[HnswAlgorithmConfiguration( # Hierachical Navigable Small World, IVF
                            name="hnsw_config",
                            kind=VectorSearchAlgorithmKind.HNSW,
                            parameters=HnswParameters(metric="cosine"),
                        )],
        profiles=[VectorSearchProfile(name="embedding_profile", algorithm_configuration_name="hnsw_config")]
    )
)



In [9]:
!pip install --upgrade azure-search-documents --pre


Collecting azure-search-documents
  Downloading azure_search_documents-11.6.0b11-py3-none-any.whl.metadata (22 kB)
Downloading azure_search_documents-11.6.0b11-py3-none-any.whl (338 kB)
Installing collected packages: azure-search-documents
  Attempting uninstall: azure-search-documents
    Found existing installation: azure-search-documents 11.5.2
    Uninstalling azure-search-documents-11.5.2:
      Successfully uninstalled azure-search-documents-11.5.2
Successfully installed azure-search-documents-11.6.0b11


In [11]:
from azure.search.documents.indexes.models import (
    SearchIndex, SimpleField, SearchField,
    SearchFieldDataType, VectorSearch, VectorSearchProfile,
    HnswAlgorithmConfiguration, VectorSearchAlgorithmKind, HnswParameters
)

AZURE_SEARCH_TINY_INDEX = "teeenytinyindex"

index = SearchIndex(
    name=AZURE_SEARCH_TINY_INDEX,
    fields=[
        SimpleField(name="id", type=SearchFieldDataType.String, key=True),
        SearchField(
            name="embedding",
            type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
            searchable=True,
            vector_search_dimensions=3,
            vector_search_profile_name="embedding_profile"
        )
    ],
    vector_search=VectorSearch(
        algorithms=[
            HnswAlgorithmConfiguration(
                name="hnsw_config",
                kind=VectorSearchAlgorithmKind.HNSW,
                parameters=HnswParameters(metric="cosine")
            )
        ],
        profiles=[
            VectorSearchProfile(
                name="embedding_profile",
                algorithm_configuration_name="hnsw_config"
            )
        ]
    )
)


### Insert a few documents with tiny vectors

In [18]:
from azure.search.documents import SearchClient
search_client = SearchClient(endpoint=search_endpoint, index_name=AZURE_SEARCH_TINY_INDEX, credential=AzureKeyCredential(search_api_key))

# Ensure the documents conform to the schema defined in the index
documents = [
    {"id": "1"},
    {"id": "2"},
    {"id": "3"}
]

# Upload the documents to the index
# Ensure the embedding field is correctly formatted as a list of floats

result = search_client.upload_documents(documents=documents)
print(f"Upload result: {result}")

Upload result: [<azure.search.documents._generated.models._models_py3.IndexingResult object at 0x000001BE2E3D4C10>, <azure.search.documents._generated.models._models_py3.IndexingResult object at 0x000001BE2C691310>, <azure.search.documents._generated.models._models_py3.IndexingResult object at 0x000001BE2C692610>]


### Search using vector similarity

In [19]:
from azure.search.documents.models import VectorizedQuery

r = search_client.search(search_text=None, vector_queries=[
    VectorizedQuery(vector=[-2, -1, -1], k_nearest_neighbors=3, fields="embedding")])
for doc in r:
    print(f"id: {doc['id']}, score: {doc['@search.score']}")

ImportError: cannot import name 'HybridCountAndFacetMode' from 'azure.search.documents._generated.models' (c:\Users\cheekish\Desktop\Gen-AI\myenv\Lib\site-packages\azure\search\documents\_generated\models\__init__.py)

## Search a larger index

In [8]:
import azure.identity
import dotenv
import openai

dotenv.load_dotenv()

# Initialize Azure search variables
AZURE_SEARCH_SERVICE = os.getenv("AZURE_SEARCH_SERVICE")
AZURE_SEARCH_ENDPOINT = f"https://{AZURE_SEARCH_SERVICE}.search.windows.net"

# Set up OpenAI client based on environment variables
dotenv.load_dotenv()
AZURE_OPENAI_SERVICE = os.getenv("AZURE_OPENAI_SERVICE")
AZURE_OPENAI_ADA_DEPLOYMENT = os.getenv("AZURE_OPENAI_ADA_DEPLOYMENT")

token_provider = azure.identity.get_bearer_token_provider(azure_credential, "https://cognitiveservices.azure.com/.default")
openai_client = openai.AzureOpenAI(
    api_version="2023-07-01-preview",
    azure_endpoint=f"https://{AZURE_OPENAI_SERVICE}.openai.azure.com",
    azure_ad_token_provider=token_provider)

def get_embedding(text):
    get_embeddings_response = openai_client.embeddings.create(model=AZURE_OPENAI_ADA_DEPLOYMENT, input=text)
    return get_embeddings_response.data[0].embedding

In [9]:
AZURE_SEARCH_FULL_INDEX = "gptkbindex"
search_client = SearchClient(AZURE_SEARCH_ENDPOINT, AZURE_SEARCH_FULL_INDEX, credential=azure_credential)

search_query = "learning about underwater activities"
search_vector = get_embedding(search_query)
r = search_client.search(search_text=None, top=5, vector_queries=[
    VectorizedQuery(vector=search_vector, k_nearest_neighbors=5, fields="embedding")])
for doc in r:
    content = doc["content"].replace("\n", " ")[:150]
    print(f"Score: {doc['@search.score']:.5f}\tContent:{content}")

Score: 0.80732	Content: Some of the lessons covered under PerksPlus include: · Skiing and snowboarding lessons · Scuba diving lessons · Surfing lessons · Horseback riding le
Score: 0.79299	Content: PerksPlus is not only designed to support employees' physical health, but also their mental health. Regular exercise has been shown to reduce stress,
Score: 0.79254	Content: Under the Northwind Health Plus plan, habilitation services are covered up to a certain dollar amount and number of visits. This amount and the numbe
Score: 0.78812	Content: It is important to understand which type of therapy is best suited for the individual's needs and goals. It is also important to note that habilitati
Score: 0.78661	Content: Occupational Therapy Occupational therapy helps individuals develop, maintain, or restore skills for daily living and work. It can help individuals w
