# Other Approaches
1. We create an index without using an indexer or a skillset.
2. The search index will contain multiple vector fields.
3. We perform various kinds of vector search.

## Install dependencies

In [None]:
# Install the packages this notebook needs; each line is an IPython %pip magic.
# python-dotenv provides load_dotenv, used by the configuration cell to read the .env file.
%pip install python-dotenv
# azure-core provides AzureKeyCredential.
%pip install azure-core
# azure-search-documents provides the search index, indexing and query clients used below.
%pip install azure-search-documents
# NOTE(review): azure-storage-blob and azure-identity are not imported by the cells visible here.
%pip install azure-storage-blob
%pip install azure-identity
# openai provides the AzureOpenAI client used for embeddings and chat completions.
%pip install openai

## Prerequisites: Azure Resources Needed
1. Azure AI Search Resource
    - Basic Pricing Tier or Higher
    - Semantic Ranker Setting Enabled
    - System Assigned Managed Identity Enabled

2. Azure OpenAI
    - Deploy GPT-4o, text-embedding-3-large
    - Assign the Cognitive Services OpenAI User role to the Azure AI Search resource (its system-assigned managed identity).

3. Azure Storage Account
    - Create a container called nasabooks and upload the nasabooks PDFs.

4. Azure AI services multi-service account

5. Find envfile.txt and rename it to .env, then fill in the required keys and endpoints.

## Load Azure configurations

You always need to run this cell, even when skipping other sections, because the later cells depend on the settings it loads.

In [None]:
from dotenv import load_dotenv
import os

# Copy the key/value pairs from the local .env file into the process environment.
load_dotenv()

# --- Azure OpenAI ---------------------------------------------------------
azure_openai_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
azure_openai_key = os.getenv("AZURE_OPENAI_API_KEY")
azure_openai_deployment = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME")
azure_openai_embeddings_deployment = os.getenv("AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT")
azure_openai_api_version = os.getenv("AZURE_OPENAI_API_VERSION")
# Vector length shared by the index's vector fields and by the `dimensions`
# argument of the embedding requests, so both sides always agree.
# NOTE(review): the prerequisites name text-embedding-3-large; if that is the
# deployed model, 1536 is a shortened size requested via `dimensions` - confirm.
azure_openai_embedding_size = 1536

# --- Azure AI Search ------------------------------------------------------
azure_search_service_endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT")
azure_search_service_admin_key = os.getenv("AZURE_SEARCH_ADMIN_KEY")
azure_search_service_index_name = "az-search-index-002"

# --- Loaded for completeness; not referenced by the cells in this notebook --
azure_storage_connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
azure_ai_services_key = os.getenv("AZURE_AI_MULTISERVICE_KEY")

# os.getenv returns None for anything missing from the environment, which would
# otherwise only surface later as an unrelated-looking error inside an SDK call.
# Warn here instead of raising so that partially configured runs keep working.
_settings_used_below = {
    "AZURE_OPENAI_ENDPOINT": azure_openai_endpoint,
    "AZURE_OPENAI_API_KEY": azure_openai_key,
    "AZURE_OPENAI_CHAT_DEPLOYMENT_NAME": azure_openai_deployment,
    "AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT": azure_openai_embeddings_deployment,
    "AZURE_OPENAI_API_VERSION": azure_openai_api_version,
    "AZURE_SEARCH_SERVICE_ENDPOINT": azure_search_service_endpoint,
    "AZURE_SEARCH_ADMIN_KEY": azure_search_service_admin_key,
}
_missing_settings = [name for name, value in _settings_used_below.items() if not value]
if _missing_settings:
    print("WARNING: missing settings (check your .env file): " + ", ".join(_missing_settings))

## Create an index

In [None]:
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    SearchField,
    SimpleField,
    SearchableField,
    SearchFieldDataType,
    VectorSearch,
    HnswAlgorithmConfiguration,
    VectorSearchProfile,
    AzureOpenAIVectorizer,
    AzureOpenAIVectorizerParameters,
    SearchIndex,
    SemanticConfiguration,
    SemanticPrioritizedFields,
    SemanticField,
    SemanticSearch,
)

# Authenticate to Azure AI Search with the admin key loaded in the configuration cell.
credential = AzureKeyCredential(azure_search_service_admin_key)

# Client used to create or update index definitions on the search service.
index_client = SearchIndexClient(
    endpoint=azure_search_service_endpoint,
    credential=credential,
)

# The vectorizer's `model_name` identifies the embedding model itself (for example
# "text-embedding-3-large"), which is not necessarily what the deployment is called.
# Set AZURE_OPENAI_EMBEDDINGS_MODEL_NAME when the two differ; when it is unset the
# deployment name is used, which is what this cell always did before.
azure_openai_embeddings_model_name = (
    os.getenv("AZURE_OPENAI_EMBEDDINGS_MODEL_NAME") or azure_openai_embeddings_deployment
)

# Index schema: a string key, three searchable text fields and two vector fields.
# Both vector fields use the same dimensions and the same vector search profile.
fields = [
    SimpleField(
        name="id",
        type=SearchFieldDataType.String,
        key=True,
        sortable=True,
        filterable=True,
        facetable=True,
    ),
    SearchableField(name="title", type=SearchFieldDataType.String),
    SearchableField(name="content", type=SearchFieldDataType.String),
    # `category` is filterable so it can be used in filter expressions at query time.
    SearchableField(name="category", type=SearchFieldDataType.String, filterable=True),
    SearchField(
        name="titleVector",
        type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
        searchable=True,
        vector_search_dimensions=azure_openai_embedding_size,
        vector_search_profile_name="myHnswProfile",
    ),
    SearchField(
        name="contentVector",
        type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
        searchable=True,
        vector_search_dimensions=azure_openai_embedding_size,
        vector_search_profile_name="myHnswProfile",
    ),
]

# Vector search configuration: one HNSW algorithm, one profile that ties that
# algorithm to the Azure OpenAI vectorizer, and the vectorizer definition itself.
vector_search = VectorSearch(
    algorithms=[
        HnswAlgorithmConfiguration(name="myHnsw"),
    ],
    profiles=[
        VectorSearchProfile(
            name="myHnswProfile",
            algorithm_configuration_name="myHnsw",
            vectorizer_name="myVectorizer",
        ),
    ],
    vectorizers=[  # a vectorizer is software that performs vectorization
        AzureOpenAIVectorizer(
            vectorizer_name="myVectorizer",
            kind="azureOpenAI",
            # No API key is passed here.
            # NOTE(review): presumably the search service calls Azure OpenAI with
            # its managed identity, as set up in the prerequisites - confirm.
            parameters=AzureOpenAIVectorizerParameters(
                resource_url=azure_openai_endpoint,
                deployment_name=azure_openai_embeddings_deployment,
                model_name=azure_openai_embeddings_model_name,
            ),
        ),
    ],
)

# Semantic configuration: which fields act as title, keywords and content.
semantic_config = SemanticConfiguration(
    name="my-semantic-config",
    prioritized_fields=SemanticPrioritizedFields(
        title_field=SemanticField(field_name="title"),
        keywords_fields=[SemanticField(field_name="category")],
        content_fields=[SemanticField(field_name="content")],
    ),
)

# Create the semantic settings with the configuration
semantic_search = SemanticSearch(configurations=[semantic_config])

# Create (or update, if it already exists) the index with vector and semantic settings.
index = SearchIndex(
    name=azure_search_service_index_name,
    fields=fields,
    vector_search=vector_search,
    semantic_search=semantic_search,
)
result = index_client.create_or_update_index(index)
print(f' {result.name} created')

## Creating embeddings separately

We compute the embeddings ourselves by calling Azure OpenAI directly, without using an indexer or a skillset.

In [None]:
from openai import AzureOpenAI
import json

# Azure OpenAI client bound to the embeddings deployment.
openai_client = AzureOpenAI(
    # to get version: https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation
    api_version=azure_openai_api_version,
    azure_endpoint=azure_openai_endpoint,
    azure_deployment=azure_openai_embeddings_deployment,
    api_key=azure_openai_key)


def embed_texts(texts):
    """Embed a list of strings with the configured embeddings deployment.

    Sends all strings in a single request, asking for vectors of
    `azure_openai_embedding_size` dimensions, and returns the embeddings in
    the order they appear in the response.
    NOTE(review): callers pair results with inputs by position, which assumes
    the response preserves input order - confirm against the API reference.
    """
    response = openai_client.embeddings.create(
        input=texts,
        model=azure_openai_embeddings_deployment,
        dimensions=azure_openai_embedding_size)
    return [item.embedding for item in response.data]


# Read the AzureServices.json
path = os.path.join('Data/azureservices/', 'AzureServices.json')
with open(path, 'r', encoding='utf-8') as file:
    input_data = json.load(file)

# One request for all titles and one for all contents.
titles = [item['title'] for item in input_data]
contents = [item['content'] for item in input_data]
title_embeddings = embed_texts(titles)
content_embeddings = embed_texts(contents)

# Attach the vectors to each document under the field names the index expects.
for i, item in enumerate(input_data):
    item['titleVector'] = title_embeddings[i]
    item['contentVector'] = content_embeddings[i]

# Output embeddings json file
output_path = os.path.join('Data/azureservices/', 'AzureServicesVectors.json')
output_directory = os.path.dirname(output_path)
os.makedirs(output_directory, exist_ok=True)
# Written with the same explicit encoding that was used to read the input file.
with open(output_path, "w", encoding="utf-8") as f:
    json.dump(input_data, f)

## Upload data into the Search Index

In [None]:
from azure.search.documents import SearchIndexingBufferedSender

# Load the documents, including their precomputed vectors, from the JSON file
# produced by the embeddings step.
output_path = os.path.join('Data/azureservices/', 'AzureServicesVectors.json')
with open(output_path, 'r') as vectors_file:
    documents = json.load(vectors_file)

# SearchIndexingBufferedSender uploads the documents in batches optimized for indexing.
sender_options = dict(
    endpoint=azure_search_service_endpoint,
    index_name=azure_search_service_index_name,
    credential=credential,
)
with SearchIndexingBufferedSender(**sender_options) as buffered_sender:
    # Queue an upload action for every document.
    buffered_sender.upload_documents(documents=documents)
print(f"Uploaded {len(documents)} documents in total")

## Vector search with multilingual capabilities

In [None]:
from azure.search.documents import SearchClient
from azure.search.documents.models import VectorizableTextQuery
from azure.core.credentials import AzureKeyCredential

# Query client for the index, authenticated with the Azure AI Search admin key.
# `credential` and `search_client` are reused by the search cells that follow.
credential = AzureKeyCredential(azure_search_service_admin_key)
search_client = SearchClient(
    endpoint=azure_search_service_endpoint,
    index_name=azure_search_service_index_name,
    credential=credential,
)

# Pure vector search with a non-English query
# (Dutch for 'tools for software development').
query = "tools voor softwareontwikkeling"

# The raw text is handed over as a VectorizableTextQuery against contentVector;
# no embedding is computed in this cell.
vector_query = VectorizableTextQuery(
    text=query,
    fields="contentVector",
    k_nearest_neighbors=3,
)

results = search_client.search(
    search_text=None,  # no keyword component
    vector_queries=[vector_query],
    select=["title", "content", "category"],
)

# One block per hit, followed by a blank line.
for hit in results:
    print(
        f"Title: {hit['title']}",
        f"Score: {hit['@search.score']}",
        f"Content: {hit['content']}",
        f"Category: {hit['category']}\n",
        sep="\n",
    )

## Perform an Exhaustive KNN exact nearest neighbor search
This example shows how you can exhaustively search your vector index regardless of what index you have, HNSW or ExhaustiveKNN. You can use this to calculate the ground-truth values.

In [None]:
# Pure vector search with `exhaustive=True` set on the vector query.
query = "tools for software development"

vector_query = VectorizableTextQuery(
    text=query,
    fields="contentVector",
    k_nearest_neighbors=3,
    exhaustive=True,
)

results = search_client.search(
    search_text=None,  # no keyword component
    vector_queries=[vector_query],
    select=["title", "content", "category"],
)

# One block per hit, followed by a blank line.
for hit in results:
    print(
        f"Title: {hit['title']}",
        f"Score: {hit['@search.score']}",
        f"Content: {hit['content']}",
        f"Category: {hit['category']}\n",
        sep="\n",
    )

## Perform a Cross-Field Vector Search
This example shows a cross-field vector search, which lets you query multiple vector fields at the same time. Note: make sure the same embedding model was used for all the vector fields you query together.

In [None]:
# Pure vector search where a single vector query targets two vector fields,
# given as one comma-separated string.
query = "Azure Firewall"

vector_query = VectorizableTextQuery(
    text=query,
    fields="contentVector, titleVector",
    k_nearest_neighbors=3,
)

results = search_client.search(
    search_text=None,  # no keyword component
    vector_queries=[vector_query],
    select=["title", "content", "category"],
)

# One block per hit, followed by a blank line.
for hit in results:
    print(
        f"Title: {hit['title']}",
        f"Score: {hit['@search.score']}",
        f"Content: {hit['content']}",
        f"Category: {hit['category']}\n",
        sep="\n",
    )

## Perform a Multi-Vector Search
This example shows a cross-field vector search that allows you to query multiple vector fields at the same time by passing in multiple query vectors.

<b>Note:</b> in this case, you can pass in query vectors from two different embedding models to the corresponding vector fields in your index.

In [None]:
# Multi-vector search: the same text is submitted as two separate vector
# queries, the first against titleVector and the second against contentVector.
query = "tools for software development"

vector_query_1, vector_query_2 = (
    VectorizableTextQuery(text=query, k_nearest_neighbors=3, fields=vector_field)
    for vector_field in ("titleVector", "contentVector")
)

results = search_client.search(
    search_text=None,  # no keyword component
    vector_queries=[vector_query_1, vector_query_2],
    select=["title", "content", "category"],
)

# One block per hit, followed by a blank line.
for hit in results:
    print(
        f"Title: {hit['title']}",
        f"Score: {hit['@search.score']}",
        f"Content: {hit['content']}",
        f"Category: {hit['category']}\n",
        sep="\n",
    )

## Perform a weighted multi-vector search
This example shows a cross-field vector search that queries multiple vector fields at the same time by passing in multiple query vectors, each with a different weight.

In [None]:
# Weighted multi-vector search: one vector query per field, each carrying its
# own `weight` (titleVector gets 2, contentVector gets 0.5).
query = "tools for software development"

field_weights = {"titleVector": 2, "contentVector": 0.5}
vector_query_1, vector_query_2 = (
    VectorizableTextQuery(
        text=query,
        k_nearest_neighbors=3,
        fields=vector_field,
        weight=field_weight,
    )
    for vector_field, field_weight in field_weights.items()
)

results = search_client.search(
    search_text=None,  # no keyword component
    vector_queries=[vector_query_1, vector_query_2],
    select=["title", "content", "category"],
)

# One block per hit, followed by a blank line.
for hit in results:
    print(
        f"Title: {hit['title']}",
        f"Score: {hit['@search.score']}",
        f"Content: {hit['content']}",
        f"Category: {hit['category']}\n",
        sep="\n",
    )

## Perform a Pure Vector Search with a filter
This example shows how to apply filters to a vector query. Note that you can choose between pre-filtering (the default) and post-filtering.

In [None]:
from azure.search.documents.models import VectorFilterMode

# Pure vector search restricted by a filter on the `category` field, with the
# filter mode explicitly set to PRE_FILTER.
query = "tools for software development"

vector_query = VectorizableTextQuery(
    text=query,
    fields="contentVector",
    k_nearest_neighbors=3,
)

search_options = dict(
    vector_queries=[vector_query],
    vector_filter_mode=VectorFilterMode.PRE_FILTER,
    filter="category eq 'Developer Tools'",
    select=["title", "content", "category"],
)
results = search_client.search(search_text=None, **search_options)

# One block per hit, followed by a blank line.
for hit in results:
    print(
        f"Title: {hit['title']}",
        f"Score: {hit['@search.score']}",
        f"Content: {hit['content']}",
        f"Category: {hit['category']}\n",
        sep="\n",
    )

## Perform a Semantic Hybrid Search and Integrate with Chat
This puts everything together: it runs a semantic hybrid search and sends the retrieved sources to the language model, which generates the answer.

In [None]:
# Imported in this cell too, so that it also runs when the embeddings cell was
# skipped in the current kernel session.
from openai import AzureOpenAI

# Semantic hybrid search: the same text is used as the keyword query and as two
# vector queries, with query_type="semantic" and the index's semantic configuration.
# Variation to try: "Give me details on Azure App Service in French"
query = "Give me details on Azure App Service"

vector_query_1 = VectorizableTextQuery(text=query,
                                       k_nearest_neighbors=3,
                                       fields="titleVector",
                                       weight=1)

vector_query_2 = VectorizableTextQuery(text=query,
                                       k_nearest_neighbors=3,
                                       fields="contentVector",
                                       weight=1)
results = search_client.search(
    search_text=query,
    vector_queries=[vector_query_1, vector_query_2],
    select=["title", "content", "category"],
    query_type="semantic",
    semantic_configuration_name='my-semantic-config',
    top=3
)

# Azure OpenAI client for chat. Unlike the embeddings client, it is not bound to
# a deployment here; the chat deployment is passed as `model` on the request.
openai_client = AzureOpenAI(
    # to get version: https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation
    api_version=azure_openai_api_version,
    azure_endpoint=azure_openai_endpoint,
    api_key=azure_openai_key)

# Provide instructions to the model
SYSTEM_PROMPT="""
You are an AI assistant that helps users learn from the information found in the source material.
Answer the query using only the sources provided below.
Use bullets if the answer has multiple points.
If the answer is longer than 3 sentences, provide a summary.
Answer ONLY with the facts listed in the list of sources below. Cite your source when you answer the question
If there isn't enough information below, say you don't know.
Do not generate answers that don't use the sources below.
Query: {query}
Sources:\n{sources}
"""

# Use a unique separator to make the sources distinct.
# We chose repeated equal signs (=) on a line of their own because it's unlikely
# the source documents contain this sequence. The leading newline keeps the
# separator from being glued onto the end of the previous source's category.
SOURCE_SEPARATOR = "\n=================\n"
sources_formatted = SOURCE_SEPARATOR.join(
    f'TITLE: {document["title"]}, CONTENT: {document["content"]}, CATEGORY: {document["category"]}'
    for document in results
)

# The filled-in prompt (instructions + query + sources) is sent as one user message.
response = openai_client.chat.completions.create(
    messages=[
        {
            "role": "user",
            "content": SYSTEM_PROMPT.format(query=query, sources=sources_formatted)
        }
    ],
    model=azure_openai_deployment
)

print(response.choices[0].message.content)
