In [2]:
import os

from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.models import VectorizedQuery
from azure.search.documents.indexes.models import (
        SearchIndex,
        SearchField,
        SearchFieldDataType,
        SimpleField,
        SearchableField,
        VectorSearch,
        VectorSearchProfile,
        HnswAlgorithmConfiguration,
    )
from azure.core.exceptions import ResourceNotFoundError


In [4]:
def get_all_search_indexes():
    # Create a SearchIndexClient
    credential = AzureKeyCredential(key)
    index_client = SearchIndexClient(service_endpoint, credential)
    
    # Get all indexes
    indexes = list(index_client.list_indexes())

    final_indexes = [index.name for index in indexes]
        
    return final_indexes

In [5]:
print("Retrieving all indexes...")
# get all indexes
all_indexes = get_all_search_indexes()
print(all_indexes)

Retrieving all indexes...
['defect-data', 'defects-index', 'defects', 'genai', 'genai1234', 'genai12345', 'hotel-index', 'hotels-sample-index', 'hotels-vector-index', 'new_defects', 'teeenytinyindex']


In [6]:
def get_bug_index(name: str):
    fields = [
        SimpleField(name="ID", type=SearchFieldDataType.String, key=True),
        SearchableField(name="Work_Item_Type", type=SearchFieldDataType.String, filterable=True, facetable=True),
        SearchableField(name="State", type=SearchFieldDataType.String, filterable=True, facetable=True),
        SearchableField(name="Area", type=SearchFieldDataType.String, filterable=True),
        SearchField(
            name="TitleVector",
            type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
            searchable=True,
            vector_search_dimensions=1536,
             vector_search_profile_name="my-vector-config",
    ),

        SearchableField(name="Title", type=SearchFieldDataType.String),
        SearchableField(name="Description", type=SearchFieldDataType.String),
        SearchableField(name="Repro_Steps", type=SearchFieldDataType.String),
        SearchableField(name="Created_Date", type=SearchFieldDataType.String, sortable=True),
        SearchableField(name="Created_By", type=SearchFieldDataType.String),
        SearchableField(name="Assigned_To", type=SearchFieldDataType.String),
        SearchableField(name="Iteration_Path", type=SearchFieldDataType.String),
        SearchableField(name="Severity", type=SearchFieldDataType.String, filterable=True, facetable=True),
        SearchableField(name="Priority", type=SearchFieldDataType.Double),
        SearchableField(name="Efforts", type=SearchFieldDataType.String),
        SearchableField(name="Comment", type=SearchFieldDataType.String),
        SearchableField(name="Closed_By", type=SearchFieldDataType.String),
        SearchableField(name="Closed_Date", type=SearchFieldDataType.String),
        SearchableField(name="Closing_Comment", type=SearchFieldDataType.String),
        SearchableField(name="Reason", type=SearchFieldDataType.String),
        SearchableField(name="Story_Points", type=SearchFieldDataType.String),
        SearchableField(name="Parent_Feature_Id", type=SearchFieldDataType.String),
        SearchableField(name="Parent_Feature_Title", type=SearchFieldDataType.String),
 ]

    vector_search = VectorSearch(
        profiles=[VectorSearchProfile(name="my-vector-config", algorithm_configuration_name="my-algorithms-config")],
        algorithms=[HnswAlgorithmConfiguration(name="my-algorithms-config")],
 )

    return SearchIndex(name=name, fields=fields, vector_search=vector_search)


In [7]:
credential = AzureKeyCredential(key)
index_client = SearchIndexClient(service_endpoint, credential)
index = get_bug_index(index_name)
print(type(index))

print("Creating the index if it does not exist")
try:
    index_client.get_index(index.name)
    print(f"Index '{index.name}' already exists.")
except ResourceNotFoundError:
    index_client.create_index(index)
    print(f"Index '{index.name}' created successfully.")


print(f"Index {index.name} created successfully.")


<class 'azure.search.documents.indexes.models._index.SearchIndex'>
Creating the index if it does not exist
Index 'new_defects' already exists.
Index new_defects created successfully.


In [8]:
index_client.delete_index(index_name)


In [9]:
index = get_bug_index(index_name)
try:
    index_client.get_index(index.name)
except ResourceNotFoundError:
    print("creating a new one")
    index_client.create_index(index)



creating a new one


In [10]:
def get_existing_index_fields(index_name):
    """
    Get the existing index schema to understand current field structure
    """
    try:
        credential = AzureKeyCredential(key)
        index_client = SearchIndexClient(service_endpoint, credential)
        existing_index = index_client.get_index(index_name)
        
        print(f"Existing index '{index_name}' fields:")
        for field in index.fields:
            print(f"Name: {field.name}, Type: {field.type}, Key: {field.key}, Searchable: {field.searchable}, Filterable: {field.filterable}")

        
        return existing_index
    except Exception as e:
        print(f"Error getting existing index: {e}")
        return None

In [11]:
get_existing_index_fields(index_name)

Existing index 'new_defects' fields:
Name: ID, Type: Edm.String, Key: True, Searchable: False, Filterable: False
Name: Work_Item_Type, Type: Edm.String, Key: False, Searchable: True, Filterable: True
Name: State, Type: Edm.String, Key: False, Searchable: True, Filterable: True
Name: Area, Type: Edm.String, Key: False, Searchable: True, Filterable: True
Name: TitleVector, Type: Collection(Edm.Single), Key: None, Searchable: True, Filterable: None
Name: Title, Type: Edm.String, Key: False, Searchable: True, Filterable: False
Name: Description, Type: Edm.String, Key: False, Searchable: True, Filterable: False
Name: Repro_Steps, Type: Edm.String, Key: False, Searchable: True, Filterable: False
Name: Created_Date, Type: Edm.String, Key: False, Searchable: True, Filterable: False
Name: Created_By, Type: Edm.String, Key: False, Searchable: True, Filterable: False
Name: Assigned_To, Type: Edm.String, Key: False, Searchable: True, Filterable: False
Name: Iteration_Path, Type: Edm.String, Key: F

<azure.search.documents.indexes.models._index.SearchIndex at 0x7bdf7ff1ad40>

In [None]:
def get_embeddings(text: str):
    """
    Generate embeddings for the given text using Azure OpenAI
    """
    import openai


    # Handle None or empty text
    if not text or text.strip() == "":
        text = "No title provided"

    response = openai.Embedding.create(input=text, engine="text-embedding-ada-002")
    return response.data[0].embedding

In [13]:
def get_defect_documents():
    """
    Prepare defect data for indexing with proper field name mapping
    """
    data = [
        {
            "Work Item Type": "Bug",
            "ID": 897,
            "State": "Closed",
            "Area": "Gen AI QEP 2.0\\Create - Existing Solution Enhancements",
            "Title": "When a solution is created on a name which is already existing, the sysyem should give an api error alert. ",
            "Description": None,
            "Repro Steps": "<div><span style=\"display:inline !important;\">When a user creates a solution by a duplicate name, there should be an alert message &quot;Solution already exists&quot;.</span><br> </div>",
            "Created Date": "11/27/2023 5:43:52 PM",
            "Created By": None,
            "Assigned To": None,
            "Iteration Path": "Gen AI QEP 2.0\\PI-2.1",
            "Severity": "3 - Medium",
            "Priority": 2.0,
            "Efforts": None,
            "Comment": None,
            "Closed By": "Siva Reddy Dirisanapu <siva-reddy.dirisanapu@capgemini.com>",
            "Closed Date": "2023-01-12T16:25:11",
            "Closing Comment": None,
            "Reason": "Fixed and verified",
            "Story Points": None,
            "Parent Feature Id": 555.0,
            "Parent Feature Title": None
        }
    ]

    # Transform data to match existing index structure
    transformed_data = []
    for i, item in enumerate(data):
        transformed_item = {
            "ID": str(item["ID"]),
            "Work_Item_Type": item["Work Item Type"],
            "State": item["State"],
            "Area": item["Area"],
            "Title": item["Title"],
            "Description": item["Description"] or "No description provided",
            "Repro_Steps": item["Repro Steps"],
            "Created_Date": item["Created Date"],
            "Created_By": item["Created By"],
            "Assigned_To": item["Assigned To"],
            "Iteration_Path": item["Iteration Path"],
            "Severity": item["Severity"],
            "Priority": str(item["Priority"]),
            "Efforts": item["Efforts"],
            "Comment": item["Comment"],
            "Closed_By": item["Closed By"],
            "Closed_Date": item["Closed Date"],
            "Closing_Comment": item["Closing Comment"],
            "Reason": item["Reason"],
            "Story_Points": item["Story Points"],
            "Parent_Feature_Id": str(item["Parent Feature Id"]),
            "Parent_Feature_Title": item["Parent Feature Title"],
            "TitleVector": get_embeddings(item["Title"])  # Use existing vector field
        }
        transformed_data.append(transformed_item)

    return transformed_data

In [1]:
# Create a SearchClient to interact with the index
print(service_endpoint)
print(index_name)
client = SearchClient(service_endpoint, index_name, credential)
defect_docs = get_defect_documents()
print(defect_docs)
print("Uploading defect documents to the index...")
# create embeddings
client.upload_documents(documents=defect_docs)

print("defects documents uploaded successfully.")

NameError: name 'service_endpoint' is not defined

In [15]:
def get_all_defect_documents():
    """
    Retrieves all defect documents from the Azure Cognitive Search index.
    """

    print(f"key is ........{key}")
    print(f"service_endpoint is ........{service_endpoint}")
    try:
        credential = AzureKeyCredential(key)
        search_client = SearchClient(service_endpoint, index_name, credential)

        # search_text="*" matches all documents
        results = search_client.search(
            search_text="*",
            select=["ID", "Title", "Description", "Severity", "State", "Work_Item_Type"],
            top=1000
        )

        documents = list(results)

        print(f"Retrieved {len(documents)} defect document(s).")
        for doc in documents:
            print(f"  - ID: {doc['ID']}, Title: {doc.get('Title', 'N/A')}, Severity: {doc.get('Severity', 'N/A')}, State: {doc.get('State', 'N/A')}")

        return documents

    except Exception as ex:
        print(f"Error retrieving defect documents: {ex}")
        raise

In [17]:
def get_defect_documents_by_id(document_ids):
    """
    Retrieves defect documents by ID
    """
    try:
        credential = AzureKeyCredential(key)
        search_client = SearchClient(service_endpoint, index_name, credential)

        documents = []
        for doc_id in document_ids:
            try:
                doc = search_client.get_document(key=str(doc_id))
                documents.append(doc)
                print(f"  - ID: {doc['ID']}, Title: {doc.get('Title', 'N/A')}, Severity: {doc.get('Severity', 'N/A')}")
            except Exception as e:
                print(f"  - Could not find defect document with ID {doc_id}: {e}")

        return documents

    except Exception as ex:
        print(f"Error: {ex}")
        raise


In [20]:
get_defect_documents_by_id(["897"])

  - ID: 897, Title: When a solution is created on a name which is already existing, the sysyem should give an api error alert. , Severity: 3 - Medium


[{'ID': '897',
  'Work_Item_Type': 'Bug',
  'State': 'Closed',
  'Area': 'Gen AI QEP 2.0\\Create - Existing Solution Enhancements',
  'Title': 'When a solution is created on a name which is already existing, the sysyem should give an api error alert. ',
  'Description': 'No description provided',
  'Repro_Steps': '<div><span style="display:inline !important;">When a user creates a solution by a duplicate name, there should be an alert message &quot;Solution already exists&quot;.</span><br> </div>',
  'Created_Date': '11/27/2023 5:43:52 PM',
  'Created_By': None,
  'Assigned_To': None,
  'Iteration_Path': 'Gen AI QEP 2.0\\PI-2.1',
  'Severity': '3 - Medium',
  'Priority': '2.0',
  'Efforts': None,
  'Comment': None,
  'Closed_By': 'Siva Reddy Dirisanapu <siva-reddy.dirisanapu@capgemini.com>',
  'Closed_Date': '2023-01-12T16:25:11',
  'Closing_Comment': None,
  'Reason': 'Fixed and verified',
  'Story_Points': None,
  'Parent_Feature_Id': '555.0',
  'Parent_Feature_Title': None}]

In [25]:
def single_vector_search_defects(query):
    """
    Perform vector search on defect titles
    """
    search_client = SearchClient(service_endpoint, index_name, AzureKeyCredential(key))
    vector_query = VectorizedQuery(
        vector=get_embeddings(query), 
        k_nearest_neighbors=5, 
        fields="TitleVector"  # Use existing vector field
    )

    results = search_client.search(
        vector_queries=[vector_query],
        select=["ID", "Title", "Description", "Severity", "State"],
    )

    print(f"Vector search results for query: '{query}'")
    for result in results:
        print(f"  - ID: {result.get('ID', 'N/A')}, Title: {result.get('Title', 'N/A')}, Score: {result.get('@search.score', 'N/A')}")

    return list(results)

In [26]:
single_vector_search_defects(query = "API error alert")

Vector search results for query: 'API error alert'
  - ID: 897, Title: When a solution is created on a name which is already existing, the sysyem should give an api error alert. , Score: 0.8666446


[]

In [30]:
def single_vector_search_with_filter_defects(query="API error", severity_filter="3 - Medium"):
    """
    Perform vector search with severity filter
    """
    search_client = SearchClient(service_endpoint, index_name, AzureKeyCredential(key))
    vector_query = VectorizedQuery(
        vector=get_embeddings(query),
        k_nearest_neighbors=5,
        fields="TitleVector"  # Use existing vector field
    )

    results = search_client.search(
        search_text=query,
        vector_queries=[vector_query],
        filter=f"Severity eq '{severity_filter}'",
        select=[ "ID", "Title", "Severity", "State"],
        top=5
    )

    print(f"Filtered vector search results for query: '{query}' with severity: '{severity_filter}'")
    for result in results:
        print(f"  - ID: {result.get('ID', 'N/A')}, Title: {result.get('Title', 'N/A')}, Severity: {result.get('Severity', 'N/A')}")

    return list(results)

In [31]:
single_vector_search_with_filter_defects()

Filtered vector search results for query: 'API error' with severity: '3 - Medium'
  - ID: 897, Title: When a solution is created on a name which is already existing, the sysyem should give an api error alert. , Severity: 3 - Medium


[]

In [37]:
def multi_query_hybrid_search_defects(queries):
    """
    Perform hybrid search for multiple defect-related queries
    """
    search_client = SearchClient(service_endpoint, index_name, AzureKeyCredential(key))
    
    all_results = []
    
    for query in queries:
        vector_query = VectorizedQuery(
            vector=get_embeddings(query), 
            k_nearest_neighbors=3, 
            fields="TitleVector"  # Use existing vector field
        )
        
        results = search_client.search(
            search_text=query,
            vector_queries=[vector_query],
            select=["ID", "Title", "Severity", "State"],
            top=3
        )
        
        query_results = list(results)
        all_results.append({
            "query": query,
            "results": query_results
        })
        
        print(f"Results for defect query: '{query}'")
        for result in query_results:
            print(f"  - ID: {result.get('ID', 'N/A')}, Title: {result.get('Title', 'N/A')}, Severity: {result.get('Severity', 'N/A')}")
        print("---")
    
    return all_results

In [38]:
multi_query_hybrid_search_defects([
    "What happens if a solution is created with a duplicate name?",
    "Is there an alert when trying to create an existing solution?",
    "Defect related to duplicate solution creation in Gen AI QEP 2.0 PI-2.1",
    "System behavior when solution name already exists",
    "Missing API error for duplicate solution creation",
    "Closed bugs with medium severity and fixed status in Gen AI QEP"
])

Results for defect query: 'What happens if a solution is created with a duplicate name?'
  - ID: 897, Title: When a solution is created on a name which is already existing, the sysyem should give an api error alert. , Severity: 3 - Medium
---
Results for defect query: 'Is there an alert when trying to create an existing solution?'
  - ID: 897, Title: When a solution is created on a name which is already existing, the sysyem should give an api error alert. , Severity: 3 - Medium
---
Results for defect query: 'Defect related to duplicate solution creation in Gen AI QEP 2.0 PI-2.1'
  - ID: 897, Title: When a solution is created on a name which is already existing, the sysyem should give an api error alert. , Severity: 3 - Medium
---
Results for defect query: 'System behavior when solution name already exists'
  - ID: 897, Title: When a solution is created on a name which is already existing, the sysyem should give an api error alert. , Severity: 3 - Medium
---
Results for defect query: 'M

[{'query': 'What happens if a solution is created with a duplicate name?',
  'results': [{'Severity': '3 - Medium',
    'ID': '897',
    'State': 'Closed',
    'Title': 'When a solution is created on a name which is already existing, the sysyem should give an api error alert. ',
    '@search.score': 0.03333333507180214,
    '@search.reranker_score': None,
    '@search.highlights': None,
    '@search.captions': None,
    '@search.document_debug_info': None}]},
 {'query': 'Is there an alert when trying to create an existing solution?',
  'results': [{'Severity': '3 - Medium',
    'ID': '897',
    'State': 'Closed',
    'Title': 'When a solution is created on a name which is already existing, the sysyem should give an api error alert. ',
    '@search.score': 0.03333333507180214,
    '@search.reranker_score': None,
    '@search.highlights': None,
    '@search.captions': None,
    '@search.document_debug_info': None}]},
 {'query': 'Defect related to duplicate solution creation in Gen AI QEP

In [102]:
def delete_defect_document(defect_id):
    """
    Delete a defect document by hotelId (the key field)
    """
    try:
        credential = AzureKeyCredential(key)
        search_client = SearchClient(service_endpoint, index_name, credential)
        
        search_client.delete_documents(documents=[{"hotelId": str(defect_id)}])
        print(f"Defect document with ID '{defect_id}' deleted.")
        
    except Exception as e:
        print(f"Error deleting defect document with ID '{defect_id}': {e}")

In [103]:
def delete_index(index_name):
    """
    Delete the entire search index
    """
    credential = AzureKeyCredential(key)
    index_client = SearchIndexClient(service_endpoint, credential)

    try:
        index_client.delete_index(index_name)
        print(f"Index '{index_name}' deleted successfully.")
    except Exception as e:
        print(f"Error deleting index '{index_name}': {e}")