In [21]:
import os
from dotenv import load_dotenv, find_dotenv
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes import SearchIndexClient 
from azure.search.documents.indexes.models import (
    ComplexField,
    CorsOptions,
    SearchIndex,
    SearchFieldDataType,
    SimpleField,
    SearchableField
)

In [22]:
load_dotenv(find_dotenv('../application/.env'))

True

In [23]:
endpoint = os.environ["SEARCH_ENDPOINT"]
key = os.environ["SEARCH_API_KEY"]

print(endpoint,key)

client = SearchIndexClient(endpoint, AzureKeyCredential(key))
print(client._endpoint)

https://azureseachudemy.search.windows.net lVSNeZwfoWiZQ2NTdoAvUunLzbUHc6KUOIyugheLwvAzSeBdNvhl
https://azureseachudemy.search.windows.net


In [24]:
name = "restaurant-langchain" #"restaurant"
fields = [
    SimpleField(name="restaurantId", type=SearchFieldDataType.String, key=True),
    SimpleField(name="averageCost", type=SearchFieldDataType.Double),
    SearchableField(name="description", type=SearchFieldDataType.String),
    ComplexField(name="address", fields=[
        SimpleField(name="streetAddress", type=SearchFieldDataType.String),
        SimpleField(name="city", type=SearchFieldDataType.String),
    ])
]

In [25]:
from azure.core.exceptions import HttpResponseError

cors_options = CorsOptions(allowed_origins=["*"], max_age_in_seconds=60)
scoring_profiles = []

index = SearchIndex(
    name=name,
    fields=fields,
    scoring_profiles=scoring_profiles,
    cors_options=cors_options)

try:
    result = client.create_index(index)
    print(f"Index '{name}' created.")
except HttpResponseError as e:
    print(f"Index '{name}' already exists.",e.message)


Index 'restaurant-langchain' already exists. (ResourceNameAlreadyInUse) Cannot create index 'restaurant-langchain' because it already exists.
Code: ResourceNameAlreadyInUse
Message: Cannot create index 'restaurant-langchain' because it already exists.
Exception Details:	(CannotCreateExistingIndex) Cannot create index 'restaurant-langchain' because it already exists.
	Code: CannotCreateExistingIndex
	Message: Cannot create index 'restaurant-langchain' because it already exists.


### Add documents to the index

In [26]:
import os
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient

index_name = "restaurant-langchain" #"restaurant"
endpoint = os.environ["SEARCH_ENDPOINT"]
key = os.environ["SEARCH_API_KEY"]

documents = [
    {
        'restaurantId': '1',
        'averageCost': 50.0,
        'description': 'Traditional Italian cuisine with a modern twist.',
        'address': {
            'streetAddress': '123 Via Roma',
            'city': 'Rome',
        }
    },
    {
        'restaurantId': '2',
        'averageCost': 70.0,
        'description': 'Family-friendly Italian restaurant with classic dishes.',
        'address': {
            'streetAddress': '456 Via Milano',
            'city': 'Milan',
        }
    },
    {
        'restaurantId': '3',
        'averageCost': 35.0,
        'description': 'Cozy trattoria offering regional specialties.',
        'address': {
            'streetAddress': '789 Via Napoli',
            'city': 'Naples',
        }
    }
]

search_client = SearchClient(endpoint, index_name, AzureKeyCredential(key))
result = search_client.upload_documents(documents=documents)


In [27]:
for res in result:
    print(f"Upload of document with ID '{res.key}' succeeded: {res.succeeded}")

Upload of document with ID '1' succeeded: True
Upload of document with ID '2' succeeded: True
Upload of document with ID '3' succeeded: True


Now we can retrieve Documents from ACS

In [8]:
results = search_client.search(search_text="Family friendly?")

for result in results:
    print(result)

{'address': {'streetAddress': '456 Via Milano', 'city': 'Milan'}, 'description': 'Family-friendly Italian restaurant with classic dishes.', 'averageCost': 70.0, 'restaurantId': '2', '@search.score': 1.8678205, '@search.reranker_score': None, '@search.highlights': None, '@search.captions': None}


### Using LangChain with ACS

In [9]:
import os
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores.azuresearch import AzureSearch

In [10]:
embeddings: OpenAIEmbeddings = OpenAIEmbeddings(deployment="text-embedding-ada-002", chunk_size=1)
index_name: str = "langchain-example"
vector_store: AzureSearch = AzureSearch(
    azure_search_endpoint=os.environ.get("SEARCH_ENDPOINT"),
    azure_search_key=os.environ.get("SEARCH_API_KEY"),
    index_name=index_name,
    embedding_function=embeddings.embed_query,
)

In [47]:
from langchain_community.document_loaders import DirectoryLoader, TextLoader

loader = DirectoryLoader('./restaurant', glob="**/*.txt", loader_cls=TextLoader)
#data = loader.load()
#print(len(data))
# Query the index
results = search_client.search(search_text="*") 
# Extract the documents from the results
data = []


KeyError: 'page_content'

In [46]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 120,
    chunk_overlap  = 20,
    length_function = len,
    is_separator_regex = False,
)
docs = text_splitter.split_documents(data)
print(len(docs))

AttributeError: 'dict' object has no attribute 'page_content'

In [None]:
vector_store.add_documents(documents=docs)

In [None]:

docs = vector_store.similarity_search(
    query="When are the opening hours of the restaurant?",
    k=3,
    search_type="similarity",
)
print(docs)


[Document(page_content='Restaurant Opening Hours:', metadata={'id': 'M2E5MjZjMjMtOTliZC00MDE3LWFhNjgtM2EwZjYxMzc1NDlm', 'source': 'restaurant/opening_hours.txt'}), Document(page_content='Special Hours: Our kitchen closes 30 minutes before the restaurant closing time.', metadata={'id': 'MjJhMzg0NzktODg3My00YjkyLTg3ZWItZWQ2N2M1MTc5ODM0', 'source': 'restaurant/opening_hours.txt'}), Document(page_content='Monday to Thursday: 11:00 AM - 11:00 PM\nFriday: 11:00 AM - 12:00 AM (midnight)', metadata={'id': 'YjhkNGI0ZGMtMWQwMC00NDMzLTg0ODAtOGM2ZWRjNzgxODcx', 'source': 'restaurant/opening_hours.txt'})]


In [None]:
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI

qa = RetrievalQA.from_chain_type(llm=ChatOpenAI(), chain_type="stuff", retriever=vector_store.as_retriever())
qa.run("When are the opening hours of the restaurant?")

  warn_deprecated(


"The restaurant's opening hours are Monday to Thursday from 11:00 AM to 11:00 PM, and on Friday from 11:00 AM to 12:00 AM (midnight)."