In [3]:
import os
from langchain.chat_models import init_chat_model

# Publish the Azure OpenAI settings through the process environment.
# The AZURE_OPENAI_* / OPENAI_API_VERSION Python names are not defined in this
# cell; presumably an earlier cell sets them — verify on a fresh kernel.
os.environ.update(
    {
        "AZURE_OPENAI_API_KEY": AZURE_OPENAI_API_KEY,
        "AZURE_OPENAI_ENDPOINT": AZURE_OPENAI_ENDPOINT,
        "OPENAI_API_VERSION": OPENAI_API_VERSION,
    }
)

# Chat model used by the agent further down; the Azure deployment name is
# supplied separately from the "provider:model" identifier.
model = init_chat_model(
    model="azure_openai:gpt-4o",
    azure_deployment=AZURE_OPENAI_CHAT_DEPLOYMENT,
)


In [4]:
import getpass
import os
from langchain_openai import AzureOpenAIEmbeddings

# Embedding client shared by the FAISS store construction and the on-disk
# index reload below.
# NOTE(review): the chat-model cell uses OPENAI_API_VERSION while this call
# uses AZURE_OPENAI_API_VERSION — confirm both names are defined and that the
# difference is intentional.
embeddings = AzureOpenAIEmbeddings(
    api_version=AZURE_OPENAI_API_VERSION,
    api_key=AZURE_OPENAI_API_KEY,
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
)

In [5]:
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

In [None]:
# Embed a throwaway string once to discover the vector length the index needs.
probe_vector = embeddings.embed_query("hello world")
embedding_dim = len(probe_vector)

# Flat L2 index sized to the embedding dimension.
index = faiss.IndexFlatL2(embedding_dim)

# Start with an empty store: no documents and no index-to-id mappings yet.
vector_store = FAISS(
    index=index,
    embedding_function=embeddings,
    index_to_docstore_id={},
    docstore=InMemoryDocstore(),
)

In [None]:
# LOAD DOCUMENTS
from langchain_community.document_loaders import WebBaseLoader
url1 = "https://www.primeloans.kotak.com/chargesFees.htm"
url2 = "https://www.primeloans.kotak.com/newCarFinance.htm"

# A single loader fetches both pages.
# NOTE(review): the retrieved text recorded at the end of this notebook
# contains mojibake ("â€“"), which suggests the pages were decoded with the
# wrong charset. Consider passing an explicit encoding to WebBaseLoader —
# confirm against the live pages before changing.
loader_multiple_pages = WebBaseLoader([url1, url2])

docs = loader_multiple_pages.load()

# Show a bounded preview instead of dumping the whole scraped page into the
# cell output; the metadata line identifies which page was loaded.
print(docs[0].page_content[:500])
print(docs[0].metadata)

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Break each loaded page into overlapping character windows so that every
# chunk is small enough to embed and retrieve on its own.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # chunk size (characters)
    chunk_overlap=200,  # chunk overlap (characters)
    add_start_index=True,  # track index in original document
)
all_splits = text_splitter.split_documents(docs)

# The input is the set of loaded web pages (not a blog post), so report it as such.
print(f"Split {len(docs)} web page(s) into {len(all_splits)} sub-documents.")

In [None]:
# Add every chunk to the vector store and keep the ids it hands back.
# NOTE: re-running this cell against the same store adds the chunks again.
document_ids = vector_store.add_documents(all_splits)

# Peek at the first few ids as a sanity check.
print(document_ids[:3])

# Persist the index to the local "faiss_index" folder, which is the folder the
# next cell reloads from.
vector_store.save_local(folder_path="faiss_index")

In [7]:
# Reload the persisted index from disk, replacing any in-memory store.
# SECURITY: allow_dangerous_deserialization=True opts in to unpickling the
# saved docstore. That is only safe for an index folder this notebook (or
# another trusted source) wrote; never point this at an untrusted folder.
vector_store = FAISS.load_local(
    folder_path="faiss_index",
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)


In [8]:
from langchain.tools import tool

@tool(response_format="content_and_artifact")
def retrieve_context(query: str):
    """Retrieve information to help answer a query."""
    # The docstring above is left untouched on purpose: presumably the tool
    # decorator exposes it to the model as the tool description — verify
    # before rewording it.
    #
    # Returns a 2-tuple: the text block handed back as the tool's content,
    # and the raw retrieved documents as the artifact.
    hits = vector_store.similarity_search(query, k=3)  # top three matches
    blocks = [
        f"Source: {hit.metadata}\nContent: {hit.page_content}" for hit in hits
    ]
    # Blank line between entries keeps each source/content pair readable.
    return "\n\n".join(blocks), hits

In [9]:
from langchain.agents import create_agent


tools = [retrieve_context]
# Custom system instructions for the agent. The wording names the corpus that
# is actually indexed (the Kotak Prime Loans charges/fees and new-car-finance
# pages) rather than "a blog post", so the model is told accurately what the
# retrieval tool can answer.
prompt = (
    "You have access to a tool that retrieves context from the Kotak Prime "
    "Loans website (charges & fees and new car finance pages). "
    "Use the tool to help answer user queries."
)
agent = create_agent(model, tools, system_prompt=prompt)

In [10]:
# Question put to the agent. Other sample questions tried against this index:
#   "what are the Indicative ROI for Loans **"
#   "What is the Margin Money Scheme?"
query = "What is the Clearing Mandate swap Charges?"

# Single-turn conversation containing just the user's question.
agent_input = {"messages": [{"role": "user", "content": query}]}

# Print the newest message of each streamed state as the agent progresses.
for step in agent.stream(agent_input, stream_mode="values"):
    latest_message = step["messages"][-1]
    latest_message.pretty_print()


What is the Clearing Mandate swap Charges?
Tool Calls:
  retrieve_context (call_BaygRNXRuMhp56wjAS6bukMf)
 Call ID: call_BaygRNXRuMhp56wjAS6bukMf
  Args:
    query: Clearing Mandate swap Charges
Name: retrieve_context

Source: {'source': 'https://www.primeloans.kotak.com/chargesFees.htm', 'title': 'Charges & Fees', 'description': '', 'language': 'No language found.', 'start_index': 2738}
Content: 1	Issue of Duplicate copy of the Agreement / Duplicate NOC	Rs.885/-
2	Cancellation of Contract (other than foreclosure and prepayment charges ) at specific request of the Borrower and agreed by the Lender Rs.25000/-.
3	Collection Charges for Clearing Mandate (per mandate)	At Actuals
4	Clearing Mandate swap Charges 	Rs.500 per swap
5	Repayment Schedule/Account Outstanding Break up statement ( Physical)	Rs.885/-
6	Part / Full Pre-payment charges ( Non-individual borrowers for all purposes and by individual borrowers for business use)	Fees on Part or Full Prepayment – Charges of 4% + GST on th