<a href="https://colab.research.google.com/github/zeeltrivedi1298/Contextual_Retrieval_based_RAG_System/blob/main/code_RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import bs4
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter




# 1. Load, chunk and index the contents of the blog to create a retriever.
# This cell only builds the loader. The SoupStrainer limits HTML parsing to
# elements that carry one of the listed CSS classes.
loader = WebBaseLoader(
    web_paths=("https://k21academy.com/ai-ml/azure/create-azure-openai-service-resources-using-console-cli-step-by-step-activity-guide/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        ),
    },
)


In [None]:
# Run the loader and keep the resulting documents for the following cells.
docs = loader.load()
docs  # bare expression: the notebook displays the loaded documents


In [None]:
# NOTE(review): this repeats the loader.load() call from the previous cell,
# presumably fetching the page a second time - confirm whether that is intended.
docs = loader.load()
len(docs)  # number of documents returned by the loader


In [None]:
# Display the loaded documents for inspection.
docs

In [None]:
from langchain_openai import AzureOpenAIEmbeddings




# Azure OpenAI embedding client used to index the document chunks.
# Every value below is a placeholder: replace it with the settings of your own
# Azure OpenAI resource before running the notebook.
embedding = AzureOpenAIEmbeddings(
    model="deployment name",
    # If not provided, will read env variable AZURE_OPENAI_ENDPOINT
    azure_endpoint="Your Endpoint/",
    api_key="Your Key",
    openai_api_version="API Version",
)




In [None]:
# Cut the loaded documents into chunks of up to 1000 units with an overlap of
# 200 between neighbouring chunks, so context is not lost at chunk borders.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

# Embed every chunk and keep the vectors in an in-memory store.
vectorstore = InMemoryVectorStore.from_documents(documents=splits, embedding=embedding)


In [None]:
# Expose the vector store through the retriever interface.
# `as_retriever` is a method and has to be called: without the parentheses the
# name is bound to the method object itself, which `create_retrieval_chain`
# cannot use as a retriever.
retriever = vectorstore.as_retriever()

In [None]:
# Number of chunks produced by the text splitter.
len(splits)

In [None]:
# 2. Incorporate the retriever into a question-answering chain.
# System message for the chat prompt. The instruction sentences are joined with
# single spaces; `{context}` is the template variable left for the retrieved
# text.
system_prompt = (
    " ".join(
        [
            "You are an assistant for question-answering tasks.",
            "Use the following pieces of retrieved context to answer the question.",
            "If you don't know the answer, say that you don't know.",
            "Use three sentences maximum and keep the answer concise.",
        ]
    )
    + "\n\n"
    + "{context}"
)


In [None]:
# Two-message chat template: the system instructions first, then the user's
# question, which is supplied through the `{input}` variable.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)


In [None]:
# Build the answering step (chat model + prompt) and put the retriever in front
# of it to form the full retrieval chain.
# NOTE(review): `llm` is not defined in any cell visible in this notebook; a
# chat model must be created before this cell can run - confirm where it is
# supposed to come from.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)


In [None]:
# Run the chain on a sample question; the question is passed under the "input" key.
response = rag_chain.invoke({"input": "Azure OpenAI Service?"})
response["answer"]  # bare expression: the notebook displays the "answer" entry


In [2]:
# Install Gradio, which the web interface cell below imports (notebook shell command).
!pip install gradio



In [None]:

import gradio as gr

def qa_from_url(url, question):
    """Answer ``question`` using the content of the web page at ``url``.

    The page is loaded, split into chunks, embedded into an in-memory vector
    store and queried through a retrieval chain. The function relies on the
    notebook-level ``llm`` and ``prompt`` objects, which must exist before it
    is called.

    Args:
        url: Address of the web page to read.
        question: Question to answer from that page.

    Returns:
        The generated answer, or a human-readable message when the page
        yields no content or any step of the pipeline fails.
    """
    # This function is the Gradio callback, so every failure (bad URL, network
    # error, Azure error) is reported as text in the UI instead of raising.
    try:
        # 1. Load, chunk and index the contents of the blog to create a retriever.
        loader = WebBaseLoader(
            web_paths=(url,),
            bs_kwargs=dict(
                parse_only=bs4.SoupStrainer(
                    class_=("post-content", "post-title", "post-header")
                )
            ),
        )

        docs = loader.load()

        if not docs:
            return "No content found at the provided URL."

        embedding = AzureOpenAIEmbeddings(
            # configure Azure embeddings here
        )

        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        splits = text_splitter.split_documents(docs)

        # A page without any of the filtered CSS classes can load as a document
        # with empty text, which produces no chunks to index.
        if not splits:
            return "No content found at the provided URL."

        vectorstore = InMemoryVectorStore.from_documents(
            documents=splits, embedding=embedding
        )
        # Build the retriever from this URL's own index rather than reusing a
        # notebook-level one created for a different page.
        retriever = vectorstore.as_retriever()

        # 2. Incorporate the retriever into a question-answering chain.
        question_answer_chain = create_stuff_documents_chain(llm, prompt)
        rag_chain = create_retrieval_chain(retriever, question_answer_chain)

        response = rag_chain.invoke({"input": question})
        return response["answer"]
    except Exception as e:
        return f"An error occurred: {e}"

# Gradio form around qa_from_url: two text inputs (URL, question) and one text
# output for the answer.
iface = gr.Interface(
    fn=qa_from_url,
    inputs=[gr.Textbox(label="URL"), gr.Textbox(label="Question")],
    outputs=gr.Textbox(label="Answer"),
    title="Webpage Q&A",
    description="Ask a question about any webpage!",
)

# Start the interface.
iface.launch()


In [None]:
# We can provide the web page URL and the question right here in Google Colab.
