## Installing required libraries

In [1]:
!pip install --quiet --upgrade langchain langchain-community gradio pypdf langchain-openai faiss-cpu langchain-google-genai pillow
!pip install --quiet tavily-python

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m31.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.1/57.1 MB[0m [31m14.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m320.1/320.1 kB[0m [31m18.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m298.0/298.0 kB[0m [31m19.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.5/27.5 MB[0m [31m63.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.3/41.3 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.9/94.9 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

## Loading credentials from .env or Colab secrets

In [2]:
# import os
# from dotenv import load_dotenv

# load_dotenv('azure_credentials.env')

# EMBEDDING_MODEL_NAME = os.getenv("EMBEDDING_MODEL_NAME")
# EMBEDDING_ENDPOINT = os.getenv("EMBEDDING_ENDPOINT")
# EMBEDDING_API_VERSION = os.getenv("EMBEDDING_API_VERSION")
# EMBEDDING_API_KEY = os.getenv("EMBEDDING_API_KEY")
# AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
# API_VERSION = os.getenv("API_VERSION")
# AZURE_ENDPOINT = os.getenv("AZURE_ENDPOINT")

In [5]:
from google.colab import userdata

# Azure OpenAI settings, read from the Colab secrets store via `userdata.get`.
# The EMBEDDING_* values are passed to the embeddings client; the remaining
# three are passed to the Azure chat clients.
EMBEDDING_MODEL_NAME = userdata.get("EMBEDDING_MODEL_NAME")
EMBEDDING_ENDPOINT = userdata.get("EMBEDDING_ENDPOINT")
EMBEDDING_API_VERSION = userdata.get("EMBEDDING_API_VERSION")
EMBEDDING_API_KEY = userdata.get("EMBEDDING_API_KEY")
AZURE_OPENAI_API_KEY = userdata.get("AZURE_OPENAI_API_KEY")
# NOTE(review): this secret name is lowercase while every other one is upper-case
# (the commented-out .env variant reads "API_VERSION") — confirm the Colab secret
# really is stored as "api_version".
API_VERSION = userdata.get("api_version")
AZURE_ENDPOINT = userdata.get("AZURE_ENDPOINT")

In [6]:
import os

# Export the Google and Tavily API keys to the process environment, but only
# when they are not already set; the values come from the Colab secrets store.
for _secret_name in ("GOOGLE_API_KEY", "TAVILY_API_KEY"):
    if _secret_name not in os.environ:
        os.environ[_secret_name] = userdata.get(_secret_name)

## Importing required libraries

In [30]:
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.vectorstores import FAISS

from langchain_core.vectorstores import InMemoryVectorStore
from langchain_core.runnables import RunnableParallel

from langchain_core.prompts import ChatPromptTemplate

from langchain_community.document_loaders import PyPDFLoader

from langchain_google_genai import ChatGoogleGenerativeAI

from utils import *

In [8]:
from langchain import hub
from langchain.agents import AgentExecutor, create_openai_tools_agent, create_openai_functions_agent

from langchain_community.tools.tavily_search import TavilySearchResults

## Defining LLMs

In [40]:
# Both Azure chat clients are built from exactly the same connection settings,
# so the settings are written once and unpacked into each constructor.
_azure_chat_settings = dict(
    api_key=AZURE_OPENAI_API_KEY,
    api_version=API_VERSION,
    azure_endpoint=AZURE_ENDPOINT,
)

# Two independent client instances (they are passed separately to the chains below).
llm = AzureChatOpenAI(**_azure_chat_settings)
llm_2 = AzureChatOpenAI(**_azure_chat_settings)

# Gemini chat client; no key is passed here (GOOGLE_API_KEY is exported to os.environ earlier).
gemini_llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro-latest")

In [10]:
# Azure OpenAI embeddings client, configured from the EMBEDDING_* secrets loaded above.
# It is later handed to `get_retriever` to build the vector-store retriever.
embeddings = AzureOpenAIEmbeddings(
    model=EMBEDDING_MODEL_NAME,
    azure_endpoint= EMBEDDING_ENDPOINT,
    api_key= EMBEDDING_API_KEY,
    openai_api_version= EMBEDDING_API_VERSION
)

In [11]:
# Tool list for the web-search agent: a single Tavily search tool configured with max_results=1.
tools = [TavilySearchResults(max_results=1)]
# Pull the agent prompt "hwchase17/openai-functions-agent" from the LangChain hub;
# swap in a different prompt here to change the agent's instructions.
prompt = hub.pull("hwchase17/openai-functions-agent")



## Prompts

In [31]:
from utils import prompt_ as rag_prompt, simple_prompt

## Getting Vector DB's Retriever

In [13]:
# Build the retriever for the listed PDF(s) using the Azure embeddings client.
# NOTE(review): `get_retriever` is not defined in this notebook — presumably it is
# provided by `from utils import *`; verify.
# NOTE(review): "book_name.pdf" looks like a placeholder path — confirm the real file name.
# The top-level `await` relies on the notebook kernel running the cell inside an event loop.
retriever = await get_retriever(["book_name.pdf"], embeddings=embeddings)

## Chains

In [34]:
# Construct the OpenAI Functions agent from the second Azure chat client,
# the Tavily tool list, and the prompt pulled from the hub.
agent = create_openai_functions_agent(llm_2, tools, prompt)
# Create an agent executor by passing in the agent and tools; it is invoked with
# {"input": <question>} and its result is read via the 'output' key in generate_response.
agent_executor = AgentExecutor(agent=agent, tools=tools)

In [118]:
from utils import get_rag_chain


# RAG chains: same retriever and RAG prompt, answered by two different models.
rag_chain_1 = get_rag_chain(retriever, gemini_llm, rag_prompt)  # Gemini with RAG
rag_chain_2 = get_rag_chain(retriever, llm_2, rag_prompt)       # Azure chat model with RAG
# gemini_chain = get_rag_chain(retriever, gemini_llm, simple_prompt) ## Gem Simple

# Custom RAG variant that receives both Azure chat clients.
rag_imp = get_rag_chain_custom(retriever, llm_2, llm, rag_prompt)

# Baselines without retrieval: simple prompt -> model -> plain string.
chain3 = simple_prompt | llm_2 | StrOutputParser()       # Azure chat model, no RAG
chain4 = simple_prompt | gemini_llm | StrOutputParser()  # Gemini, no RAG

# Run every chain on the same input. The key order matters: save_history derives
# the CSV column order from the keys of the result dictionary.
parallel_chain = RunnableParallel(
    {
        "GemRag": rag_chain_1,
        "GPTRag": rag_chain_2,
        "GPT": chain3,
        "Gem": chain4,
        "CrazyRag": rag_imp,
    }
)


# Gradio

In [123]:
import gradio as gr

# List for storing previously asked questions

In [124]:
# In-memory list intended to hold previously asked questions.
# NOTE(review): no visible cell reads from or appends to this list — answers are
# persisted to data.csv by save_history instead; this may be a leftover.
qa_history = []

In [133]:
import csv
import os


In [135]:
def write_or_append_to_csv(file_name, data, headers=None):
    """Append rows to a CSV file, writing a header row first when the file is new.

    Args:
        file_name: Path of the CSV file to create or append to.
        data: Iterable of rows (each row an iterable of cell values).
        headers: Optional header row. It is written only when the file does not
            exist yet or exists but is empty, so repeated calls never duplicate it.
    """
    # A zero-byte file has no header row either; treating it like a missing file
    # keeps readers that skip the first row from discarding a data row.
    needs_headers = not os.path.exists(file_name) or os.path.getsize(file_name) == 0

    # newline='' lets the csv module control line endings itself.
    with open(file_name, mode='a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        if needs_headers and headers:
            writer.writerow(headers)

        writer.writerows(data)

In [150]:
def save_history(question, resp1, resp2, file_name="data.csv"):
    """Append one question and all of its answers as a single CSV row.

    Args:
        question: The question text; stored in the first column.
        resp1: Mapping of chain name -> answer. Its keys become the middle column
            headers and its values the matching cells, in the mapping's own order.
        resp2: The web-search answer; stored in the last column ("Web Search").
        file_name: CSV file to append to. Defaults to "data.csv", the same
            default that search_csv reads from.
    """
    headers = ["questions", *resp1.keys(), "Web Search"]
    row = [question, *resp1.values(), resp2]

    # The header row is only written by the helper when the file is new.
    write_or_append_to_csv(file_name, [row], headers)


In [155]:
# Gradio callback: run every chain plus the web-search agent and persist the answers.
def generate_response(message, _=None):
    """Answer `message` with all chains and the web-search agent, and log the result.

    Args:
        message: The user's question.
        _: Ignored. It now defaults to None because the submit button registers a
            single input component and Gradio passes one positional argument per
            input; without a default the callback fails with a missing-argument
            TypeError. Two-argument callers keep working.

    Returns:
        A 6-tuple in the order the output components are wired:
        (GPTRag, GPT, GemRag, Gem, web-search answer, CrazyRag).
    """
    responses = parallel_chain.invoke(message)

    web_response = agent_executor.invoke({"input":message})
    web_answer = web_response['output']

    # Persist the question together with every answer before returning to the UI.
    save_history(message, responses, web_answer)

    return (
        responses["GPTRag"],
        responses["GPT"],
        responses["GemRag"],
        responses["Gem"],
        web_answer,
        responses["CrazyRag"],
    )

In [159]:
def search_csv(search_term,file_name = "data.csv"):
    """Return the saved rows whose question contains `search_term`.

    The match is a case-insensitive substring test against the first column.
    The first row of the file is always treated as the header and skipped.

    Args:
        search_term: Text to look for in the question column.
        file_name: CSV file to search. Defaults to "data.csv".

    Returns:
        A list of tuples, one per matching row, in file order.

    Raises:
        FileNotFoundError: If `file_name` does not exist.
    """
    if not os.path.exists(file_name):
        raise FileNotFoundError(f"The file '{file_name}' does not exist.")

    needle = search_term.lower()

    with open(file_name, mode='r', newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        next(reader, None)  # Skip the header row (no-op on an empty file)

        # csv.reader yields [] for blank lines; skip those instead of failing on row[0].
        return [
            tuple(row)
            for row in reader
            if row and needle in row[0].lower()
        ]

### Gradio Interface

In [160]:

# Chatbot tab: a question box, a submit button, and six stacked answer panels.
with gr.Blocks(fill_width=True, fill_height=True) as chatbot_tab:
    gr.Markdown("### Chatbot")

    question_input = gr.Textbox(label="Ask a question")
    submit_button = gr.Button("Submit")

    # Each panel is: horizontal rule, bold title, empty Markdown area for the answer.
    # The title order matches the order of the values returned by generate_response.
    answer_panels = []
    for panel_title in (
        "**GPT Rag**",
        "**GPT Simple**",
        "**Gemini Rag**",
        "**Gemini Simple**",
        "**Web Search**",
        "**Noobie Boobie Rag**",
    ):
        gr.HTML("<hr>")
        gr.Markdown(panel_title)
        answer_panels.append(gr.Markdown(""))

    # Closing rule under the last panel.
    gr.HTML("<hr>")

    # Keep the individual component names available at notebook level.
    response1, response2, response3, response4, response5, response6 = answer_panels

    # On click, send the question to generate_response and fill the six panels.
    submit_button.click(
        fn=generate_response,
        inputs=question_input,
        outputs=[response1, response2, response3, response4, response5, response6],
    )


# Gradio Interface for Previous Responses Tab
with gr.Blocks() as history_tab:
    gr.Markdown("### Previous Responses")

    # Search bar to filter questions
    search_input = gr.Textbox(label="Search questions")

    # Read-only table that displays whatever rows the search callback returns.
    # NOTE(review): only two column headers are declared here, whereas save_history
    # writes one column per chain plus "Web Search" — confirm the callback's row shape.
    questions_output = gr.Dataframe(headers=["Question", "Response"], interactive=False)

    # Re-run the search every time the search box content changes.
    # NOTE(review): `search_history` is not defined in any visible cell (the CSV search
    # helper defined in this notebook is `search_csv`). Unless it is supplied by
    # `from utils import *`, this line raises NameError on Restart & Run All — verify.
    search_input.change(fn=search_history, inputs=search_input, outputs=questions_output)


# Top-level app: render each previously built Blocks layout inside its own tab.
with gr.Blocks() as app:
    with gr.Tabs():
        for tab_label, tab_layout in (
            ("Chatbot", chatbot_tab),
            ("Previous Responses", history_tab),
        ):
            with gr.TabItem(tab_label):
                tab_layout.render()



In [161]:
# Start the Gradio server for the combined app.
# NOTE: per the recorded output, Colab automatically enables share=True, which
# publishes a temporary public URL; pass share=False explicitly to turn that off,
# or debug=True to surface errors in the notebook.
app.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://94e3583ac60c0abb52.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


