In [1]:
from typing import List
from langchain_core.documents import Document
import json

class LoadDocument():
    def __init__(self,file_path) -> None:
        self.file_path = file_path

    def load_json_file(self) -> List[Document]:
        with open(self.file_path, 'r',encoding='utf-8') as f:
            self.docs = json.load(f)
        zc_docs = []
        zp_docs = []
        for doc in self.docs:
            if doc['metadata']['tag']['product'] == "Zmanda-Classic":
                data = [Document(page_content=doc['content'],metadata={"source": doc['source']})]
                zc_docs.extend(data)
            else:
                data = [Document(page_content=doc['content'],metadata={"source": doc['source']})]
                zp_docs.extend(data)

        return zc_docs,zp_docs

In [2]:
#from load_data import LoadDocument
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import os
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain.prompts import PromptTemplate
from langchain.chains import ConversationalRetrievalChain
from langchain_google_genai import ChatGoogleGenerativeAI
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()

# Get the Gemini API key
gemini_api_key = user_secrets.get_secret("Gemini_API_Key")

os.environ["GOOGLE_API_KEY"] = gemini_api_key

In [3]:
document1 = LoadDocument("/kaggle/input/chat-data/output_docs_zmanda_2024-09-23_03-15-39.json")
document2 = LoadDocument("/kaggle/input/chat-data/output_kb_zmanda_2024-08-27_11-35-49.json")
data1,data2 = document1.load_json_file()
print("Processed document 1")
data3,data4 = document2.load_json_file()
print("Processed document 2")

Processed document 1
Processed document 2


In [4]:
zc_docs = data1+data3
zp_docs = data2+data4
del data1,data2,data3,data4

In [5]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=256)
zc_splits = text_splitter.split_documents(zc_docs)
zp_splits = text_splitter.split_documents(zp_docs)

In [6]:
embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
zc_vectorstore = Chroma.from_documents(documents=zc_splits, embedding=embeddings)
zc_retriever = zc_vectorstore.as_retriever(k=10)

In [7]:
embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
zp_vectorstore = Chroma.from_documents(documents=zp_splits, embedding=embeddings)
# k is the number of chunks to retrieve
zp_retriever = zp_vectorstore.as_retriever(k=10)

In [8]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.memory import ConversationBufferMemory
from langchain.chains import LLMChain
output_parser = StrOutputParser()

system = """
Your Name: Zam

Your Role: A highly efficient and user-friendly chatbot. Your primary role is to assist users by answering their queries related to Zmanda Pro. Always provide clear, detailed, and step-by-step instructions to address user concerns or guide them through processes.

Your Knowledge Base: I have access to extensive documentation and resources on Zmanda Pro. I can guide you through step-by-step instructions for common tasks, troubleshoot issues, and point you in the right direction for more complex problems.

Guidelines:
1. Whenever the context includes "cometbackup" or "docs.cometbackup.com," replace those terms with "Zmanda Pro" while responding to the user.
2. Focus on providing precise and actionable steps tailored to the user's requirements.
3. Prioritize providing clear step-by-step instructions when possible.44
4. Use simple, easy-to-understand language, avoiding unnecessary technical jargon unless the user explicitly requests advanced details.  
5. If the query is outside the scope of Zmanda Pro, inform the user courteously and suggest possible next steps or resources.  
6. Maintain a professional yet approachable tone to
7. Offer additional resources if needed.


Previous conversation:
{chat_history}
"""
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{question}"),
    ]
)
llm=ChatGoogleGenerativeAI(model="gemini-1.5-pro",temperature=0)


# Combine prompt, memory, and LLM into a RunnableSequence
llmchain = prompt | llm  

docs.cometbackup.com

In [9]:
from langchain_core.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
from typing import Optional, Literal

class DecisionOutput(BaseModel):
    """
    Represents the agent's decision on how to handle a query.
    """
    use_retrieval: bool = Field(
        description="Whether to use retrieval tool or answer directly"
    )
    reasoning: str = Field(
        description="Explanation for the decision to use or not use retrieval"
    )
    retriever_name: Optional[Literal["zmanda classic", "zmanda pro"]] = Field(
        default=None,
        description="Name of the retriever to be used if retrieval is required, otherwise None"
    )

class DynamicRetrievalAgent:
    def __init__(self,):
        """
        Initialize the dynamic retrieval agent.
        
        :param llm: Language model to use for decision-making and answering
        :param retrieval_tools: List of retrieval tools available to the agent
        """
        self.llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro",temperature=0)
        self.retrieval_tools = [zp_retriever,zc_retriever]
        self.zam_llm = llmchain
        
        # Decision-making prompt
        self.decision_prompt = PromptTemplate(
            template="""You are an intelligent agent deciding how to best answer a user query.
            
Determine whether you should use a retrieval tool or answer directly based on these criteria:
1. If the query requires up-to-date or specific factual information not in your training
2. If the query is about a complex topic needing comprehensive research
3. If direct knowledge might be insufficient or potentially outdated
4. Review the chat history to see if the user's query has already been addressed. If the answer is found, provide it directly from the chat history.

If you determine that retrieval is required, select the retriever name as follows:
- If the user query mentions 'zmanda classic', set the retriever name to 'zmanda classic'.
- Otherwise, set the retriever name to 'zmanda pro'.

User Query: {query}

chat_history: {chat_history}
{format_instructions}
            """,
            input_variables=["query","chat_history"],
            partial_variables={"format_instructions": PydanticOutputParser(pydantic_object=DecisionOutput).get_format_instructions()}
        )
        
        # Output parser for decision
        self.decision_parser = PydanticOutputParser(pydantic_object=DecisionOutput)
    
    def make_decision(self, query: str,chat_history: str) -> DecisionOutput:
        """
        Decide whether to use retrieval based on the query.
        
        :param query: User's query
        :return: Decision output with retrieval recommendation
        """
        decision_chain = self.decision_prompt | self.llm | self.decision_parser
        return decision_chain.invoke({"query": query , "chat_history": chat_history})

    def rag_response(self,query: str, chat_history: str,retriver_name: str ):
        if retriver_name == "zmanda classic":
            documents = self.retrieval_tools[1].invoke(query)
        else:    
            documents = self.retrieval_tools[0].invoke(query)
        #print(documents[0].metadata)
        context = "\n\n".join([doc.page_content for doc in documents])
        sources = [doc.metadata for doc in documents]
        sources = [source['source'] for source in sources]
        prompt = f"""
                     Context:
                     {context}
        
                     Question:
                     {query}
        
                     Answer: Provide a direct answer to the query.
                   """
        # Generate response using the language model
        print("Document Processed Sending it to the LLM")
        response = self.zam_llm.invoke({"chat_history" : chat_history , "question" : query})
        response.content = response.content + "\n" + str(set(sources))
        return response
    
    def process_query(self, query: str , chat_history: str) -> str:
        """
        Process the query by deciding retrieval strategy.
        
        :param query: User's query
        :return: Answer to the query
        """
        # Make decision about retrieval
        decision = self.make_decision(query,chat_history)
        
        # Log the decision (in a real-world scenario, you might use proper logging)
        print(f"Decision : {decision.use_retrieval} Decision : {decision.retriever_name} Decision Reasoning: {decision.reasoning}")
        
        if decision.use_retrieval:
            return self.rag_response(query,chat_history,decision.retriever_name)
        else:
            # Answer directly using the language model
            return self.zam_llm.invoke({"chat_history" : chat_history , "question" : query})

In [10]:
agent = DynamicRetrievalAgent()

In [11]:
chat_history = ""
print("ChatBot: Hi! i am Zam,what can i help you with today")
while True:
    query=input("You: ")
    if query == "exit":
        break
    result = agent.process_query(query , chat_history)
    chat_history = chat_history + f"Human: {query}\n" + f"AI: {result.content}"
    print("ChatBot: ",result.content)
    

ChatBot: Hi! i am Zam,what can i help you with today


You:  Hie


Decision : False Decision : None Decision Reasoning: The query is a simple greeting and doesn't require retrieval.
ChatBot:  Hello! How can I help you today?



You:  What is zmanda


Decision : True Decision : zmanda pro Decision Reasoning: The query requires specific factual information about "zmanda" which might be complex and require comprehensive research. Direct knowledge might be insufficient or potentially outdated.
Document Processed Sending it to the LLM
ChatBot:  Zmanda is a robust and comprehensive backup and disaster recovery solution designed to protect your critical data.  It offers a centralized management console, making it easy to manage backups for a variety of systems, including servers, desktops, and applications.  Zmanda supports various backup destinations, such as tape, disk, optical media, and cloud storage.  It's known for its reliability, flexibility, and cost-effectiveness, making it a popular choice for businesses of all sizes.

Are you interested in a specific aspect of Zmanda, such as its features, pricing, or how to perform a particular task?  Let me know how I can help you further.

{'https://docs.zmanda.com/v/zmanda-classic/getting-

You:  what is zmanda classic


Decision : True Decision : zmanda classic Decision Reasoning: The user is asking for a specific product "zmanda classic". Although the previous turn discussed "zmanda", it did not specifically address "zmanda classic".  Therefore, retrieval is needed to provide more targeted information.
Document Processed Sending it to the LLM
ChatBot:  Zmanda Classic is a robust and well-established backup and recovery solution offered by Zmanda. It provides comprehensive data protection for a wide range of systems and applications.  Think of it as the original, powerful engine that has been reliably protecting data for years.

Here's a breakdown of what makes Zmanda Classic stand out:

* **Flexibility:** Zmanda Classic supports a variety of operating systems (Linux, Solaris, Windows), databases (MySQL, PostgreSQL, Oracle), and applications.  This makes it a versatile solution for diverse IT environments.
* **Multiple Backup Destinations:** You can back up your data to tape, disk, optical media, or c

You:  exit


In [12]:
print(chat_history)

Human: Hie
AI: Hello! How can I help you today?
Human: What is zmanda
AI: Zmanda is a robust and comprehensive backup and disaster recovery solution designed to protect your critical data.  It offers a centralized management console, making it easy to manage backups for a variety of systems, including servers, desktops, and applications.  Zmanda supports various backup destinations, such as tape, disk, optical media, and cloud storage.  It's known for its reliability, flexibility, and cost-effectiveness, making it a popular choice for businesses of all sizes.

Are you interested in a specific aspect of Zmanda, such as its features, pricing, or how to perform a particular task?  Let me know how I can help you further.

{'https://docs.zmanda.com/v/zmanda-classic/getting-started-with-zmanda/cloud-based-trial-environment', 'https://docs.zmanda.com/', 'https://docs.zmanda.com/v/zmanda-classic/readme/zmanda-video-walkthrough', 'https://docs.zmanda.com/v/zmanda-classic/getting-started-with-zm

In [None]:
def response(query,chat_history):
  result , source = rag_response.invoke(query)
  result['text'] = result['text'] + "\n"+"Here are the links that you refer\n"+source
  chat_history.extend([(query,result['text'])])
  return "",chat_history

def clear_function():
    memory.clear()

In [None]:
import gradio as gr

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(height=540) #just to fit the notebook
    msg = gr.Textbox(label="Prompt")
   # with gr.Accordion(label="Advanced options",open=False):
   #    system = gr.Textbox(label="System message", lines=2, value="A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.")
   #     temperature = gr.Slider(label="temperature", minimum=0.1, maximum=1, value=0.7, step=0.1)
    btn = gr.Button("Submit")
    clear = gr.ClearButton(components=[msg, chatbot], value="Clear console")

    btn.click(response, inputs=[msg,chatbot], outputs=[msg, chatbot])
    clear.click(fn=clear_function)
    msg.submit(response, inputs=[msg,chatbot], outputs=[msg, chatbot]) #Press enter to submit

gr.close_all()
demo.queue().launch(share=True,debug=True)

In [None]:
import re

# Input string
input_string = "Some text {'https://docs.zmanda.com/start-here/step-7-checking-backup-job-status', 'https://docs.zmanda.com/protected-items/files-and-folders', 'https://docs.zmanda.com/start-here/step-6-initiating-a-backup', 'https://docs.zmanda.com/start-here/step-1-adding-a-user'} more text"

# Remove everything between { and }
output_string = re.sub(r"\{.*?\}", "", input_string)

# Print the result
print(output_string.strip())
