# Using the Retrieval Augmented Generation (RAG) Framework to contextualize the response of a chatbot.


## Imports


In [1]:
!pip install -q python-dotenv langchain_elasticsearch langchain_community langchain_text_splitters langchain_core langchain langchain_openai langchain_huggingface

In [10]:
import os

from dotenv import find_dotenv, load_dotenv
from langchain import hub
from langchain.agents import AgentExecutor, create_react_agent
from langchain.chains import RetrievalQA
from langchain.chains import create_history_aware_retriever
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.prompts import PromptTemplate
from langchain_core.tools import tool
#from langchain_elasticsearch.vectorstores import ElasticsearchStore
#from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [11]:
#load_dotenv(find_dotenv(), override=True)
# Set the OPENAI_API_BASE / OPENAI_API_KEY environment variables for this process,
# replacing any values that were already present.
os.environ.update(
    {
        "OPENAI_API_BASE": "http://10.35.151.101:8001/v1",
        "OPENAI_API_KEY": "sk-1234",
    }
)

## Set up the vector DB and add embeddings to it


In [4]:
# Load the PDF user guide (image extraction stays disabled: extract_images=True is not passed).
loader = PyPDFLoader("dgxa100-user-guide.pdf")
documents = loader.load()

# Chunking parameters: sizes are measured with len(), i.e. in characters, and
# consecutive chunks overlap by half a chunk.
splitter_options = dict(
    chunk_size=1024,
    chunk_overlap=512,
    length_function=len,
    is_separator_regex=False,
)
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# `docs` holds the chunked documents that get embedded into the vector store.
docs = text_splitter.split_documents(documents)

In [5]:
#embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
#embedding = OpenAIEmbeddings(model="text-embedding-ada-002")
#vector_db = ElasticsearchStore.from_documents(
#    docs,
#    es_url="http://localhost:9200",
#    index_name="manual",
#    embedding=embedding
#)
#vector_db.client.indices.refresh(index="manual")

embedding = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")

# Chroma.from_documents() adds the chunks to the persisted collection on every call, so
# re-running this cell would store the same chunks again and retrieval would return
# duplicates. Reuse the on-disk store when it already exists and build it only the first
# time. Delete the ./vectordb directory to force a rebuild (e.g. after changing the PDF,
# the chunking parameters or the embedding model).
persist_directory = "./vectordb"
if os.path.isdir(persist_directory) and os.listdir(persist_directory):
    vector_db = Chroma(persist_directory=persist_directory, embedding_function=embedding)
else:
    vector_db = Chroma.from_documents(documents=docs, embedding=embedding, persist_directory=persist_directory)


  from tqdm.autonotebook import tqdm, trange


## Connecting vector db to LLM


In [6]:
# Expose the vector store as a retriever (no search options passed, so library defaults
# apply). Both the history-aware RAG chain and the RetrievalQA chain are built on it.
retriever = vector_db.as_retriever()


In [7]:
def format_docs(docs):
    """Concatenate the text of several documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values in iteration order, separated by a blank
        line; an empty string when ``docs`` yields nothing.
    """
    separator = "\n\n"
    page_texts = [document.page_content for document in docs]
    return separator.join(page_texts)

In [12]:
# Chat model handle shared by the RAG chain, the RetrievalQA chain and the agent.
llm = ChatOpenAI(
    model="llama3-8b",
    temperature=0.2,
)

#llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro")


In [13]:
# System instruction for the question-rewriting step: turn the latest user question into
# a standalone question without answering it.
contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is."""

# Message layout: system instruction, then the prior turns, then the newest user input.
contextualize_q_messages = [
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
]
contextualize_q_prompt = ChatPromptTemplate.from_messages(contextualize_q_messages)

# Combine the LLM, the base retriever and the rewrite prompt into a history-aware retriever.
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)

# Instruction text for the answering step. It is defined but currently NOT used: the
# system message of qa_prompt below contains only the retrieved context.
qa_system_prompt = """You are an assistant for question-answering tasks. \
Use the following pieces of retrieved context to answer the question. \
Keep your answers short and concise.\

{context}"""

# Message layout: retrieved context as the system message, prior turns, newest user input.
qa_messages = [
    #("system", qa_system_prompt),
    ("system", "{context}"),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
]
qa_prompt = ChatPromptTemplate.from_messages(qa_messages)

# Chain that answers from the documents placed into the {context} slot.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=qa_prompt)

# Full pipeline: history-aware retrieval followed by the answering chain.
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)

In [14]:
# Interactive demo: three question/answer turns against the RAG chain. The running
# history is passed back in on every turn so follow-up questions can refer to earlier ones.
chat_history = []

for _ in range(3):
    question = input("Ask a question")
    response = rag_chain.invoke({"input": question, "chat_history": chat_history})
    answer = response["answer"]
    # Record the reply as an AIMessage: a bare string placed in a message list is
    # interpreted as a human message, which would attribute the assistant's answers
    # to the user on later turns.
    chat_history.extend([HumanMessage(content=question), AIMessage(content=answer)])
    print(answer)


Ask a question how to configure multi-instance GPU?


To configure Multi-Instance GPU (MIG) on an NVIDIA DGX A100, follow these steps:

1. Stop the NVSM and DCGM services:
```
$ sudo systemctl stop nvsm dcgm
```
2. Enable MIG on all eight GPUs in the system:
```
$ sudo nvidia-smi -mig 1
```
If other services are running that prevent you from resetting the GPUs, then reboot the system and skip the next step.

3. Restart the DCGM and NVSM services:
```
$ sudo systemctl start dcgm nvsm
```
Note: Before enabling MIG, make sure to terminate any system services that manage GPUs, as MIG requires a GPU reset.

After enabling MIG, you can use the NVIDIA Management Library (NVML) APIs or the command-line utility `nvidia-smi` to manage MIG instances. You can also refer to the MIG User Guide for more detailed information on key MIG concepts, deployment considerations, and how to create MIG instances and run Docker containers using MIG.


Ask a question how to update dgx a100 software?


According to the NVIDIA DGX A100 User Guide, you can update the software on your DGX A100 system by following these steps:

1. Run the package manager:
```
$ sudo apt update
```
2. Check to see which software will get updated:
```
$ sudo apt full-upgrade -s
```
3. Upgrade to the latest version:
```
$ sudo apt full-upgrade
```
4. Answer any questions that appear. Most questions require a Yes or No response. If asked to select the grub configuration to use, select the default option.

Additionally, if you have configured apt to use the NVIDIA DGX OS packages in the file `/etc/apt/sources.list.d/dgx-bionic-r450-cuda11-0-repo.list`, the NVIDIA graphics driver will be upgraded to the R450 driver and the package sources will be updated to obtain future updates from the R450 driver repositories.

Note: These instructions update all software for which updates are available from your configured software sources, including applications that you installed yourself. If you want to prevent an appli

Ask a question 


It seems like you didn't ask a question. If you have any questions or need further assistance with updating the software on your DGX A100 system, feel free to ask!


# Extending to Function Calling


## Set up vector DB retrieval as a tool


In [15]:
# Question-answering chain over the same retriever, using the "stuff" chain type.
# return_source_documents=True makes the result include the retrieved documents under
# 'source_documents' next to the answer under 'result'; query_manual reads both keys.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

In [16]:
# Wraps the RetrievalQA chain as a tool the agent can call.
# NOTE(review): the docstring is presumably what @tool exposes to the model as the tool
# description, so it is left verbatim — confirm before rewording it.
@tool
def query_manual(query):
    """
    Queries the manual and retrieves information from its contents. 
    Returns the result and the source documents.

    Args:
        query (string): A query derived from the question asked by the user.
    """
    # Run the RetrievalQA chain on the query text.
    result = qa.invoke(query)
    # Return a 2-tuple: the answer text and the documents it was retrieved from.
    return result['result'], result['source_documents']

In [18]:
# `hub` is already imported in the notebook's import cell, so it is not re-imported here.

# Task framing wrapped around each user question before it is handed to the agent.
# It mentions only tools that are actually registered in `tools` below: telling the model
# about a tool it cannot call invites actions that name a non-existent tool. If
# TavilySearchResults is re-enabled in `tools`, add a sentence describing it here as well.
template = """
You are an assistant that answers questions on the manual provided.
Use the tools provided to respond accurately. 
The query_manual tool should be used to retrieve information from the manual.

Question: {input}
"""

prompt_template = PromptTemplate.from_template(template=template)

# ReAct-style chat prompt pulled from the LangChain hub.
agentprompt = hub.pull("hwchase17/react-chat")

#tools = [query_manual, TavilySearchResults(max_results=3)]
tools = [query_manual]

agent = create_react_agent(llm=llm,
                           tools=tools,
                           prompt=agentprompt)

# handle_parsing_errors=True is passed so malformed model output does not abort the run;
# verbose=True prints the intermediate reasoning and tool calls.
agent_executor = AgentExecutor(agent=agent,
                               tools=tools,
                               handle_parsing_errors=True,
                               verbose=True)

In [19]:
# Show the prompt pulled from the hub, including the input variables the agent expects
# (agent_scratchpad, chat_history, input, tool_names, tools).
print(agentprompt)

input_variables=['agent_scratchpad', 'chat_history', 'input', 'tool_names', 'tools'] metadata={'lc_hub_owner': 'hwchase17', 'lc_hub_repo': 'react-chat', 'lc_hub_commit_hash': '3ecd5f710db438a9cf3773c57d6ac8951eefd2cd9a9b2a0026a65a0893b86a6e'} template='Assistant is a large language model trained by OpenAI.\n\nAssistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n\nAssistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assi

In [None]:
# Interactive demo: three turns against the tool-using agent.
# The hub prompt is a plain-text template with a {chat_history} variable, so the
# conversation so far is kept as "Human: ..." / "AI: ..." lines and passed in as one string.
chat_history = []

for _ in range(3):
    question = input("Ask a question")
    response = agent_executor.invoke(
        {
            "input": prompt_template.format(input=question),
            "chat_history": "\n".join(chat_history),
        }
    )
    answer = response["output"]
    # list.extend() on a dict would add only its keys ("user", "ai") and drop the actual
    # text, so append the rendered lines for this turn instead.
    chat_history.extend([f"Human: {question}", f"AI: {answer}"])
    print(answer)