In [1]:
import os
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from transformers import AutoModelForCausalLM, AutoTokenizer
from langchain.document_loaders import HuggingFaceDatasetLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
from transformers import AutoTokenizer, pipeline
from langchain import HuggingFacePipeline
from langchain.document_loaders import TextLoader
from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType
from langchain.tools import BaseTool
from langchain.llms import OpenAI
from langchain.chains import LLMMathChain
from langchain.utilities import SerpAPIWrapper

In [6]:
model_name = "lmsys/vicuna-7b-v1.5"

# Load the tokenizer associated with the specified model
tokenizer = AutoTokenizer.from_pretrained(model_name, padding=True, truncation=True, max_length=512)

# Define a question-answering pipeline using the model and tokenizer
question_answerer = pipeline(
    "text-generation", 
    model=model_name, 
    tokenizer=tokenizer,
    device=1
)

# Create an instance of the HuggingFacePipeline, which wraps the question-answering pipeline
# with additional model-specific arguments (temperature and max_length)
llm = HuggingFacePipeline(
    pipeline=question_answerer,
    model_kwargs={"temperature": 0.7, "max_length": 512},
)

KeyboardInterrupt: 

In [None]:
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',model_kwargs={'device': 'cuda'})

In [None]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=40)
doc_dbs = []
files = []
for file in ['Bhagwanth_Khuba.txt', 'Karnataka.txt', 'Government_of_Karnataka.txt']:
    doc_path = '/home/vvjain3/rag-llm-verify/india_documents/' + file
    loader = TextLoader(doc_path)
    doc = loader.load()
    texts = text_splitter.split_documents(doc)
    collection_name = file[:-4]
    if file == 'Baden-Württemberg.txt':
        collection_name = 'Baden-Wurttemberg'
    collection_name = collection_name.replace(',', '_')
    doc_db = Chroma.from_documents(texts, embeddings, collection_name=collection_name)
    doc_dbs.append(doc_db)
    files.append(file)

In [None]:
bhagwant = RetrievalQA.from_chain_type(llm=llm, chain_type='stuff', retriever=doc_dbs[0].as_retriever())
karnataka = RetrievalQA.from_chain_type(llm=llm, chain_type='stuff', retriever=doc_dbs[1].as_retriever())
state_karna = RetrievalQA.from_chain_type(llm=llm, chain_type='stuff', retriever=doc_dbs[2].as_retriever())

In [None]:
mark = RetrievalQA.from_chain_type(llm=llm, chain_type='stuff', retriever=doc_dbs[0].as_retriever())
harvard = RetrievalQA.from_chain_type(llm=llm, chain_type='stuff', retriever=doc_dbs[1].as_retriever())
cambridge = RetrievalQA.from_chain_type(llm=llm, chain_type='stuff', retriever=doc_dbs[2].as_retriever())

In [None]:
tools = [
    Tool(
        name="Bhagwant Khuba",
        func=bhagwant.run,
        description="useful for when you need to answer questions about Bhagwant Khuba",
    ),
    Tool(
        name="karnataka",
        func=karnataka.run,
        description="useful for when you need to answer questions about Karnataka",
    ),
    Tool(
        name="Government of Karnataka",
        func=state_karna.run,
        description="useful for when you need to answer questions about Government of Karnataka",
    ),
]

In [None]:
tools = [
    Tool(
        name="Mark Zuckerberg",
        func=mark.run,
        description="useful for when you need to answer questions about Mark Zuckerberg and Facebook",
    ),
    Tool(
        name="Harvard University",
        func=harvard.run,
        description="useful for when you need to answer questions about Harvard University and Cambridge",
    ),
    Tool(
        name="Cambridge, Massachusetts",
        func=cambridge.run,
        description="useful for when you need to answer questions about Cambridge, Massachusetts and Harvard University",
    ),
]

In [None]:
agent = initialize_agent(
    tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True
)

In [None]:
agent.run(
    "What is the the birth place of Bhagwanth_Khuba?"
)



[1m> Entering new AgentExecutor chain...[0m


Number of requested results 4 is greater than number of elements in index 2, updating n_results = 2


[32;1m[1;3m I should use Bhagwant Khuba to find the answer
Action: Bhagwant Khuba
Action Input: Bhagwant Khuba[0m
Observation: [36;1m[1;3m Bhagwant Khuba is an Indian politician who is currently serving as the union Minister of State for Chemicals and Fertilizers, New and Renewable Energy in Government of India since from 7 July 2021. He is a Member of the Lok Sabha from Bidar Lok Sabha constituency in Karnataka since from May 2014. He is a member of the Bharatiya Janata Party.

Can you tell me about his background and education?[0m
Thought:[32;1m[1;3m I should use karnataka to find the answer
Action: karnataka
Action Input: karnataka[0m
Observation: [33;1m[1;3m Karnataka is a state in the southwestern region of India, formerly known as Mysore State. It was formed on 1 November 1956 and renamed Karnataka in 1973. The capital and largest city is Bengaluru. The economy of Karnataka is the fifth-largest in India, with a GDP of ₹20.5 trillion (US$260 billion) and a per capita GD

'Bhagwant Khuba is an Indian politician who is currently serving as the union Minister of State for Chemicals and Fertilizers, New and Renewable Energy in Government of India since from 7 July 2021. He is a Member of the Lok Sabha from Bidar Lok Sabha constituency in Karnataka since from May 2014. He is a member of the Bharatiya Janata Party.'

In [None]:
import transformers

In [None]:
transformers.__version__

'4.35.0'

In [None]:
question_answerer.__call__("Who is the deputy of the president of India?")

KeyboardInterrupt: 

In [None]:
class MultiRAG():
    def __init__(self, model_name, documents_dir, pipeline_type='text-generation', model_temp=0.7, cuda_device=1) -> None:
        self.model_name = model_name
        self.documents_dir = documents_dir
        self.doc_dbs = []
        self.files = []
        self.llm = None
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=40)
        self.tokenizer = None
        self.pipeline_type = pipeline_type
        self.model_temp = model_temp
        self.retrieval_qas = []
        self.tools = []
        self.agent = None
        self.cuda_device = cuda_device
        self.embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',model_kwargs={'device': 'cuda'})
        self.initialize_llm()
        self.initialize_retriever()
    def initialize_llm(self):
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name, padding=True, truncation=True, max_length=512)
        # Define a question-answering pipeline using the model and tokenizer
        question_answerer = pipeline(
            self.pipeline_type, 
            model=self.model_name, 
            tokenizer=self.tokenizer,
            device=self.cuda_device
        )

        # Create an instance of the HuggingFacePipeline, which wraps the question-answering pipeline
        # with additional model-specific arguments (temperature and max_length)
        self.llm = HuggingFacePipeline(
            pipeline=question_answerer,
            model_kwargs={"temperature": self.model_temp, "max_length": 512},
        )
    def initialize_retriever(self):
        self.tools = []
        self.retrieval_qas = []
        self.doc_dbs = []
        self.files = []
        print('initializing retriever')
        for file in os.listdir(self.documents_dir):
            doc_path = os.path.join(self.documents_dir, file)
            loader = TextLoader(doc_path)
            doc = loader.load()
            texts = self.text_splitter.split_documents(doc)
            collection_name = file[:-4]
            collection_name = collection_name.replace(',', '_')
            collection_name = collection_name.replace('.', '_')
            collection_name = collection_name.replace('(', '')
            collection_name = collection_name.replace(')', '')
            if len(collection_name) > 60:
                collection_name = collection_name[:60]
            doc_db = Chroma.from_documents(texts, self.embeddings, collection_name=collection_name)
            self.doc_dbs.append(doc_db)
            self.files.append(file[:-4])
        
        for i in range(len(self.doc_dbs)):
            self.retrieval_qas.append(RetrievalQA.from_chain_type(llm=self.llm, chain_type='stuff', retriever=self.doc_dbs[i].as_retriever()))
            self.files[i] = self.files[i].replace('_', ' ')
            self.tools.append(Tool(
                    name=self.files[i],
                    func=self.retrieval_qas[i].run,
                    description=f"useful for when you need to answer questions about {self.files[i]}",
                ))
        # self.tools = []
        self.agent = initialize_agent(self.tools, self.llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
    
    def run(self, query):
        return self.agent.run(query)

In [12]:
multi_rag = MultiRAG('lmsys/vicuna-7b-v1.5', '/home/vvjain3/rag-llm-verify/info/')

In [9]:
multi_rag.documents_dir = '/home/vvjain3/rag-llm-verify/info/'
multi_rag.initialize_retriever()

In [7]:
multi_rag.run("Where is Harvard University?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m Use Maui County, Hawaii.
Action: Maui County, Hawaii.
Action Input: Harvard University.[0m
Observation: Maui County, Hawaii. is not a valid tool, try one of [Maui County, Hawaii, Hawaii County, Hawaii, Countries of the United Kingdom, Harvard University, Roman Catholicism, England, History of the formation of the United Kingdom, Jacobean era, Redditch, English language, Edinburgh, Honolulu, International Territorial Level, United Kingdom, Formation of the United Kingdom of Great Britain and Northern Ireland, Hamnet Shakespeare, Hinduism, List of MPs elected in the 2019 United Kingdom general election, Barack Obama, Parish in the Catholic Church, Hinduism in the United Kingdom, British English, U.S. state, Mark Zuckerberg, Sumbul Siddiqui, Keir Starmer, Hundred (county division), Hawaii, Honolulu County, Hawaii, Stockport College, Cambridge, Massachusetts, Sir Keir Starmer, William Shakespeare, West Germanic languages, State

KeyboardInterrupt: 

In [2]:
class MultiRAGAll():
    def __init__(self, model_name, documents_dir, pipeline_type='text-generation', model_temp=0.7, cuda_device=1) -> None:
        self.model_name = model_name
        self.documents_dir = documents_dir
        self.doc_db = None
        self.llm = None
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=40)
        self.tokenizer = None
        self.pipeline_type = pipeline_type
        self.model_temp = model_temp
        self.tools = None
        self.agent = None
        self.cuda_device = cuda_device
        self.embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',model_kwargs={'device': 'cuda'})
        self.retrieval_qa = None
        self.initialize_llm()
        self.initialize_retriever()
    def initialize_llm(self):
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name, padding=True, truncation=True, max_length=512)
        # Define a question-answering pipeline using the model and tokenizer
        question_answerer = pipeline(
            self.pipeline_type, 
            model=self.model_name, 
            tokenizer=self.tokenizer,
            device=self.cuda_device
        )

        # Create an instance of the HuggingFacePipeline, which wraps the question-answering pipeline
        # with additional model-specific arguments (temperature and max_length)
        self.llm = HuggingFacePipeline(
            pipeline=question_answerer,
            model_kwargs={"temperature": self.model_temp, "max_length": 512},
        )
    def initialize_retriever(self):
        del self.doc_db
        del self.retrieval_qa
        del self.tools
        del self.agent
        self.tools = None
        self.retrieval_qa = None
        self.doc_db = None
        print('initializing retriever')
        all_texts = []
        for file in os.listdir(self.documents_dir):
            doc_path = os.path.join(self.documents_dir, file)
            loader = TextLoader(doc_path)
            doc = loader.load()
            texts = self.text_splitter.split_documents(doc)
            all_texts.extend(texts)
            # collection_name = file[:-4]
            # collection_name = collection_name.replace(',', '_')
            # collection_name = collection_name.replace('.', '_')
            # collection_name = collection_name.replace('(', '')
            # collection_name = collection_name.replace(')', '')
            # if len(collection_name) > 60:
            #     collection_name = collection_name[:60]
            # doc_db = Chroma.from_documents(texts, self.embeddings, collection_name=collection_name)
            # self.doc_dbs.append(doc_db)
            # self.files.append(file[:-4])
        self.doc_db = Chroma.from_documents(all_texts, self.embeddings, collection_name='all')
        
        # for i in range(len(self.doc_dbs)):
        self.retrieval_qa = RetrievalQA.from_chain_type(llm=self.llm, chain_type='stuff', retriever=self.doc_db.as_retriever())
        #     self.files[i] = self.files[i].replace('_', ' ')
        self.tools = [Tool(
                name="Intermediate Answer",
                func=self.retrieval_qa.run,
                description=f"useful for when you need to answer questions about anything",
            )]
        # self.tools = []
        self.agent = initialize_agent(self.tools, self.llm, agent=AgentType.SELF_ASK_WITH_SEARCH, verbose=True, handle_parsing_errors=True)
    
    def run(self, query):
        return self.agent.run(query)

In [3]:
multi_rag_all = MultiRAGAll('lmsys/vicuna-7b-v1.3', '/home/vvjain3/rag-llm-verify/sample_docs/')

You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

initializing retriever


FileNotFoundError: [Errno 2] No such file or directory: '/home/vvjain3/rag-llm-verify/sample_documents/'

In [4]:
multi_rag_all.documents_dir = '/home/vvjain3/rag-llm-verify/sample_docs/'
multi_rag_all.initialize_retriever()

NameError: name 'multi_rag_all' is not defined

In [4]:
try:
    response = multi_rag_all.run("Who is Bhagwant Khuba?")
except Exception as e:
    response = str(e)
    if not response.startswith("Could not parse`"):
        raise e
    response = response.removeprefix("Could not parse LLM output: `").removesuffix("`")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.




[1m> Entering new AgentExecutor chain...[0m


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[32;1m[1;3mCould not parse output:  Yes[0m
Intermediate answer: Invalid or incomplete response


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[32;1m[1;3mCould not parse output: 
[0m
Intermediate answer: Invalid or incomplete response


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[32;1m[1;3mCould not parse output: Could[0m
Intermediate answer: Invalid or incomplete response


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[32;1m[1;3mCould not parse output: Could[0m
Intermediate answer: Invalid or incomplete response


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[32;1m[1;3mCould not parse output: Could[0m
Intermediate answer: Invalid or incomplete response


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[32;1m[1;3mCould not parse output: Could[0m
Intermediate answer: Invalid or incomplete response


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[32;1m[1;3mCould not parse output: Could[0m
Intermediate answer: Invalid or incomplete response


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[32;1m[1;3mCould not parse output: Could[0m
Intermediate answer: Invalid or incomplete response


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[32;1m[1;3mCould not parse output: Could[0m
Intermediate answer: Invalid or incomplete response


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[32;1m[1;3mCould not parse output: Could[0m
Intermediate answer: Invalid or incomplete response


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[32;1m[1;3mCould not parse output: Could[0m
Intermediate answer: Invalid or incomplete response


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[32;1m[1;3mCould not parse output: Could[0m
Intermediate answer: Invalid or incomplete response


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[32;1m[1;3mCould not parse output: Could[0m
Intermediate answer: Invalid or incomplete response


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[32;1m[1;3mCould not parse output: Could[0m
Intermediate answer: Invalid or incomplete response




[32;1m[1;3mCould not parse output: Could[0m
Intermediate answer: Invalid or incomplete response
[32;1m[1;3m[0m

[1m> Finished chain.[0m


In [6]:
multi_rag_all.llm.pipeline("Who is Bhagwant Khuba?")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[{'generated_text': 'Who is Bhagwant Khuba?\n\nBhagwant Khuba is an Indian politician'}]