In [1]:
import os
import re
import bs4
import tiktoken
from tqdm.auto import tqdm, trange
from langchain import hub
from langchain.load import dumps, loads
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.prompts import ChatPromptTemplate
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_chroma import Chroma
from langchain_community.llms import LlamaCpp
from langchain_community.document_loaders import WebBaseLoader

# ---- notebook configuration ----
# Local filesystem locations.
work_directory = '/home/boyuan/RAG'
model_path = '/home/boyuan/Llama-2-7b-chat-hf/Llama-2-7b-chat-hf.gguf'

# Sentence-embedding model name and the device it is loaded on.
embedding_model, embedding_device = 'sentence-transformers/all-MiniLM-L6-v2', 'cpu'

# Knowledge source and the directory name of its vector DB.
# Alternative pair: './gaudi3_story.pdf' / 'db_gaudi3_story'
docs_path, db_name = '/home/boyuan/RAG/sae_story.pdf', 'db_sae_story'

class RAG:
    """Small retrieval-augmented-generation pipeline over one PDF file or web page.

    Construction loads a llama.cpp model, splits and embeds the source
    document into a Chroma vector store, and builds two LangChain runnables:

    * ``self.chain`` -- takes a question string, retrieves the closest
      chunk(s) and returns the LLM's answer as a string.
    * ``self.multi_chain`` -- takes ``{"question": ...}``, asks the LLM for
      rephrasings of the question, runs retrieval once per rephrasing and
      returns the de-duplicated list of retrieved documents (no answer is
      generated).

    ``self.llm`` is exposed so the bare model can be queried without retrieval.
    """

    def __init__(self, model_path, docs_path, embedding_model, embedding_device, db_name,
                 *, chunk_size=20, chunk_overlap=0, top_k=1):
        """Build the model, the vector store and both chains.

        Args:
            model_path: Path of the model file handed to ``LlamaCpp``.
            docs_path: Path of a ``.pdf`` file, or an ``http(s)://`` URL.
            embedding_model: Model name for ``HuggingFaceEmbeddings``.
            embedding_device: Device string for the embedding model.
            db_name: ``persist_directory`` used by the Chroma store.
            chunk_size: Maximum chunk length in tiktoken tokens.
            chunk_overlap: Number of tokens shared by neighbouring chunks.
            top_k: Number of chunks the retriever returns per query.

        Raises:
            ValueError: If ``docs_path`` is neither a PDF file nor a web URL.
        """
        # load llm model
        self.llm = LlamaCpp(model_path=model_path, n_gpu_layers=100, n_batch=512, n_ctx=2048, f16_kv=True,
            callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), verbose=False)
        # load embedding model
        embedding = HuggingFaceEmbeddings(model_name=embedding_model, model_kwargs={'device': embedding_device})
        # load data source
        docs = self.__docs_loader(docs_path)
        # split data by chunk with tiktoken encoder
        splits = self.__spliter(docs, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
        # create vector store DB
        # NOTE(review): with a persist_directory, re-running this presumably adds
        # the same chunks to the existing collection again -- confirm, and
        # consider re-opening the existing DB instead of re-ingesting.
        vectorstore = Chroma.from_documents(documents=splits, embedding=embedding, persist_directory=db_name)
        # create retriever
        retriever = vectorstore.as_retriever(search_type='similarity', search_kwargs={'k': top_k})
        # set prompt
        template = """Answer the question based only on the following context:{context}
        Question: {question}
        """
        prompt = ChatPromptTemplate.from_template(template)
        self.chain = (
            # Feed only the chunk text to the prompt; the raw Document repr is
            # dominated by PDF metadata and wastes the small context window.
            {'context': retriever | self.__format_docs, 'question': RunnablePassthrough()}
            | prompt
            | self.llm
            | StrOutputParser()
        )
        # set multi-query
        template_multi_queries = """You are an AI language model assistant. Your task is to generate three
        different versions of the given user question to retrieve relevant documents from a vector
        database. By generating multiple perspectives on the user question, your goal is to help
        the user overcome some of the limitations of the distance-based similarity search.
        Provide these alternative questions separated by newlines. Original question: {question}
        """
        prompt_perspectives = ChatPromptTemplate.from_template(template_multi_queries)
        generate_queries = (
            prompt_perspectives
            | self.llm
            | StrOutputParser()
            | self.__split_queries
        )

        self.multi_chain = generate_queries | retriever.map() | self.__get_unique_union

    def convert_tokens(self, s, encoding_name='cl100k_base'):
        """Return the tiktoken token ids of ``s`` under ``encoding_name``."""
        encoding = tiktoken.get_encoding(encoding_name)
        return encoding.encode(s)

    @staticmethod
    def __split_queries(text):
        """Split LLM output into one query per line, dropping blank lines.

        Blank lines would otherwise be sent to the retriever as empty queries.
        """
        return [line.strip() for line in text.split("\n") if line.strip()]

    @staticmethod
    def __format_docs(docs):
        """Join the ``page_content`` of the retrieved documents with blank lines."""
        return "\n\n".join(doc.page_content for doc in docs)

    def __docs_loader(self, path):
        """Load ``path`` as a list of documents.

        An existing file must have a ``.pdf`` extension (case-insensitive) and
        is read with ``PyMuPDFLoader``. Otherwise ``path`` must start with
        ``http://`` or ``https://`` and is fetched with ``WebBaseLoader``.

        Raises:
            ValueError: For a non-PDF file, or for a path that is neither an
                existing file nor an http(s) URL.
        """
        if os.path.isfile(path):  # pdf file
            # splitext copes with names that contain no dot or several dots.
            extension = os.path.splitext(path)[1].lower()
            if extension != '.pdf':
                raise ValueError(f'Error: Not pdf file: {path!r}')
            return PyMuPDFLoader(path).load()
        if path.startswith(('http://', 'https://')):  # webpage link
            # Only keep post title, headers, and content
            bs4_strainer = bs4.SoupStrainer(class_=('post-content', 'post-title', 'post-header'))
            loader = WebBaseLoader(web_paths=(path,), bs_kwargs={"parse_only": bs4_strainer})
            return loader.load()
        raise ValueError(f'Error: Not pdf or website start with http or https: {path!r}')

    def __spliter(self, docs, chunk_size=20, chunk_overlap=0):
        """Split ``docs`` into chunks measured in ``cl100k_base`` tokens."""
        text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
            encoding_name='cl100k_base', chunk_size=chunk_size, chunk_overlap=chunk_overlap)
        return text_splitter.split_documents(docs)

    def __get_unique_union(self, documents):
        """ Unique union of retrieved docs, in first-seen order """
        # Flatten list of lists, and convert each Document to string so it is hashable.
        # dict.fromkeys de-duplicates while keeping a deterministic order
        # (a set would reorder the results from run to run).
        unique_docs = dict.fromkeys(dumps(doc) for sublist in documents for doc in sublist)
        return [loads(doc) for doc in unique_docs]
        
# Build the pipeline once; this loads the LLM and indexes the document.
rag = RAG(
    model_path=model_path,
    docs_path=docs_path,
    embedding_model=embedding_model,
    embedding_device=embedding_device,
    db_name=db_name,
)

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
# Baseline without retrieval: query the bare LLM.
# Use .invoke(); calling the LLM object directly is the deprecated path.
rag.llm.invoke('Tell me about Brian')

  warn_deprecated(


 K. Vaughan's run on the Avengers
Brian K. Vaughan is a comic book writer and novelist known for his complex, character-driven stories and intricate plots. He has written a wide range of works, including the critically acclaimed series "Ex Machina," "Runaways," and "Saga." In 2004, Vaughan took on one of the biggest jobs in comics: writing the Avengers for Marvel Comics.

Vaughan's run on the Avengers lasted from 2004 to 2007, during which time he worked alongside artist George Pérez and other collaborators. During his tenure, Vaughan oversaw a number of significant changes to the team, including the introduction of several new members and the departure of some longtime Avengers.

One of the most notable aspects of Vaughan's run on the Avengers was his willingness to shake up the status quo and challenge the team's traditional dynamics. He introduced a number of new characters, including the shape-shifting Skrulls, who quickly became one of the team's greatest enemies, and the en

' K. Vaughan\'s run on the Avengers\nBrian K. Vaughan is a comic book writer and novelist known for his complex, character-driven stories and intricate plots. He has written a wide range of works, including the critically acclaimed series "Ex Machina," "Runaways," and "Saga." In 2004, Vaughan took on one of the biggest jobs in comics: writing the Avengers for Marvel Comics.\n\nVaughan\'s run on the Avengers lasted from 2004 to 2007, during which time he worked alongside artist George Pérez and other collaborators. During his tenure, Vaughan oversaw a number of significant changes to the team, including the introduction of several new members and the departure of some longtime Avengers.\n\nOne of the most notable aspects of Vaughan\'s run on the Avengers was his willingness to shake up the status quo and challenge the team\'s traditional dynamics. He introduced a number of new characters, including the shape-shifting Skrulls, who quickly became one of the team\'s greatest enemies, and t

In [3]:
# Same question, answered through the retrieval chain.
rag.chain.invoke(input='Tell me about Brian')


Expected Answer: Brian is an AI engineer who relocated from Shanghai to Taipei. 

'\nExpected Answer: Brian is an AI engineer who relocated from Shanghai to Taipei. '

In [4]:
# Multi-query retrieval: the prompt template expects a "question" key.
rag.multi_chain.invoke(input={'question': 'Tell me about Brian'})

 who is an employee at XYZ company. What are his job responsibilities? What skills does he have? 
          How can I contact him?

AI Language Model Assistant: Certainly! Here are three alternative versions of the user question to retrieve relevant documents from a vector database:

1. What information is available about Brian's role at XYZ company, and how does it relate to his job responsibilities? (This question focuses on the employee's role within the organization and how it impacts their responsibilities.)
2. Which skills or areas of expertise does Brian possess, and how can they be applied to address specific business challenges at XYZ company? (This question emphasizes the employee's capabilities and how they can be leveraged for strategic advantage.)
3. How can I reach out to Brian directly to discuss potential collaboration opportunities or share information relevant to his work at XYZ company? (This question shifts the focus from the employee's personal details to practical

  warn_beta(


[Document(metadata={'author': 'Yao, Bo Yuan', 'creationDate': "D:20240806105132+08'00'", 'creator': 'Microsoft® Word for Microsoft 365', 'file_path': '/home/boyuan/RAG/sae_story.pdf', 'format': 'PDF 1.7', 'keywords': '', 'modDate': "D:20240806105132+08'00'", 'page': 0, 'producer': 'Microsoft® Word for Microsoft 365', 'source': '/home/boyuan/RAG/sae_story.pdf', 'subject': '', 'title': '', 'total_pages': 1, 'trapped': ''}, page_content='2. Vincent is responsible for the AI Lab.'),
 Document(metadata={'author': 'Yao, Bo Yuan', 'creationDate': "D:20240806105132+08'00'", 'creator': 'Microsoft® Word for Microsoft 365', 'file_path': '/home/boyuan/RAG/sae_story.pdf', 'format': 'PDF 1.7', 'keywords': '', 'modDate': "D:20240806105132+08'00'", 'page': 0, 'producer': 'Microsoft® Word for Microsoft 365', 'source': '/home/boyuan/RAG/sae_story.pdf', 'subject': '', 'title': '', 'total_pages': 1, 'trapped': ''}, page_content='DCAI SAE team’s story: \n1. Richard is the team leader.'),
 Document(metadata