## RAG Demo Using Offline Docs

### Import the packages

In [1]:
import warnings
warnings.filterwarnings('ignore')

import os
import openai
import sys
from dotenv import load_dotenv, find_dotenv

# Load variables from the "env_vars.env" file (located with find_dotenv) so the
# API key does not have to be hardcoded in the notebook.
_ = load_dotenv(find_dotenv("env_vars.env"))  # return value is discarded
openai.api_key  = os.environ['OPENAI_API_KEY']  # raises KeyError if OPENAI_API_KEY is not set

### Load the documents

In [2]:
from langchain.document_loaders import PyPDFLoader

# One loader per source PDF; add more entries here to index several files.
loaders = [PyPDFLoader("../docs/Inview_June_2023.pdfInview_June_2023.pdf")]

# Accumulate the documents returned by every loader into a single flat list.
docs = []
for loader in loaders:
    docs += loader.load()

print(len(docs))

6


In [3]:
# Split into chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunks of up to 1500 characters, with 150 characters shared between neighbours.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)

splits = text_splitter.split_documents(docs)
print(len(splits))

24


### Store "chunks" as vectors

In [4]:
# The embedding libraries
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.embeddings import SentenceTransformerEmbeddings

from langchain.vectorstores import Chroma

import shutil

persist_directory = '../docs/chroma-test/'
# Remove old database files, if any. Done in pure Python (instead of `!rm -rf`) so
# the cell also works where `rm` is unavailable, and so the path is taken from
# `persist_directory` rather than repeated as a literal. ignore_errors=True keeps
# the "no error when the directory does not exist" behaviour of `rm -rf`.
shutil.rmtree(persist_directory, ignore_errors=True)

# Embedding back-end selection: exactly one `embeddings = ...` line should be active.
# The two commented-out assignments below are alternatives to the active one.

# # OpenAI embedding
# embeddings = OpenAIEmbeddings()

# # Try a different embedding (HF)
# embeddings = HuggingFaceEmbeddings()

# Active choice: SentenceTransformer embedding with the "all-MiniLM-L6-v2" model
embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

In [5]:
# Embed every chunk and index it in a Chroma collection stored under persist_directory.
vectordb = Chroma.from_documents(
    documents=splits, embedding=embeddings, persist_directory=persist_directory
)

# Number of entries now held by the collection
print(vectordb._collection.count())

24


### Semantic search

In [6]:
question = "What industry sectors are recommended for stocks?"

# NOTE: `docs` is rebound here, from the loaded PDF pages to the top-3 search hits.
docs = vectordb.similarity_search(query=question, k=3)
print(len(docs))

3


In [7]:
# Inspect the top-ranked result
docs[0].page_content

'Inview June 2023   |  05Alternatives\nALTERNATIVES\n↔ • Hedge Fund\nReal Assets• Private Markets\nCommodity\nInsurance•\n•\n•↔\n↔\n↔\n↔Weighting\nchange from\nlast month\n– Underweight  + Overweight  • Neutral \n↔ No change ↑ Increase ↓ DecreaseAllocation\nversus the\nbenchmarkASSET ALLOCATION\nEquity Sector Views (cont.)\nUS\nWe remain cautious given the macroeconomic uncertainties \nand so choose not to take any large sector bets at this time. \nInstead, focus is on individual company earnings resilience \nfrom a bottom-up perspective. We are underweight cyclical \nsectors where there is an unfavourable risk-reward profile, \nsuch as banks, heavy industrials and real estate. There have \nbeen selective additions to exposure in areas that were first \nimpacted by the downturn and subsequently likely to be the \nfirst to recover, including semiconductors, digital advertising \nand housing.  \nAsia ex-Japan\nWe maintain our overweight position in Asia ex-Japan. Within \nthe region we m

In [8]:
for d in docs:
    print(d.metadata)

{'page': 4, 'source': '../docs/Inview_June_2023.pdfInview_June_2023.pdf'}
{'page': 3, 'source': '../docs/Inview_June_2023.pdfInview_June_2023.pdf'}
{'page': 4, 'source': '../docs/Inview_June_2023.pdfInview_June_2023.pdf'}


In [9]:
def pretty_print_docs(docs):
    """Print the text of each document under a numbered heading.

    Each entry is rendered as "Document N:" followed by a blank line and the
    document's ``page_content``; consecutive entries are separated by a rule of
    100 dashes on its own line.
    """
    rule = "\n" + "-" * 100 + "\n"
    sections = []
    for number, document in enumerate(docs, start=1):
        sections.append(f"Document {number}:\n\n" + document.page_content)
    print(rule.join(sections))

In [10]:
pretty_print_docs(docs)

Document 1:

Inview June 2023   |  05Alternatives
ALTERNATIVES
↔ • Hedge Fund
Real Assets• Private Markets
Commodity
Insurance•
•
•↔
↔
↔
↔Weighting
change from
last month
– Underweight  + Overweight  • Neutral 
↔ No change ↑ Increase ↓ DecreaseAllocation
versus the
benchmarkASSET ALLOCATION
Equity Sector Views (cont.)
US
We remain cautious given the macroeconomic uncertainties 
and so choose not to take any large sector bets at this time. 
Instead, focus is on individual company earnings resilience 
from a bottom-up perspective. We are underweight cyclical 
sectors where there is an unfavourable risk-reward profile, 
such as banks, heavy industrials and real estate. There have 
been selective additions to exposure in areas that were first 
impacted by the downturn and subsequently likely to be the 
first to recover, including semiconductors, digital advertising 
and housing.  
Asia ex-Japan
We maintain our overweight position in Asia ex-Japan. Within 
the region we maintain a relativel

In [11]:
# Persist the vector DB for RAG
vectordb.persist()

### Retrieval

In [12]:
from langchain.chat_models import ChatOpenAI

# Configure the chat model used by the QA chains below.
# `llm_name` is also read as a global by load_db() in the chatbot section.
llm_name = "gpt-3.5-turbo"
llm = ChatOpenAI(model_name=llm_name, temperature=0)

  warn_deprecated(


In [13]:
# Create QA chain and prompt template
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Build the prompt. The template exposes two placeholders, {context} and {question},
# and instructs the model to answer briefly or say that it does not know.
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible.
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

# Create the retriever (similarity search, k=3) and the QA chain on top of it.
# The chain is only constructed here; it is first called in the next cell.
retriever = vectordb.as_retriever(search_type="similarity", search_kwargs={"k": 3})

qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=retriever,
    return_source_documents=True,  # later cells refer to result["source_documents"]
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)

In [14]:
# question = "What is the main topic of this document?"
# question = "What industry sectors are recommended for stocks?"
question = "What is the house view between stocks and bonds?"

result = qa_chain({"query": question})
result["result"]

  warn_deprecated(
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


'The house view is to maintain a moderate overweight in both equities and bonds, with adjustments in the allocation within asset classes. Within equities, there is a trimming of the overweight in Asian equities and a reduction in exposure to European markets. In fixed income, longer-dated government bonds and local currency emerging market debt are seen as attractive.'

In [15]:
# result["source_documents"][2]

In [16]:
question = "What does the document suggest about currencies?"

result = qa_chain({"query": question})
result["result"]

'The document suggests that the US dollar is expected to weaken in the second half of the year due to interest rate trends in Europe and the UK. Additionally, the document indicates an increase in exposure to emerging market local currency debt and a slight reduction in exposure to emerging market hard currency debt. Overall, the document suggests a strategic approach to currency allocation based on global economic conditions and central bank actions.'

In [17]:
# pretty_print_docs(result["source_documents"])

### Alternative: Compression retrieval

In [18]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

# Compressor built from the chat model defined above.
compressor = LLMChainExtractor.from_llm(llm)

# Retriever that fetches the top-3 hits from the vector store and passes them
# through the compressor.
compression_retriever = ContextualCompressionRetriever(
    base_retriever=vectordb.as_retriever(search_kwargs=dict(k=3)),
    base_compressor=compressor,
)

In [19]:
question = "What does the document suggest about bonds?"

compressed_docs = compression_retriever.get_relevant_documents(question)
pretty_print_docs(compressed_docs)

Document 1:

to limit the riskiness of the portfolio, exposure to high yield bonds should be reduced in favour of investment grade corporate bonds.
----------------------------------------------------------------------------------------------------
Document 2:

- Within fixed income, markets currently anticipate the Federal Reserve will cut rates by the end of the year as inflation is expected to decelerate. Therefore, in the context of a decelerating economy, declining inflation, and tight spreads in both US and European credit, we are reducing our exposure to high yield. Additionally, in response to changes in rate expectations and the recent increase in yields, portfolio duration is being increased to levels around 5-7 years by adding to sovereign bond exposure across currencies. Investment grade spreads remain attractive and therefore we maintain our overweight position.
----------------------------------------------------------------------------------------------------
Document 3:

In [20]:
question = "What does the document suggest about bonds?"

# Rebuild the QA chain (rebinding `qa_chain`) on top of the compression retriever.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=compression_retriever,
    return_source_documents=True,
    chain_type_kwargs=dict(prompt=QA_CHAIN_PROMPT),
)

result = qa_chain(dict(query=question))
result["result"]

'The document suggests reducing exposure to high yield bonds in favor of investment grade corporate bonds, increasing portfolio duration with sovereign bond exposure, and finding attractive investment grade spreads.'

In [21]:
# Try on Mistral 7B model (offline)
from llama_cpp import Llama

# Location of the GGUF weights. Set the MISTRAL_MODEL_PATH environment variable to
# point at your own copy; the original hardcoded path is kept as the fallback so
# the cell behaves exactly as before when the variable is not set.
model_path = os.environ.get(
    "MISTRAL_MODEL_PATH",
    "/Users/trucvietle/Downloads/llm-models/mistral-7b-instruct-v0.1.Q6_K.gguf",
)

# NOTE: this rebinds `llm`, which until now referred to the ChatOpenAI model.
# Cells that pass `llm` to LangChain (compressor / RetrievalQA) should not be
# re-run after this one without re-creating the ChatOpenAI instance first.
llm = Llama(model_path=model_path,
            n_ctx=8192, n_batch=512,
            n_threads=7, n_gpu_layers=2,
            verbose=False, seed=42)

llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from /Users/trucvietle/Downloads/llm-models/mistral-7b-instruct-v0.1.Q6_K.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.1
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader:

In [22]:
query = "What does the document suggest about bonds?"

# Text of every compressed document, in the order the retriever returned them.
contexts = list(map(lambda doc: doc.page_content, compressed_docs))

# Fixed header placed before the context snippets.
prompt_start = (
    "Answer the question based on the context below.\n\n"
    "Context:\n"
)

# Footer carrying the question and the cue for the model to start answering.
prompt_end = f"\n\nQuestion: {query}\nAnswer:"

# Header + snippets (separated by a "---" rule) + footer.
prompt = "".join([prompt_start, "\n\n---\n\n".join(contexts), prompt_end])

print(prompt)

Answer the question based on the context below.

Context:
to limit the riskiness of the portfolio, exposure to high yield bonds should be reduced in favour of investment grade corporate bonds.

---

- Within fixed income, markets currently anticipate the Federal Reserve will cut rates by the end of the year as inflation is expected to decelerate. Therefore, in the context of a decelerating economy, declining inflation, and tight spreads in both US and European credit, we are reducing our exposure to high yield. Additionally, in response to changes in rate expectations and the recent increase in yields, portfolio duration is being increased to levels around 5-7 years by adding to sovereign bond exposure across currencies. Investment grade spreads remain attractive and therefore we maintain our overweight position.

---

increased bond yields and the nearing of the end of monetary policy tightening make longer-dated government bonds attractive, including local currency emerging market de

In [23]:
# Request only the completion (echo=False) rather than echoing the prompt back and
# stripping it afterwards with str.replace, which would also delete any verbatim
# repetition of the prompt inside the generated text.
output = llm(prompt, echo=False, stream=False, max_tokens=4096)

output_str = output["choices"][0]["text"]
print(output_str)

 The document suggests that in the context of a decelerating economy, declining inflation, and tight spreads in both US and European credit, reducing exposure to high yield bonds should be considered in favor of investment grade corporate bonds. Additionally, it suggests that longer-dated government bonds, including local currency emerging market debt, are attractive due to increased bond yields and the nearing of the end of monetary policy tightening.


## Create a chatbot!!

In [None]:
import panel as pn  # GUI
pn.extension()

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.document_loaders import TextLoader
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.document_loaders import PyPDFLoader

In [None]:
# from langchain.chat_models import ChatOpenAI

# # Invoke the LLM
# llm_name = "gpt-3.5-turbo"
# llm = ChatOpenAI(model_name=llm_name, temperature=0)

In [None]:
def load_db(file, chain_type, k, model_name=None, chunk_size=1000, chunk_overlap=150):
    """Build a conversational retrieval chain over a single PDF.

    The PDF is loaded, split into overlapping chunks, embedded with
    ``OpenAIEmbeddings`` and indexed in a ``DocArrayInMemorySearch`` store.

    Args:
        file: Path of the PDF to load.
        chain_type: Passed through as ``chain_type`` to
            ``ConversationalRetrievalChain.from_llm`` (callers in this notebook
            use ``"stuff"``).
        k: Number of chunks the retriever returns per query.
        model_name: Chat model name. When ``None`` (the default) the
            notebook-level ``llm_name`` variable is used, which is what this
            function always did before the parameter existed.
        chunk_size: Maximum chunk length handed to the text splitter.
        chunk_overlap: Overlap between consecutive chunks.

    Returns:
        The chain object. No memory is attached; the caller supplies
        ``chat_history`` on every call, and the result contains ``"answer"``,
        ``"generated_question"`` and ``"source_documents"``.

    Raises:
        NameError: If ``model_name`` is ``None`` and ``llm_name`` has not been
            defined yet (raised before any document is loaded).
    """
    # Resolve the model first so a missing global fails fast, before the
    # (slow, billable) loading and embedding steps.
    if model_name is None:
        model_name = llm_name

    # load documents
    documents = PyPDFLoader(file).load()

    # split documents
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    docs = text_splitter.split_documents(documents)

    # define embedding
    embeddings = OpenAIEmbeddings()

    # create vector database from data
    db = DocArrayInMemorySearch.from_documents(docs, embeddings)

    # define retriever
    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": k})

    # create a chatbot chain. Memory is managed externally.
    qa = ConversationalRetrievalChain.from_llm(
        llm=ChatOpenAI(model_name=model_name, temperature=0),
        chain_type=chain_type,
        retriever=retriever,
        return_source_documents=True,
        return_generated_question=True,
    )

    return qa

In [None]:
import panel as pn
import param

class cbfs(param.Parameterized):
    """State and callbacks behind the chatbot dashboard.

    Holds the retrieval chain, the running chat history and the rendered
    conversation rows, and exposes the methods the Panel layout binds to.

    The methods rely on notebook-level names defined in other cells:
    ``load_db`` and the widgets ``file_input``, ``button_load`` and ``inp``.
    Those must exist before the corresponding method is called.
    """
    # (query, answer) tuples; passed to the chain as "chat_history" on each turn
    chat_history = param.List([])
    # Answer text of the most recent turn
    answer = param.String("")
    # "generated_question" returned by the chain for the most recent turn
    db_query  = param.String("")
    # "source_documents" returned by the chain for the most recent turn
    db_response = param.List([])

    def __init__(self, **params):
        """Initialise the state and build the chain for the default PDF."""
        super(cbfs, self).__init__(**params)
        self.panels = []  # rendered User/ChatBot rows, oldest first
        # self.loaded_file = "docs/cs229_lectures/MachineLearning-Lecture01.pdf"
        self.loaded_file = "../docs/Inview_June_2023.pdfInview_June_2023.pdf"
        self.qa = load_db(self.loaded_file, "stuff", 4)

    def call_load_db(self, count):
        """Reload the chain from the uploaded PDF and report the loaded file.

        Args:
            count: Click count of the "Load DB" button; 0 means the initial
                render, in which case nothing is reloaded.

        Returns:
            A Markdown pane naming the currently loaded file.
        """
        if count == 0 or file_input.value is None:  # init or no file specified
            return pn.pane.Markdown(f"Loaded File: {self.loaded_file}")

        file_input.save("temp.pdf")  # local copy
        self.loaded_file = file_input.filename
        # Switch the button style while the database is being rebuilt
        button_load.button_style = "outline"
        self.qa = load_db("temp.pdf", "stuff", 4)
        button_load.button_style = "solid"

        # A new document starts a new conversation
        self.clr_history()
        return pn.pane.Markdown(f"Loaded File: {self.loaded_file}")

    def convchain(self, query):
        """Run one chat turn and return the rendered conversation.

        Args:
            query: The user's text; an empty value yields an empty placeholder
                box without calling the chain.

        Returns:
            A scrollable WidgetBox containing every User/ChatBot row so far.
        """
        if not query:
            return pn.WidgetBox(pn.Row('User:', pn.pane.Markdown("", width=600)), scroll=True)
        result = self.qa({"question": query, "chat_history": self.chat_history})
        self.chat_history.extend([(query, result["answer"])])
        self.db_query = result["generated_question"]
        self.db_response = result["source_documents"]
        self.answer = result['answer']
        self.panels.extend([
            pn.Row('User:', pn.pane.Markdown(query, width=600)),
            # `styles` (not `style`) to match every other pane in this class
            pn.Row('ChatBot:', pn.pane.Markdown(self.answer, width=600, styles={'background-color': '#F6F6F6'}))
        ])

        inp.value = ''  # clears loading indicator when cleared
        return pn.WidgetBox(*self.panels, scroll=True)

    # The dependency must name the parameter exactly: 'db_query', with no
    # trailing space, otherwise it refers to a parameter that does not exist.
    @param.depends('db_query', )
    def get_lquest(self):
        """Return a column showing the last question sent to the database."""
        if not self.db_query:
            return pn.Column(
                pn.Row(pn.pane.Markdown(f"Last question to DB:", styles={'background-color': '#F6F6F6'})),
                pn.Row(pn.pane.Str("no DB accesses so far"))
            )
        return pn.Column(
            pn.Row(pn.pane.Markdown(f"DB query:", styles={'background-color': '#F6F6F6'})),
            pn.pane.Str(self.db_query)
        )

    @param.depends('db_response', )
    def get_sources(self):
        """Return a box listing the last source documents, or None if there are none."""
        if not self.db_response:
            return
        rlist = [pn.Row(pn.pane.Markdown(f"Result of DB lookup:", styles={'background-color': '#F6F6F6'}))]
        for doc in self.db_response:
            rlist.append(pn.Row(pn.pane.Str(doc)))
        return pn.WidgetBox(*rlist, width=600, scroll=True)

    @param.depends('convchain', 'clr_history')
    def get_chats(self):
        """Return a box listing every (query, answer) exchange recorded so far."""
        if not self.chat_history:
            return pn.WidgetBox(pn.Row(pn.pane.Str("No History Yet")), width=600, scroll=True)
        rlist = [pn.Row(pn.pane.Markdown(f"Current Chat History variable", styles={'background-color': '#F6F6F6'}))]
        for exchange in self.chat_history:
            rlist.append(pn.Row(pn.pane.Str(exchange)))
        return pn.WidgetBox(*rlist, width=600, scroll=True)

    def clr_history(self, count=0):
        """Reset the chat history.

        Args:
            count: Unused; present so the method can be used directly as a
                button click callback, which passes one positional argument.
        """
        # Only the history sent to the chain is reset; self.panels is left as is.
        self.chat_history = []
        return

In [None]:
# Create a chatbot
cb = cbfs()

In [None]:
# # Test the functions
# filename = "../docs/Inview_June_2023.pdfInview_June_2023.pdf"
# qa = load_db(filename, "stuff", 4)

In [None]:
# Create the panels
# NOTE: `file_input`, `button_load` and `inp` are read as globals by
# cbfs.call_load_db and cbfs.convchain, so these variable names must be kept.
file_input = pn.widgets.FileInput(accept='.pdf')
button_load = pn.widgets.Button(name="Load DB", button_type='primary')
button_clearhistory = pn.widgets.Button(name="Clear History", button_type='warning')
# clr_history takes one optional positional argument (`count`), which receives
# whatever the button passes to its click callback.
button_clearhistory.on_click(cb.clr_history)
inp = pn.widgets.TextInput( placeholder='Enter text here…')

# Bind the chatbot callbacks to the widgets that drive them:
# call_load_db gets the button's click count, convchain gets the text input.
bound_button_load = pn.bind(cb.call_load_db, button_load.param.clicks)
conversation = pn.bind(cb.convchain, inp) 

# Illustration shown on the "Configure" tab; expects ./img/convchain.jpg
jpg_pane = pn.pane.Image( './img/convchain.jpg')

# Tab 1: text input plus the running conversation
tab1 = pn.Column(
    pn.Row(inp),
    pn.layout.Divider(),
    pn.panel(conversation,  loading_indicator=True, height=300),
    pn.layout.Divider(),
)
# Tab 2: last question sent to the database and the source documents returned
tab2= pn.Column(
    pn.panel(cb.get_lquest),
    pn.layout.Divider(),
    pn.panel(cb.get_sources ),
)
# Tab 3: the chat history list
tab3= pn.Column(
    pn.panel(cb.get_chats),
    pn.layout.Divider(),
)
# Tab 4: file upload / reload controls, the clear-history button and the illustration
tab4=pn.Column(
    pn.Row( file_input, button_load, bound_button_load),
    pn.Row( button_clearhistory, pn.pane.Markdown("Clears chat history. Can use to start a new topic" )),
    pn.layout.Divider(),
    pn.Row(jpg_pane.clone(width=400))
)
# Top-level layout: title row above the four tabs
dashboard = pn.Column(
    pn.Row(pn.pane.Markdown('# ChatWithYourData_Bot')),
    pn.Tabs(('Conversation', tab1), ('Database', tab2), ('Chat History', tab3),('Configure', tab4))
)

In [None]:
# dashboard