In [1]:
import requests
from bs4 import BeautifulSoup

# CFPB regulation page for section 1024.17; its visible text becomes the corpus file.
url = 'https://www.consumerfinance.gov/rules-policy/regulations/1024/17/'

# A timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.get(url, timeout=30)
# Stop on 4xx/5xx instead of silently saving an error page as the corpus.
response.raise_for_status()

# Strip the markup and keep only the text, one fragment per line.
soup = BeautifulSoup(response.content, 'html.parser')
text_content = soup.get_text(separator='\n', strip=True)

# NOTE(review): the index-building cell reads *.txt from DATA_PATH, while this
# writes to the current working directory — confirm the file ends up there.
file_path = 'regulations_1024_17.txt'
with open(file_path, 'w', encoding='utf-8') as file:
    file.write(text_content)

file_path


'regulations_1024_17.txt'

In [None]:
%pip install chainlit langchain langchain-community faiss-cpu ctransformer

: 

In [None]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import TextLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter 
from langchain.prompts import PromptTemplate
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import CTransformers
from langchain.chains import RetrievalQA
import chainlit as cl

: 

In [None]:
# Raw strings are required for these Windows paths: in a normal literal '\U'
# starts a Unicode escape (a SyntaxError here), and '\n' / '\v' would silently
# turn into control characters.
# Folder scanned for *.txt source documents.
DATA_PATH = r'C:\Users\User\Downloads\notebooks\notebooks\data'
# Folder where the FAISS index is saved and later reloaded from.
DB_FAISS_PATH = r'C:\Users\User\Downloads\notebooks\notebooks\vectorstore\db_faiss'

: 

In [18]:
def create_vector_db():
    """Build a FAISS index from the text files under ``DATA_PATH`` and save it.

    Every ``*.txt`` file in ``DATA_PATH`` is loaded, split into chunks of 500
    characters with a 50-character overlap, embedded with
    ``sentence-transformers/all-MiniLM-L6-v2`` on the CPU, and written to
    ``DB_FAISS_PATH``.

    Returns:
        The in-memory FAISS vector store that was just saved.

    Raises:
        ValueError: If no text chunks were produced (for example, no ``*.txt``
            files exist under ``DATA_PATH``).
    """
    # The scraping cell writes its file as UTF-8, so read it back as UTF-8
    # rather than relying on TextLoader's default encoding.
    loader = DirectoryLoader(DATA_PATH,
                             glob='*.txt',
                             loader_cls=TextLoader,
                             loader_kwargs={'encoding': 'utf-8'})

    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500,
                                                   chunk_overlap=50)
    texts = text_splitter.split_documents(documents)
    if not texts:
        # Fail with a readable message instead of an opaque error inside FAISS.
        raise ValueError(f"No text chunks found under {DATA_PATH!r}; nothing to index.")

    embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',
                                       model_kwargs={'device': 'cpu'})

    db = FAISS.from_documents(texts, embeddings)
    db.save_local(DB_FAISS_PATH)
    # Hand the store back so `vectorstore = create_vector_db()` is not None.
    return db

In [20]:
# Build the FAISS index from the *.txt files under DATA_PATH and save it to
# DB_FAISS_PATH. qa_bot() reloads the index from disk via FAISS.load_local
# rather than reading this variable.
vectorstore = create_vector_db()

In [22]:
# Prompt text for the QA chain. {context} and {question} are the two
# placeholders that set_custom_prompt() declares as input_variables.
custom_prompt_template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

def set_custom_prompt():
    """Wrap ``custom_prompt_template`` in a ``PromptTemplate``.

    Returns:
        A ``PromptTemplate`` built from the module-level template, with
        ``context`` and ``question`` declared as its input variables.
    """
    return PromptTemplate(
        input_variables=['context', 'question'],
        template=custom_prompt_template,
    )

In [23]:
def retrieval_qa_chain(llm, prompt, db):
    """Assemble a ``RetrievalQA`` chain of type ``'stuff'`` over a vector store.

    Args:
        llm: Language model handed to the chain.
        prompt: Prompt passed to the chain through ``chain_type_kwargs``.
        db: Vector store; its ``as_retriever`` is called with
            ``search_kwargs={'k': 2}``.

    Returns:
        The object produced by ``RetrievalQA.from_chain_type``, configured to
        return source documents alongside the answer.
    """
    retriever = db.as_retriever(search_kwargs={'k': 2})
    chain_options = {
        'llm': llm,
        'chain_type': 'stuff',
        'retriever': retriever,
        'return_source_documents': True,
        'chain_type_kwargs': {'prompt': prompt},
    }
    return RetrievalQA.from_chain_type(**chain_options)


In [24]:
def load_llm():
    """Create the Llama-2 chat model through LangChain's ``CTransformers`` wrapper.

    Returns:
        A ``CTransformers`` LLM for ``TheBloke/Llama-2-7B-Chat-GGML`` configured
        with ``max_new_tokens=512`` and ``temperature=0.5``.
    """
    llm = CTransformers(
        model="TheBloke/Llama-2-7B-Chat-GGML",
        model_type="llama",
        # Generation settings must go through `config`; passed as top-level
        # keyword arguments they are not forwarded to the underlying model.
        config={
            'max_new_tokens': 512,
            'temperature': 0.5,
        },
    )
    return llm

In [34]:
# Runtime dependency for the CTransformers LLM wrapper used by load_llm().
# NOTE(review): this install sits below the cell that imports CTransformers;
# installs normally belong in the first cell so Restart & Run All works.
%pip install ctransformers

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting ctransformers
  Downloading ctransformers-0.2.27-py3-none-any.whl.metadata (17 kB)
Collecting py-cpuinfo<10.0.0,>=9.0.0 (from ctransformers)
  Downloading py_cpuinfo-9.0.0-py3-none-any.whl.metadata (794 bytes)
Downloading ctransformers-0.2.27-py3-none-any.whl (9.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m20.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading py_cpuinfo-9.0.0-py3-none-any.whl (22 kB)
Installing collected packages: py-cpuinfo, ctransformers
Successfully installed ctransformers-0.2.27 py-cpuinfo-9.0.0
Note: you may need to restart the kernel to use updated packages.


In [32]:
def qa_bot():
    """Build the complete question-answering chain.

    Reloads the FAISS index stored at ``DB_FAISS_PATH`` with the
    ``sentence-transformers/all-MiniLM-L6-v2`` embedding model on the CPU, then
    combines it with the LLM from ``load_llm`` and the prompt from
    ``set_custom_prompt``.

    Returns:
        Whatever ``retrieval_qa_chain`` returns for those three components.
    """
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'},
    )
    # NOTE(review): the flag name says this opts in to unsafe deserialization;
    # only point DB_FAISS_PATH at an index this notebook produced itself.
    vector_db = FAISS.load_local(
        DB_FAISS_PATH,
        embedder,
        allow_dangerous_deserialization=True,
    )
    return retrieval_qa_chain(load_llm(), set_custom_prompt(), vector_db)

In [26]:
def final_result(query, qa_chain=None):
    """Answer a single query with the retrieval QA chain.

    Args:
        query: The question text, passed to the chain under the ``'query'`` key.
        qa_chain: Optional chain to reuse, as returned by ``qa_bot()``. When
            omitted, a new chain is built for this call, which reloads the
            index and the LLM; pass one in when answering several queries.

    Returns:
        The chain's response for the query.
    """
    if qa_chain is None:
        qa_chain = qa_bot()
    # `invoke` replaces calling the chain object directly, which is deprecated.
    response = qa_chain.invoke({'query': query})
    return response

In [None]:

# Sample questions sent to the QA chain one at a time.
query_list = [
    "What is escrow account?",
    "Limits on payments to escrow accounts",
    "What are some Methods of escrow account analysis.",
]

: 

In [None]:
# Collect one answer per query. The accumulator lives outside the loop so that
# earlier answers are kept, and the list is iterated directly (an int such as
# len(query_list) is not iterable).
responses = []
for query in query_list:
    responses.append(final_result(query))

print(responses)

: 

In [30]:
@cl.on_chat_start
async def start():
    """Chat-start handler: build the QA chain and store it in the user session.

    The "Starting the bot..." message is sent before the chain is built so it
    is shown while the chain loads, then replaced by the welcome text once the
    chain has been stored under the ``"chain"`` session key.
    """
    msg = cl.Message(content="Starting the bot...")
    await msg.send()

    # NOTE(review): qa_bot() is a synchronous call inside an async handler.
    chain = qa_bot()
    # Store the chain before announcing readiness so main() can always find it.
    cl.user_session.set("chain", chain)

    msg.content = "Hi, Welcome to Consumer Finance Bot. What is your query?"
    await msg.update()

@cl.on_message
async def main(message: cl.Message):
    """Message handler: answer the user's message with the session's QA chain.

    Args:
        message: Incoming chat message; its ``content`` is used as the query.

    Sends one reply containing the chain's ``result`` followed by either the
    string form of the returned source documents or "No sources found".
    """
    chain = cl.user_session.get("chain")
    cb = cl.AsyncLangchainCallbackHandler(
        stream_final_answer=True, answer_prefix_tokens=["FINAL", "ANSWER"]
    )
    # NOTE(review): presumably tells the handler the final answer has already
    # started, so it does not wait for the prefix tokens — confirm in Chainlit docs.
    cb.answer_reached = True
    # `ainvoke` replaces the deprecated `acall`; callbacks travel in the config.
    res = await chain.ainvoke({"query": message.content}, config={"callbacks": [cb]})
    answer = res["result"]
    sources = res["source_documents"]

    if sources:
        answer += "\nSources:" + str(sources)
    else:
        answer += "\nNo sources found"

    await cl.Message(content=answer).send()