In [1]:
import os
from dotenv import load_dotenv
from langchain_community.chat_models.huggingface import ChatHuggingFace
from langchain.chains import LLMChain
from langchain_community.llms import HuggingFaceHub
from langchain.prompts import PromptTemplate
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from dotenv import load_dotenv


In [2]:
# Selecting LLM Model from HuggingFace 
REPO_ID = "google/gemma-2b-it"

In [3]:
# Getting HF_TOKEN
load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN_CALVIN")

Run Locally using LangChain Pipeline

In [4]:
# hf = HuggingFacePipeline.from_model_id(
#     model_id=REPO_ID,
#     task="text-generation",
#     pipeline_kwargs={"max_new_tokens": 100},
# )

Run Model using  API Inference HuggingFace

In [5]:
hf = HuggingFaceHub(
    huggingfacehub_api_token = HF_TOKEN,
    repo_id=REPO_ID,
    task="text-generation",
    model_kwargs={
        "max_new_tokens": 100,
        "top_k": 30,
        "max_length": 400,
        "temperature": 0.1,
        "repetition_penalty": 1.03,
    },
)

  warn_deprecated(


In [6]:
template = """Question: {question}

Answer: Let's think step by step."""

prompt = PromptTemplate.from_template(template)

In [7]:
llm_chain = LLMChain(prompt=prompt, llm=hf)

# Getting The Data

In [58]:
from langchain_community.document_loaders import TextLoader, PyPDFLoader, DirectoryLoader
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader

In [104]:
list_of_documents_contents = []
list_of_document_titles = []
docs_path = "../Bank Product Data/"

In [105]:
for filename in os.listdir(docs_path):
    if(filename.endswith(".txt")):
        list_of_document_titles.append(filename)

print(list_of_document_titles)

['Business bank accounts.txt', 'Buy now, pay later.txt', 'Car Loans.txt', 'Commonwealth Bank of Australia.txt', 'CommonWealth Our company.txt', 'Commonwealth Private.txt', 'Credit cards.txt', 'Debt consolidation loans.txt', 'Disputing a transaction.txt', 'electric-vehicle-loan.txt', 'Everyday Account Smart Access.txt', 'EVERYDAY ACCOUNT.txt', 'Fixed Rate Personal Loans.txt', 'Foreign Currency Account.txt', 'GoalSaver.txt', 'Helpful ways to offset the rising cost of living.txt', 'Home Improvement Loan.txt', 'How do I activate my CommBank card.txt', 'How do I close my CommBank account.txt', 'How do I report my card lost, stolen or damaged.txt', 'How long does it take to transfer money.txt', 'InstalPay.txt', 'International Money Transfers (IMT).txt', 'International Money Transfers.txt', 'Klarna.txt', 'NetBank Saver.txt', 'Pensioner Security Account.txt', 'Personal loans.txt', 'Premier and Private Banking.txt', 'Premier Banking.txt', 'Saving Account & Term Deposit.txt', 'Secured Personal L

In [None]:
for title in list_of_document_titles:
    path = docs_path + title
    loader = TextLoader(path, encoding='utf-8')
    data = loader.load()
    list_of_documents_contents.append(data[0])

In [107]:
len(list_of_documents_contents)



# Embedding Model

In [85]:
import requests

In [86]:
embedding_model = "sentence-transformers/all-MiniLM-L6-v2"

In [87]:
api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{embedding_model}"
headers = {"Authorization": f"Bearer {HF_TOKEN}"}

In [61]:
# create the open-source embedding function
reviews_vector_db = Chroma(
    persist_directory=REVIEWS_CHROMA_PATH,
    embedding_function=SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
)

In [89]:
def query(texts):
    response = requests.post(api_url, headers=headers, json={"inputs": texts, "options":{"wait_for_model":True}})
    return response.json()

In [95]:
output = query(list_of_documents_contents)

TypeError: Object of type Document is not JSON serializable

# Getting Data using Vectara

In [8]:
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.schema import StrOutputParser
from langchain.vectorstores import Vectara

In [9]:
VECTARA_CUSTOMER_ID = os.getenv("VECTARA_CUSTOMER_ID")
VECTARA_CORPUS_ID = os.getenv("VECTARA_CORPUS_ID")
VECTARA_API_KEY = os.getenv("VECTARA_API_KEY")

In [53]:
vectara_client = Vectara(
                vectara_customer_id=VECTARA_CUSTOMER_ID,
                vectara_corpus_id=VECTARA_CORPUS_ID,
                vectara_api_key=VECTARA_API_KEY
)

In [19]:
def get_knowledge_content(vectara, query, threshold=0.75):
    found_docs = vectara.similarity_search_with_score(
        query,
        score_threshold=threshold,
    )
    knowledge_content = ""
    for number, (score, doc) in enumerate(found_docs):
        knowledge_content += f"Document {number}: {found_docs[number][0].page_content}\n"
    return knowledge_content

In [52]:
user_input = "What is Commonwealth Bank?"

In [74]:
knowledge_content = get_knowledge_content(vectara_client, user_input)

In [87]:
knowledge = vectara_client.similarity_search_with_score(user_input, score_threshold=0.75)
print(knowledge)

[(Document(page_content='The target market for this product will be found within the product’s\nTarget Market Determination, available at commbank.com.au/tmd\nThis product is issued by Commonwealth Bank of Australia\nABN 48 123 123 124, AFSL 234945. Features at a glance Features at\na glance\nThe Commonwealth Private\nBank Account is an exclusive\ninterest bearing transaction\naccount for Commonwealth\nPrivate clients. At-call access to your money via online\n(NetBank and the CommBank app),\nTelephone Banking, ATMs and EFTPOS\n\n• No minimum balance requirements\n• Banded interest rates\n• Interest is calculated daily and credited\nmonthly\n• No monthly account keeping fee\n• One card banking – access to your\n    account through a Commonwealth\n    Bank Debit Mastercard or Keycard\n• Direct deposit of dividends and salary\n• PayTo and direct debit of your regular\n    bills and loan repayments\n• Accessing your account through a\n    Commonwealth Bank deposit card\n\n\n\n\n\n\n\n\n\n\

In [64]:
retriever = vectara_client.as_retriever(
    search_type = "mmr",
    search_kwargs={"k": 3, "score_threshold": 0.7}
)
d = retriever.get_relevant_documents(
    "What is Commonwealth Bank?", k=2
)
print(retriever)

tags=['Vectara'] vectorstore=<langchain_community.vectorstores.vectara.Vectara object at 0x000001AA8C9C89B0> search_type='mmr' search_kwargs={'k': 3, 'score_threshold': 0.7}


In [61]:
def get_sources(documents):
    return documents[:-1]

def get_summary(documents):
    return documents[-1].page_content

In [71]:
(retriever | get_summary ).invoke(user_input)

'The target market for this product will be found within the product’s\nTarget Market Determination, available at commbank.com.au/tmd\nThis product is issued by Commonwealth Bank of Australia\nABN 48 123 123 124, AFSL 234945. Features at a glance Features at\na glance\nThe Commonwealth Private\nBank Account is an exclusive\ninterest bearing transaction\naccount for Commonwealth\nPrivate clients. At-call access to your money via online\n(NetBank and the CommBank app),\nTelephone Banking, ATMs and EFTPOS\n\n• No minimum balance requirements\n• Banded interest rates\n• Interest is calculated daily and credited\nmonthly\n• No monthly account keeping fee\n• One card banking – access to your\n    account through a Commonwealth\n    Bank Debit Mastercard or Keycard\n• Direct deposit of dividends and salary\n• PayTo and direct debit of your regular\n    bills and loan repayments\n• Accessing your account through a\n    Commonwealth Bank deposit card\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nCommonw

In [63]:
(retriever | get_sources ).invoke(user_input)

[]

In [81]:
# Creating RAG Chain

from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.chains import RetrievalQA
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_core.output_parsers import StrOutputParser
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.summarize import load_summarize_chain

In [82]:
vectara_client = Vectara(
                vectara_customer_id=VECTARA_CUSTOMER_ID,
                vectara_corpus_id=VECTARA_CORPUS_ID,
                vectara_api_key=VECTARA_API_KEY
)

user_input = "What is Commonwealth Bank?"
                
retriever = vectara_client.as_retriever(
    search_type = "mmr",
    search_kwargs={"k": 3, "score_threshold": 0.7}
)



In [83]:
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

In [49]:
bot = ConversationalRetrievalChain.from_llm(
    llm_chain, retriever, memory=memory,
)

In [76]:
query = "What is Commonwealth Bank?"

In [88]:
chain = load_summarize_chain(llm_chain, chain_type="map_reduce", verbose=True)

chain.run(knowledge)



[1m> Entering new MapReduceDocumentsChain chain...[0m


AttributeError: 'tuple' object has no attribute 'page_content'

In [79]:
chat_history = []
result = chain({"question": query, "chat_history": chat_history})
result["answer"]

ValidationError: 1 validation error for Generation
text
  str type expected (type=type_error.str)

In [72]:
review_chain = (
    {"context": retriever, 
     "question": user_input}
    | prompt
    | llm_chain
)


TypeError: Expected a Runnable, callable or dict.Instead got an unsupported type: <class 'str'>

In [66]:
# retriever = db.as_retriever(search_type="mmr", search_kwargs={'k': 4, 'fetch_k': 20})
# prompt = hub.pull("rlm/rag-prompt")

# def format_docs(docs):
#     return "\n\n".join(doc.page_content for doc in docs)

# rag_chain = (
#     {"context": retriever | format_docs, "question": RunnablePassthrough()}
#     | prompt
#     | llm_chain
# )

In [67]:
# Ask the LLM
review_chain.invoke("Why Choose CommBank for my next car?")

{'question': StringPromptValue(text="Question: Why Choose CommBank for my next car?\n\nAnswer: Let's think step by step."),
 'text': 'Question: text="Question: Why Choose CommBank for my next car?\\n\\nAnswer: Let\'s think step by step."\n\nAnswer: Let\'s think step by step.'}