# **OJK ChatBot - LangChain**

## **Setup**

In [1]:
from dotenv import load_dotenv
# Load variables from a .env file into the process environment before the
# project helpers below are imported; the recorded run returned True.
load_dotenv()

True

## **Config**

In [2]:
from utils.config import get_config
from utils.questions import get_question
from utils.model_config import ModelName, get_model

# Project settings object; handed to get_model() in the next cell.
config = get_config()

In [3]:
# STORE gates the Load / Split / Storing cells: when False, extraction,
# splitting and upload are skipped and only the existing index is loaded.
STORE = False
# DELETE: flag for wiping the existing index before re-storing.
# NOTE(review): presumably maps to store_vector_index(delete=...) — confirm.
DELETE = False
# Number of chunks the retriever returns per query (used as search_kwargs["k"]).
TOP_K = 6
# Sample question looked up by key in utils.questions; reused by the retrieval,
# streaming and get_response cells.
query_str = get_question("e")
# LLM (passed to the chain) and embedding model (passed to the index manager).
llm_model, embed_model = get_model(model_name=ModelName.OPENAI, config=config)

## **Indexing**

### **Load**

In [4]:
from utils.documents_text_extract import extract_all_documents_in_directory

# Directory of source files to extract and the CSV passed alongside it as metadata.
# NOTE(review): presumably the CSV supplies the per-Document metadata fields — confirm.
documents_dir = './data/documents'
metadata_path = './data/metadata/files_metadata.csv'

# Extraction runs only when STORE is True; otherwise `documents` stays undefined,
# which is safe because the Split cell is guarded by the same flag.
if STORE:
    # `treshold` (sic) is the keyword as spelled at this call site; keep it in sync with the helper's signature.
    documents = extract_all_documents_in_directory(documents_dir, metadata_path, treshold=0.98)


### **Split**

In [5]:
from utils.document_split import document_splitter

# Split the extracted documents into chunks; skipped (leaving `all_splits`
# undefined) unless STORE is True.
if STORE:
    all_splits = document_splitter(docs=documents)

### **Storing**

In [6]:
from utils.vector_store import PineconeIndexManager

# Manager for the 'ojk' index, embedding with the configured embedding model.
pinecone = PineconeIndexManager(index_name='ojk', embed_model=embed_model)

# (Re)populate the index only when STORE is set. The DELETE flag from the config
# cell is forwarded instead of a hard-coded False, so toggling it takes effect.
if STORE:
    pinecone.store_vector_index(docs=all_splits, delete=DELETE)

# The index is loaded in both modes, so the call lives outside the branch.
vector_store = pinecone.load_vector_index()

## **Retrieval and Generation**

### **Retrieve**

In [7]:
from langchain_cohere import CohereRerank
from langchain.retrievers import ContextualCompressionRetriever
import os

# Similarity search over the vector store, returning the TOP_K closest chunks.
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": TOP_K},
)

# Disabled alternative: wrap the same base retriever in a Cohere reranker.
# compressor = CohereRerank(cohere_api_key=os.getenv('COHERE_API_KEY'))
# retriever = ContextualCompressionRetriever(
#     base_compressor=compressor,
#     base_retriever=vector_store.as_retriever(search_type="similarity", search_kwargs={"k": TOP_K}),
# )

In [8]:
# Run the retriever on its own with the sample question, independent of the chain.
retrieved_docs = retriever.invoke(input=query_str)
# Bare expression so the notebook displays the returned Document list.
retrieved_docs

[Document(metadata={'effective_date': '31 Desember 1958', 'file_name': 'ojk-undang_undang-84_tahun_1958-31121958-uu_republik_indonesia_tentang_pengubahan_pasal_pasal_16_dan_19_undang_undang_pokok_bank_indonesia_uu_nomor_84_tahun_1958_pdf.pdf', 'file_url': 'https://www.ojk.go.id/id/regulasi/Documents/Pages/UU-Republik-Indonesia-tentang-Pengubahan-Pasal-Pasal-16-dan-19-Undang-Undang-Pokok-Bank-Indonesia/UU%20Nomor%2084%20Tahun%201958.pdf', 'regulation_number': '84 Tahun 1958', 'regulation_type': 'Undang-Undang', 'sector': 'Perbankan', 'subsector': 'Bank Umum', 'title': 'Undang-Undang Republik Indonesia tentang Pengubahan Pasal-Pasal 16 dan 19 Undang-Undang Pokok Bank Indonesia'}, page_content='PRESIDEN\nREPUBLIK INDONESIA\nPENJELASAN\nTENTANG\nUNDANG-UNDANG PERUBAHAN PASAL-PASAL 16 dan 19\nDARI UNDANG-UNDANG POKOK BANK INDONESIA.\nI.\nPENJELASAN UMUM.\n1.\nDalam pasal 16 ayat 1 dari Undang-undang Pokok Bank Indonesia disebutkan\nbahwa banyaknya uang yang beredar harus dijamin sebesar 20%

### **Generate**

In [9]:
from utils.rag_chain_with_chat_history import create_chain_with_chat_history

# Prompt that condenses (chat history + follow-up) into one standalone question.
# Placeholders: {chat_history}, {question}.
# The original text ended its first instruction with a dangling fragment
# ("... LANGUAGE. if the follow up question is not clear."); it is now a complete
# sentence stating when to rephrase.
_TEMPLATE = """Given the following conversation and a follow up question, rephrase the \
follow up question to be a standalone question IN ITS ORIGINAL LANGUAGE. \
Only rephrase it if the follow up question is not clear on its own; otherwise \
return it unchanged.
If the chat history is not relevant to the follow up question, please ignore it.

Chat history:
{chat_history}
Follow up question: {question}
Standalone question:"""

# =====

# Prompt that answers from the retrieved context. Placeholders: {context}, {question}.
# This is not a raw string, so the `\n\n` after "Answer..." becomes two real newlines.
_ANSWER_TEMPLATE = """Context information is below.
context: {context}

Given the context and the metadata information and not prior knowledge, \
answer the query asking about banking compliance in Indonesia. 
Answer the question based on the context and the metadata information.
ALWAYS ANSWER WITH USER'S LANGUAGE.
Please provide your answer with [regulation_number](file_url) in metadata \
(if possible) in the following format:

Answer... \n\n
Source: [metadata['regulation_number']](metadata['file_url'])

But if you cannot find the regulation number, just provide the answer.

Question: {question}
"""

# Build the history-aware RAG chain from the model, the retriever, and the
# condense-question / answer prompt strings.
chain = create_chain_with_chat_history(
    llm_model=llm_model,
    retriever=retriever,
    contextualize_q_prompt_str=_TEMPLATE,
    qa_system_prompt_str=_ANSWER_TEMPLATE,
)

In [10]:
from utils.chat_history import ChatHistory

# Conversation buffer shared by the streaming and get_response cells below.
# NOTE(review): max_history_length=5 presumably caps how many past exchanges are kept — confirm in utils.chat_history.
chat_history = ChatHistory(max_history_length=5)

#### **Streaming**

In [59]:
# response = chain.stream({"chat_history": chat_history, "question": "Berapa SWDKLLJ dari sedan?"})
# chat_history.add_chat(response["question"], response["answer"])

from utils.rag_chain_with_chat_history import print_answer_stream

await print_answer_stream(chain=chain, chat_history=chat_history, question=query_str)

# answer_chunks = []
# async for chunk in chain.astream({"chat_history": chat_history, "question": query_str}):
#     if 'question' in chunk:
#         question = chunk['question']
#     if 'answer' in chunk:
#         answer_chunks.append(chunk['answer'])
#         print(chunk['answer'], end='', flush=True)

# answer = ''.join(answer_chunks)
# chat_history.add_chat(question, answer)

Jaminan moneter pada tanggal 20 Agustus 1958 tinggal sebesar 7,30%.

Source: [84 Tahun 1958](https://www.ojk.go.id/id/regulasi/Documents/Pages/UU-Republik-Indonesia-tentang-Pengubahan-Pasal-Pasal-16-dan-19-Undang-Undang-Pokok-Bank-Indonesia/UU%20Nomor%2084%20Tahun%201958.pdf)

#### **With Question, Answer, and Context**

In [12]:
from utils.rag_chain_with_chat_history import get_response

# Non-streaming call; the recorded output is a dict with 'question', 'answer' and 'context' keys.
response = get_response(
    question=query_str,
    chain=chain,
    chat_history=chat_history,
)
response  # display the full response dict

{'question': 'Berapa persen jaminan moneter pada tanggal 20 Agustus 1958?',
 'answer': 'Jaminan moneter pada tanggal 20 Agustus 1958 tinggal sebesar 7,30%.\n\nSource: [84 Tahun 1958](https://www.ojk.go.id/id/regulasi/Documents/Pages/UU-Republik-Indonesia-tentang-Pengubahan-Pasal-Pasal-16-dan-19-Undang-Undang-Pokok-Bank-Indonesia/UU%20Nomor%2084%20Tahun%201958.pdf)',
 'context': '[\n  {\n    "metadata": {\n      "effective_date": "31 Desember 1958",\n      "file_name": "ojk-undang_undang-84_tahun_1958-31121958-uu_republik_indonesia_tentang_pengubahan_pasal_pasal_16_dan_19_undang_undang_pokok_bank_indonesia_uu_nomor_84_tahun_1958_pdf.pdf",\n      "file_url": "https://www.ojk.go.id/id/regulasi/Documents/Pages/UU-Republik-Indonesia-tentang-Pengubahan-Pasal-Pasal-16-dan-19-Undang-Undang-Pokok-Bank-Indonesia/UU%20Nomor%2084%20Tahun%201958.pdf",\n      "regulation_number": "84 Tahun 1958",\n      "regulation_type": "Undang-Undang",\n      "sector": "Perbankan",\n      "subsector": "Bank Umum"

In [13]:
# print() renders the "\n\n" escapes seen in the dict repr as real line breaks.
print(response['answer'])

Jaminan moneter pada tanggal 20 Agustus 1958 tinggal sebesar 7,30%.

Source: [84 Tahun 1958](https://www.ojk.go.id/id/regulasi/Documents/Pages/UU-Republik-Indonesia-tentang-Pengubahan-Pasal-Pasal-16-dan-19-Undang-Undang-Pokok-Bank-Indonesia/UU%20Nomor%2084%20Tahun%201958.pdf)


In [14]:
# 'context' is a JSON-formatted string of the retrieved documents; printing it shows the indented layout.
print(response['context'])

[
  {
    "metadata": {
      "effective_date": "31 Desember 1958",
      "file_name": "ojk-undang_undang-84_tahun_1958-31121958-uu_republik_indonesia_tentang_pengubahan_pasal_pasal_16_dan_19_undang_undang_pokok_bank_indonesia_uu_nomor_84_tahun_1958_pdf.pdf",
      "file_url": "https://www.ojk.go.id/id/regulasi/Documents/Pages/UU-Republik-Indonesia-tentang-Pengubahan-Pasal-Pasal-16-dan-19-Undang-Undang-Pokok-Bank-Indonesia/UU%20Nomor%2084%20Tahun%201958.pdf",
      "regulation_number": "84 Tahun 1958",
      "regulation_type": "Undang-Undang",
      "sector": "Perbankan",
      "subsector": "Bank Umum",
      "title": "Undang-Undang Republik Indonesia tentang Pengubahan Pasal-Pasal 16 dan 19 Undang-Undang Pokok Bank Indonesia"
    },
    "page_content": "PRESIDEN\nREPUBLIK INDONESIA\nPENJELASAN\nTENTANG\nUNDANG-UNDANG PERUBAHAN PASAL-PASAL 16 dan 19\nDARI UNDANG-UNDANG POKOK BANK INDONESIA.\nI.\nPENJELASAN UMUM.\n1.\nDalam pasal 16 ayat 1 dari Undang-undang Pokok Bank Indonesia disebut

In [15]:
# Show what the history buffer currently holds, formatted as Human/Assistant turns.
print(chat_history.get_formatted_history())

----------
Human: Berapa persen jaminan moneter pada tanggal 20 Agustus 1958?
----------
Assistant: Jaminan moneter pada tanggal 20 Agustus 1958 tinggal sebesar 7,30%.

Source: [84 Tahun 1958](https://www.ojk.go.id/id/regulasi/Documents/Pages/UU-Republik-Indonesia-tentang-Pengubahan-Pasal-Pasal-16-dan-19-Undang-Undang-Pokok-Bank-Indonesia/UU%20Nomor%2084%20Tahun%201958.pdf)



#### **BATAS SUCI — archived experiments (every cell below is commented out)**

In [16]:
# import json

# def _combine_documents(docs):
#     """Combine documents into a single JSON string."""
#     doc_list = [{"metadata": doc.metadata, "page_content": doc.page_content} for doc in docs]
#     return json.dumps(doc_list, indent=2)


# def _format_chat_history(chat_history: List[Tuple]) -> str:
#     """Format chat history into a string."""
#     buffer = ""
#     for dialogue_turn in chat_history:
#         human = "Human: " + dialogue_turn[0]
#         ai = "Assistant: " + dialogue_turn[1]
#         buffer += "\n" + "\n".join([human, ai])
#     return buffer


In [17]:
# _inputs_question = RunnableMap(
#     standalone_question=RunnablePassthrough.assign(
#         chat_history=lambda x: _format_chat_history(x["chat_history"])
#     )
#     | CONDENSE_QUESTION_PROMPT
#     | llm_model
#     | StrOutputParser(),
# )

# _context_chain = {
#     "context": itemgetter("standalone_question") | retriever | _combine_documents,
#     "question": lambda x: x["standalone_question"],
# }

# conversational_qa_chain_with_context = (
#     _inputs_question
#     | _context_chain
#     | {
#         "question": itemgetter("question"),
#         "answer": ANSWER_PROMPT | llm_model | StrOutputParser(),
#         "context": itemgetter("context"),
#     }
#     # | StrOutputParser()
# )

In [18]:
# from langserve.pydantic_v1 import BaseModel, Field


# class ChatHistory(BaseModel):
#     """Chat history with the bot."""

#     chat_history: List[Tuple[str, str]] = Field(
#         ...,
#         extra={"widget": {"type": "chat", "input": "question"}},
#     )
#     question: str

# chain = conversational_qa_chain_with_context.with_types(input_type=ChatHistory)

In [19]:
# chat_history = [
#     # ("Berapa SWDKLLJ dari sedan?", "Nilai SWDKLLJ dari sedan adalah Rp140.000"),
# ]


In [20]:
# chat_history

In [21]:
# from pprint import pprint

# # response = chain.invoke({"question": query_str, "chat_history": ["Berapa SWDKLLJ dari sedan?", "Rp140.000"]})
# response = chain.invoke({"question": "Kalau 7/33/PBI/2005?", "chat_history": chat_history})
# chat_history.append((response["question"], response["answer"]))
# # pprint(response)

In [22]:
# response

In [23]:
# from langchain_core.prompts import ChatPromptTemplate

# template = """\
# Context information is below.
# context: {context}

# Given the context and the metadata information and not prior knowledge, \
# answer the query asking about banking compliance in Indonesia. 
# Answer the question based on the context and the metadata information.
# ALWAYS ANSWER WITH USER'S LANGUAGE.
# Please provide your answer with [regulation_number](file_url) in metadata 
# (if possible) in the following format:

# Answer... \n\n
# Source: [metadata['regulation_number']](metadata['file_url'])

# Question: {question}
# """
# # If the question is not clear, use chat history below to understand the context and the question.
# # Chat history: {chat_history}

# prompt = ChatPromptTemplate.from_template(template)

In [24]:
# from langchain_core.output_parsers import StrOutputParser
# from langchain_core.runnables import RunnablePassthrough
# from operator import itemgetter

# # def format_docs(docs):
# #     page_content = [doc.page_content for doc in docs]
# #     return page_content

# # def format_metadata(docs):
# #     list_dict = [doc.metadata for doc in docs]
# #     for dict in list_dict:
# #         dict.pop('file_name')
# #     return list_dict


# rag_chain = (
#     {"context": retriever,"question": RunnablePassthrough()}
#     | prompt
#     | llm_model
#     | StrOutputParser()
# )

In [25]:
# for chunk in rag_chain.stream(query_str):
#     print(chunk, end="", flush=True)

In [26]:
# from langchain_core.prompts import PromptTemplate
# from langchain.chains.conversation.memory import ConversationSummaryBufferMemory
# from langchain.chains.conversational_retrieval.base import ConversationalRetrievalChain


# template = """
# You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. 
# If you don't know the answer, say that you don't know. Use three sentences maximum and keep the answer concise.
# Do not include source citations in your answer. The sources will be added separately.

# Context: {context}
# History: {chat_history}
# Human: {question}
# Assistant: """

# PROMPT = PromptTemplate(
#     input_variables=["context", "chat_history", "question"],
#     template=template
# )

# # Create memory
# memory = ConversationSummaryBufferMemory(
#     llm=llm_azure,
#     memory_key="chat_history",
#     return_messages=True,
#     output_key="answer"
# )

# # Create the conversational chain
# qa_chain = ConversationalRetrievalChain.from_llm(
#     llm=llm_azure,
#     retriever=retriever,
#     memory=memory,
#     combine_docs_chain_kwargs={"prompt": PROMPT},
#     return_source_documents=True
# )

In [27]:
# def format_sources(source_documents):
#     sources = set()
#     for doc in source_documents:
#         if 'regulation_number' in doc.metadata and 'file_url' in doc.metadata:
#             sources.add(f"[{doc.metadata['regulation_number']}]({doc.metadata['file_url']})")
#     return "\n".join(sources)

# # Function to get response
# def get_response(query):
#     result = qa_chain({"question": query})
#     answer = result['answer']
#     sources = format_sources(result['source_documents'])
#     full_response = f"{answer}\n\nSources:\n{sources}"
#     return full_response

In [28]:
# # Example usage
# response = get_response(query_str)
# print(response)
# # print("\n" + "="*50 + "\n")

In [29]:
# from typing import List, Tuple
# from langchain_core.prompts import ChatPromptTemplate, PromptTemplate, format_document
# from langchain.memory import ConversationBufferMemory
# from langchain_core.runnables import RunnablePassthrough, RunnableLambda

In [30]:
# _template = """[INST] <<SYS>>Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.<</SYS>>

# Chat History:
# {chat_history}
# Follow Up Input: {question}
# Standalone question:[/INST]"""
# CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

# # =================================================================================================

# template =  """\
# Context information is below.
# context = {context}

# Given the context and the metadata information and not prior knowledge, \
# answer the query asking about banking compliance in Indonesia. 
# Answer the question based on the context and the metadata information.
# ALWAYS ANSWER WITH USER'S LANGUAGE.
# ALWAYS provide your answer with [regulation_number](file_url) metadata \
# (if the answer only in a specific regulation) in the following format:

# Answer... \n\n
# Source: [metadata['regulation_number']](metadata['file_url'])

# For "Answer" retrieve from context.page_content the answer to the user's question.
# For "Source" retrieve from context.metadata['regulation_number'] the regulation number and from context.metadata['file_url'] the file url.
# """
# ANSWER_PROMPT = ChatPromptTemplate.from_template(template)

# DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")

In [31]:
# def _combine_documents(
#     docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
# ):
#     doc_strings = [format_document(doc, document_prompt) for doc in docs]
#     return document_separator.join(doc_strings)

# def _format_chat_history(chat_history: List[Tuple]) -> str:
#     buffer = ""
#     for dialogue_turn in chat_history:
#         human = "Human: " + dialogue_turn[0]
#         ai = "Assistant: " + dialogue_turn[1]
#         buffer += "\n" + "\n".join([human, ai])
#     return buffer

In [32]:
# from operator import itemgetter

# memory = ConversationBufferMemory(
#     memory_key="history", 
#     return_messages=True, 
#     output_key="answer", 
#     input_key="question"
# )

# loaded_memory = RunnablePassthrough.assign(
#     chat_history=RunnableLambda(memory.load_memory_variables)
#     | itemgetter("history"),
# )

In [33]:
# from langchain_core.output_parsers import StrOutputParser


# standalone_question = {
#     "standalone_question": {
#         "question": lambda x: x["question"],
#         "chat_history": lambda x: x["chat_history"],
#     }
#     | CONDENSE_QUESTION_PROMPT
#     | llm_model
#     | StrOutputParser(),
# }

# # Now we retrieve the documents
# retrieved_documents = {
#     "docs": itemgetter("standalone_question") | retriever,
#     "question": lambda x: x["standalone_question"],
# }

# # Now we construct the inputs for the final prompt
# final_inputs = {
#     "context": lambda x: _combine_documents(x["docs"]),
#     "question": itemgetter("question"),
# }

# # And finally, we do the part that returns the answers
# answer = {
#     "answer": final_inputs | ANSWER_PROMPT | llm_model,
#     "docs": itemgetter("docs"),
# }

# # And now we put it all together!
# final_chain = loaded_memory | standalone_question | retrieved_documents | answer

In [34]:
# response = final_chain.invoke({"question": query_str})
# response

In [35]:
# inputs = {"question": "Apa kepanjangan dari SWDKLLJ?"}
# for token in final_chain.stream(inputs):
#     print(f"{token}")


# answer = ""

# for token in final_chain.stream(inputs):
#     # Assuming each token is a dictionary with an 'answer' key
#     if isinstance(token, dict) and "answer" in token:
#         # print(token["answer"].content, end="", flush=True)
#         print(token["answer"])
#         # print(token)re
#         # answer.append(token["answer"].content)

# # print(answer)

In [36]:
# from operator import itemgetter
# from langchain.prompts import PromptTemplate
# from langchain_core.output_parsers import StrOutputParser

# _template = """[INST] <<SYS>>Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.<</SYS>>

# Chat History:
# {chat_history}
# Follow Up Input: {question}
# Standalone question:[/INST]"""

# CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

# standalone_question = {
#     "standalone_question": {
#         "question": lambda x: x["question"],
#         "chat_history": lambda x: _format_chat_history(x["chat_history"]),
#     }
#     | CONDENSE_QUESTION_PROMPT
#     | llm_model
#     | StrOutputParser(),
# }

In [37]:
# # Now we retrieve the documents
# retrieved_documents = {
#     "docs": itemgetter("standalone_question") | retriever,
#     "question": lambda x: x["standalone_question"],
# }

# # Now we construct the inputs for the final prompt
# final_inputs = {
#     "context": lambda x: _combine_documents(x["docs"]),
#     "question": itemgetter("question"),
# }

In [38]:
# from langchain.prompts import PromptTemplate

### Answer question ###
# template = """\
# Informasi konteks ada di bawah.
# konteks = {context}

# Mengingat konteks dan informasi metadata dan bukan pengetahuan sebelumnya, \
# menjawab pertanyaan tentang kepatuhan perbankan di Indonesia.
# Jawab pertanyaan berdasarkan informasi konteks beserta metadata nya.
# SELALU JAWAB DENGAN MENGGUNAKAN BAHASA INDONESIA.
# ALWAYS provide your answer with [regulation_number](file_url) metadata \
# (if the answer only in a specific regulation) in the following format:

# Answer... \n\n
# Source: [metadata['regulation_number']](metadata['file_url'])

# For "Answer" retrieve from context.page_content the answer to the user's question.
# For "Source" retrieve from context.metadata['regulation_number'] the regulation number and from context.metadata['file_url'] the file url.

# Question: {question}
# If the question context is not really clear, you can utilize this text history \
# based on prior conversation between human and AI before this question being asked:
# Chat history: {chat_history}
# """

### Answer question ###
# template = """\
# Informasi konteks ada di bawah.
# konteks = {context}

# Mengingat konteks dan informasi metadata dan bukan pengetahuan sebelumnya, \
# menjawab pertanyaan tentang kepatuhan perbankan di Indonesia.
# Jawab pertanyaan berdasarkan informasi konteks beserta metadata nya.
# SELALU JAWAB DENGAN BAHASA PENGGUNA.
# SELALU berikan jawaban Anda dengan [regulation_number](file_url) pada metadata \
# dengan format sebagai berikut:

# Answer... \n\n
# Sumber: [metadata['regulation_number']](metadata['file_url'])

# Untuk "Jawaban" ambil dari context.page_content jawaban atas pertanyaan pengguna.
# Untuk "Sumber" ambil dari context.metadata['regulation_number'] nomor peraturan dan dari context.metadata['file_url'] url file.

# Pertanyaan: {question}

# Jika konteks pertanyaannya kurang jelas, Anda dapat menggunakan teks riwayat ini \
# berdasarkan percakapan sebelumnya antara manusia dan AI sebelum pertanyaan ini diajukan:
# Chat His: {chat_history}
# """




# prompt = PromptTemplate(
#     input_variables=["context", "chat_history", "question"], template=template
# )

In [39]:
# from langchain.chains.conversational_retrieval.base import ConversationalRetrievalChain
# from langchain.memory import ConversationBufferMemory

# # Create the custom chain
# memory = ConversationBufferMemory(
#     memory_key='chat_history', return_messages=True, output_key='answer')

# chain = ConversationalRetrievalChain.from_llm(
#     llm=llm_azure,
#     retriever=retriever,
#     memory=memory,
#     return_generated_question=True,
#     return_source_documents=True,
#     verbose=True,
#     combine_docs_chain_kwargs={'prompt': prompt},
# )

In [40]:
# response = chain.invoke(query_str)
# response

In [41]:
# print(response["answer"])

In [42]:
# from langchain.chains import create_history_aware_retriever, create_retrieval_chain
# from langchain.chains.combine_documents import create_stuff_documents_chain
# from langchain_community.chat_message_histories import ChatMessageHistory
# from langchain_core.chat_history import BaseChatMessageHistory
# from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
# from langchain_core.runnables.history import RunnableWithMessageHistory

# ### Contextualize question ###
# contextualize_q_system_prompt = """Given a chat history and the latest user question \
# which might reference context in the chat history, formulate a standalone question \
# which can be understood without the chat history. Do NOT answer the question, \
# just reformulate it if needed and otherwise return it as is."""
# contextualize_q_prompt = ChatPromptTemplate.from_messages(
#     [
#         ("system", contextualize_q_system_prompt),
#         MessagesPlaceholder("chat_history"),
#         ("human", "{input}"),
#     ]
# )
# history_aware_retriever = create_history_aware_retriever(
#     llm_model, retriever, contextualize_q_prompt
# )

# ### Answer question ###
# qa_system_prompt = """\
# Context information is below.
# context = {context}

# Given the context and the metadata information and not prior knowledge, \
# answer the query asking about banking compliance in Indonesia. 
# Answer the question based on the context and the metadata information.
# ALWAYS ANSWER WITH USER'S LANGUAGE.
# ALWAYS provide your answer with [regulation_number](file_url) metadata \
# (if the answer only in a specific regulation) in the following format:

# Answer... \n\n
# Source: [metadata['regulation_number']](metadata['file_url'])

# For "Answer" retrieve from context.page_content the answer to the user's question.
# For "Source" retrieve from context.metadata['regulation_number'] the regulation number and from context.metadata['file_url'] the file url.
# """

# qa_system_prompt_chain = retriever | 

# qa_prompt = ChatPromptTemplate.from_messages(
#     [
#         ("system", qa_system_prompt),
#         MessagesPlaceholder("chat_history"),
#         ("human", "{input}"),
#     ]
# )
# question_answer_chain = create_stuff_documents_chain(llm_model, qa_prompt)

# rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

In [43]:
# store = {}

# def get_session_history(session_id: str) -> BaseChatMessageHistory:
#     if session_id not in store:
#         store[session_id] = ChatMessageHistory()
#     return store[session_id]

# conversational_rag_chain = RunnableWithMessageHistory(
#     rag_chain,
#     get_session_history,
#     input_messages_key="input",
#     history_messages_key="chat_history",
#     output_messages_key="answer",
# )

In [44]:
# from pprint import pprint

# answer = conversational_rag_chain.invoke(
#     {"input": query_str},
#     config={
#         "configurable": {"session_id": "abc123"}
#     },
# )

# answer

In [45]:
# print(answer['context'])

In [46]:
# from langchain_core.prompts import ChatPromptTemplate

# template = """\
# Context information is below.
# context: {context}

# Given the context and the metadata information and not prior knowledge, \
# answer the query asking about banking compliance in Indonesia. 
# Answer the question based on the context and the metadata information.
# ALWAYS ANSWER WITH USER'S LANGUAGE.
# Please provide your answer with [regulation_number](file_url) in metadata 
# (if possible) in the following format:

# Answer... \n\n
# Source: [metadata['regulation_number']](metadata['file_url'])

# Question: {question}

# If the question context is not really clear, you can utilize this text history \
# based on prior conversation between human and AI before this question being asked:
# Chat history: {history}
# """

# prompt = ChatPromptTemplate.from_template(template)

In [47]:
# from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# system_prompt = """\
# Context information is below.
# context: {context}

# Given the context and the metadata information and not prior knowledge, \
# answer the query asking about banking compliance in Indonesia. 
# Answer the question based on the context and the metadata information.
# ALWAYS ANSWER WITH USER'S LANGUAGE.
# Please provide your answer with [regulation_number](file_url) in metadata 
# (if possible) in the following format:

# Answer... \n\n
# Source: [metadata['regulation_number']](metadata['file_url'])


# """

# prompt = ChatPromptTemplate.from_messages([
#     ("system", system_prompt),
#     ("{chat_history}"),
#     ("human", "{question}"),
#     # ("system", "context: {context}")
# ])

In [48]:
# ## Answer question ###
# template = """\
# Informasi konteks ada di bawah.
# konteks = {context}

# Mengingat konteks dan informasi metadata dan bukan pengetahuan sebelumnya, \
# menjawab pertanyaan tentang kepatuhan perbankan di Indonesia.
# Jawab pertanyaan berdasarkan informasi konteks beserta metadata nya.
# SELALU JAWAB DENGAN BAHASA PENGGUNA.
# SELALU berikan jawaban Anda dengan [regulation_number](file_url) pada metadata \
# dengan format sebagai berikut:

# Answer... \n\n
# Sumber: [metadata['regulation_number']](metadata['file_url'])

# Untuk "Jawaban" ambil dari context.page_content jawaban atas pertanyaan pengguna.
# Untuk "Sumber" ambil dari context.metadata['regulation_number'] nomor peraturan dan dari context.metadata['file_url'] url file.
# """

In [49]:
# from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
# from langchain.memory import ChatMessageHistory
# from langchain_core.runnables.history import RunnableWithMessageHistory
# from operator import itemgetter


# prompt = ChatPromptTemplate.from_messages(
#     [
#         ("system", template),
#         MessagesPlaceholder(variable_name="chat_history"),
#         ("user", "{input}"),
#     ]
# )

# prompt = ChatPromptTemplate.from_template(template)


# chain = {"context": retriever, "input": itemgetter("input"), "chat_history": itemgetter("chat_history")} | prompt | llm_model

In [50]:
# chat_history = ChatMessageHistory()


# chain_with_message_history = RunnableWithMessageHistory(
#     chain,
#     lambda session_id: chat_history,
#     input_messages_key="input",
#     history_messages_key="chat_history",
# )

In [51]:
# from langchain_core.runnables import RunnablePassthrough

# def summarize_messages(chain_input):
#     stored_messages = chat_history.messages
#     if len(stored_messages) == 0:
#         return False
#     summarization_prompt = ChatPromptTemplate.from_messages(
#         [
#             MessagesPlaceholder(variable_name="chat_history"),
#             (
#                 "user",
#                 "Distill the above chat messages into a single summary message. Include as many specific details as you can.",
#             ),
#         ]
#     )
#     summarization_chain = summarization_prompt | llm_model

#     summary_message = summarization_chain.invoke({"chat_history": stored_messages})

#     chat_history.clear()

#     chat_history.add_message(summary_message)

#     return True

# chain_with_summarization = (
#     RunnablePassthrough.assign(messages_summarized=summarize_messages)
#     | chain_with_message_history
# )

In [52]:
# itemgetter("input")

In [53]:
# response = chain_with_summarization.invoke(
#     {"input": "What did I say my name was?"},
#     {"configurable": {"session_id": "unused"}},
# )

In [54]:
# from langchain_core.chat_history import BaseChatMessageHistory
# from langchain_core.runnables.history import RunnableWithMessageHistory
# from langchain_community.chat_message_histories import ChatMessageHistory

# store = {}

# def get_session_history(session_id: str) -> BaseChatMessageHistory:
#     if session_id not in store:
#         store[session_id] = ChatMessageHistory()
#     return store[session_id]

# conversational_rag_chain = RunnableWithMessageHistory(
#     rag_chain,
#     get_session_history,
#     input_messages_key="question",
#     history_messages_key="chat_history",
# )

In [55]:
# from pprint import pprint


# rag_chain.invoke(query_str)

# answer = conversational_rag_chain.invoke(
#     {"question": query_str},
#     config={

        
#         "configurable": {"session_id": "abc123"}
#     },
# )

# answer

In [56]:
# for chunk in rag_chain.stream(query_str):
#     print(chunk, end="", flush=True)


# def format_docs(docs):
#     page_content = [doc.page_content for doc in docs]
#     return page_content


# def format_metadata(docs):
#     list_dict = [doc.metadata for doc in docs]
#     for dict in list_dict:
#         dict.pop('file_name')
#     return list_dict


## **Build Agent**

In [57]:
# from langchain.tools.retriever import create_retriever_tool
# from langchain.agents import initialize_agent

# retriever_tool = create_retriever_tool(
#     retriever,
#     "langsmith_search",
#     "Search for information about LangSmith. For any questions about LangSmith, you must use this tool!",
# )

# tools = [retriever_tool]

# agent = initialize_agent(
#     tools=tools,
#     llm=llm_model,
#     prompt=prompt,
#     agent="zero-shot-react-description",
# )

In [58]:
# agent.invoke({"input": query_str})