# **OJK ChatBot - LangChain**

In [1]:
# Reload imported modules automatically before each cell runs, so edits to the
# project's own modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from dotenv import load_dotenv
# Load variables from a .env file into the process environment; the credentials
# read with os.getenv further down are expected to come from there.
load_dotenv()

True

In [3]:
import os

# Credentials and endpoints come from the process environment.
# A variable that is not set resolves to None rather than raising.

# OpenAI
openai_api_key = os.environ.get("OPENAI_KEY")

# Azure OpenAI
azure_api_key = os.environ.get("AZURE_OPENAI_KEY")
azure_api_version = os.environ.get("API_VERSION")
azure_api_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")
azure_api_deployment_id = os.environ.get("AZURE_OPENAI_DEPLOYMENT_ID")

# Pinecone
pinecone_api_key = os.environ.get("PINECONE_API_KEY")

## **Config**

In [4]:
# Settings passed as `config` when building the OpenAI models.
config_openai = dict(api_key=openai_api_key)

# Settings passed as `config` when building the Azure OpenAI models.
config_azure = dict(
    azure_endpoint=azure_api_endpoint,
    azure_deployment=azure_api_deployment_id,
    api_version=azure_api_version,
    api_key=azure_api_key,
)

In [5]:
# STORE gates the Load / Split / Storing cells: they only extract, split and
# upload documents when it is True.
STORE = False
# DELETE: presumably controls whether existing vectors are removed before new
# ones are stored — TODO confirm against PineconeIndexManager.store_vector_index.
DELETE = False

In [6]:
# Value used as the retriever's search_kwargs["k"] (number of results per query).
TOP_K = 6

## **Create Models**

In [7]:
from utils.model_config import ModelName, get_model

# get_model returns a pair that is unpacked as (chat model, embedding model).
# All three backends are built here, although only one pair is selected as
# llm_model / embed_model afterwards. The Ollama call passes no config.
llm_openai, embedding_llm_openai = get_model(model_name=ModelName.OPENAI, config=config_openai)
llm_azure, embedding_llm_azure = get_model(model_name=ModelName.AZURE_OPENAI, config=config_azure)
llm_ollama, embedding_llm_ollama = get_model(model_name=ModelName.OLLAMA)

In [8]:
# Active backend for the rest of the notebook: the Azure pair is used both for
# the vector index embeddings (embed_model) and for generation (llm_model).
llm_model = llm_azure
embed_model = embedding_llm_azure

## **Indexing**

### **Load**

In [9]:
from utils.documents_text_extract import extract_all_documents_in_directory

# Input locations, relative to the notebook's working directory.
documents_dir = './data/documents'
metadata_path = './data/metadata/files_metadata.csv'

# Extraction only runs when STORE is True; otherwise `documents` is never defined.
# NOTE(review): the keyword is spelled "treshold" and must match the helper's
# parameter name; what the 0.98 value controls is not visible here — confirm.
if STORE:
    documents = extract_all_documents_in_directory(documents_dir, metadata_path, treshold=0.98)


### **Split**

In [10]:
from utils.document_split import document_splitter

# Splitting only runs when STORE is True, because `documents` exists only in that
# case; `all_splits` is what the Storing cell uploads.
if STORE:
    all_splits = document_splitter(docs=documents)

### **Storing**

In [11]:
from utils.vector_store import PineconeIndexManager

# Manager for the 'ojk' Pinecone index, using the selected embedding model.
pinecone = PineconeIndexManager(index_name='ojk', api_key=pinecone_api_key, embed_model=embed_model)

if STORE:
    # Pass the DELETE flag from the config cell instead of a hard-coded False,
    # so the flag actually takes effect (it is False by default, as before).
    pinecone.store_vector_index(docs=all_splits, delete=DELETE)

# The index is loaded the same way whether or not new vectors were just stored.
vector_store = pinecone.load_vector_index()

## **Retrieval and Generation**

In [36]:
# Evaluation questions, written in Indonesian. The trailing comment on each line
# is the expected answer, kept verbatim in Indonesian so it can be compared
# directly with the model's output.
a = "Apa judul peraturan 7/33/PBI/2005?" # Pencabutan atas Peraturan Bank Indonesia Nomor 5/17/PBI/2003 tentang Persyaratan dan Tata Cara Pelaksanaan Jaminan Pemerintah terhadap Kewajiban Pembayaran Bank Perkreditan Rakyat
b = "Kapan surat edaran No. 15/26/DPbS mulai berlaku?" # 1 Agustus 2013.
c = "Siapa nama dan jabatannya yang menandatangani surat dengan nomor 1/SEOJK.04/2013?" # NURHAIDA, kepala eksekutif pengawas pasar modal
d = "Saya ingin menyelenggarakan kegiatan pasar modal berikan saya nomor surat yang membahas mengenai hal ini!" # Peraturan Pemerintah Nomor 12 Tahun 2004
e = "Berapa persen jaminan moneter pada tanggal 20 Agustus 1958?" # 7,3%
f = "Surat edaran nomor berapa yang mengatur bank umum syariah dan unit usaha syariah?" # 15/26/DPbS
g = "Apa kepanjangan dari PAPSI?" # Pedoman Akuntansi Perbankan Syariah Indonesia
h = "apa judul peraturan nomor 112/KMK.03/2001?" # Keputusan Menteri Keuangan tentang Pemotongan Pajak Penghasil Pasal 21 atas Penghasilan berupa Uang Pesangon, Uang Tebusan Pensiun, dan Tunjangan Hari Tua atau Jaminan Hari Tua
i = "Saya ingin membuat sistem informasi lembaga jasa keuangan, berikan nomor regulasi dari peraturan yang membahas tentang manejemen risiko nya!" # 4/POJK.05/2021
j = "Apa kepanjangan dari SWDKLLJ?" # Sumbangan Wajib Dana Kecelakaan Lalu Lintas Jalan
k = "Berapa nilai SWDKLLJ dari sedan?" # Rp. 140.000
l = "Apa latar belakang dari peraturan NOMOR 4/POJK.05/2021?" # expected as a list ("dalam bentuk list")
m = "Apa itu LJKNB?" # Lembaga Jasa Keuangan Non Bank
n = "Apakah KMK Nomor 462/KMK.04/1998 masih berlaku" # no, not in force ("tidak")
o = "Apa itu Uang Pesangon?" # penghasilan yang dibayarkan oleh pemberi kerja kepada karyawan dengan nama dan dalam bentuk apapun sehubungan dengan berakhirnya masa kerja atau terjadi pemutusan  hubungan kerja, termasuk uang penghargaan masa kerja dan uang  ganti kerugian
p = "Apa itu CKPN?" # Cadangan Kerugian Penurunan Nilai.
q = "Kapan, dimana, dan oleh siapa surat nomor PER- 06/BL/2012 ditetapkan?" # Surat nomor PER-06/BL/2012 ditetapkan pada tanggal 22 November 2012 di Jakarta oleh Ketua Badan Pengawas Pasar Modal dan Lembaga Keuangan.
r = "Apa kepanjangan PSAK?" # Pernyataan Standar Akuntansi Keuangan
s = "Apa itu 'shahibul maal'?" # Pemilik dana pihak ketiga

# The question that the retrieval and generation cells below are run with.
query_str = e

### **Retrieve**

In [37]:
# Similarity-search retriever over the vector store; "k" limits each query to TOP_K results.
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": TOP_K})

In [38]:
# Sanity check: show the raw documents (metadata and page_content) retrieved for
# the query before any generation step runs.
retrieved_docs = retriever.invoke(input=query_str)
retrieved_docs

[Document(metadata={'effective_date': '31 Desember 1958', 'file_name': 'ojk-undang_undang-84_tahun_1958-31121958-uu_republik_indonesia_tentang_pengubahan_pasal_pasal_16_dan_19_undang_undang_pokok_bank_indonesia_uu_nomor_84_tahun_1958_pdf.pdf', 'file_url': 'https://www.ojk.go.id/id/regulasi/Documents/Pages/UU-Republik-Indonesia-tentang-Pengubahan-Pasal-Pasal-16-dan-19-Undang-Undang-Pokok-Bank-Indonesia/UU%20Nomor%2084%20Tahun%201958.pdf', 'regulation_number': '84 Tahun 1958', 'regulation_type': 'Undang-Undang', 'sector': 'Perbankan', 'subsector': 'Bank Umum', 'title': 'Undang-Undang Republik Indonesia tentang Pengubahan Pasal-Pasal 16 dan 19 Undang-Undang Pokok Bank Indonesia'}, page_content='PRESIDEN\nREPUBLIK INDONESIA\nPENJELASAN\nTENTANG\nUNDANG-UNDANG PERUBAHAN PASAL-PASAL 16 dan 19\nDARI UNDANG-UNDANG POKOK BANK INDONESIA.\nI.\nPENJELASAN UMUM.\n1.\nDalam pasal 16 ayat 1 dari Undang-undang Pokok Bank Indonesia disebutkan\nbahwa banyaknya uang yang beredar harus dijamin sebesar 20%

### **Generate**

In [39]:
# from langchain.chains import create_history_aware_retriever, create_retrieval_chain
# from langchain.chains.combine_documents import create_stuff_documents_chain
# from langchain_community.chat_message_histories import ChatMessageHistory
# from langchain_core.chat_history import BaseChatMessageHistory
# from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
# from langchain_core.runnables.history import RunnableWithMessageHistory


# ### Contextualize question ###
# contextualize_q_system_prompt = """Given a chat history and the latest user question \
# which might reference context in the chat history, formulate a standalone question \
# which can be understood without the chat history. Do NOT answer the question, \
# just reformulate it if needed and otherwise return it as is."""
# contextualize_q_prompt = ChatPromptTemplate.from_messages(
#     [
#         ("system", contextualize_q_system_prompt),
#         MessagesPlaceholder("chat_history"),
#         ("human", "{input}"),
#     ]
# )
# history_aware_retriever = create_history_aware_retriever(
#     llm_model, retriever, contextualize_q_prompt
# )

# ### Answer question ###
# qa_system_prompt = """\
# Context information is below.
# context = {context}

# Given the context and the metadata information and not prior knowledge, \
# answer the query asking about banking compliance in Indonesia. 
# Answer the question based on the context and the metadata information.
# ALWAYS ANSWER WITH USER'S LANGUAGE.
# ALWAYS provide your answer with [regulation_number](file_url) metadata \
# (if the answer only in a specific regulation) in the following format:

# Answer... \n\n
# Source: [context[0].metadata['regulation_number']](context[0].metadata['file_url'])

# For "Answer" retrieve from context.page_content the answer to the user's question.
# For "Source" retrieve from context.metadata['regulation_number'] the regulation number and from context.metadata['file_url'] the file url.
# """
# qa_prompt = ChatPromptTemplate.from_messages(
#     [
#         ("system", qa_system_prompt),
#         MessagesPlaceholder("chat_history"),
#         ("human", "{input}"),
#     ]
# )
# question_answer_chain = create_stuff_documents_chain(llm_model, qa_prompt)

# rag_chain = create_retrieval_chain(
#     history_aware_retriever, question_answer_chain)

In [40]:
# store = {}

# def get_session_history(session_id: str) -> BaseChatMessageHistory:
#     if session_id not in store:
#         store[session_id] = ChatMessageHistory()
#     return store[session_id]


# conversational_rag_chain = RunnableWithMessageHistory(
#     rag_chain,
#     get_session_history,
#     input_messages_key="input",
#     history_messages_key="chat_history",
#     output_messages_key="answer",
# )

In [41]:
# from pprint import pprint

# answer = conversational_rag_chain.invoke(
#     {"input": query_str},
#     config={
#         "configurable": {"session_id": "abc123"}
#     },
# )

# answer

In [42]:
# print(answer['answer'])

In [43]:

from langchain_core.prompts import ChatPromptTemplate

# Prompt for the RAG chain. {context} and {question} are the two template
# variables; they correspond to the "context" and "question" keys of the mapping
# at the head of rag_chain.
# This is a regular (non-raw) triple-quoted string, so a trailing backslash joins
# two lines and the literal \n\n sequences become real newlines.
template = """\
Context information is below.
context: {context}

Given the context and the metadata information and not prior knowledge, \
answer the query asking about banking compliance in Indonesia. 
Answer the question based on the context and the metadata information.
ALWAYS ANSWER WITH USER'S LANGUAGE.
Please provide your answer with [regulation_number](file_url) in metadata 
(if possible) in the following format:

Answer... \n\n
Source: [metadata['regulation_number']](metadata['file_url'])

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

In [52]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


def format_docs(docs):
    """Collect the ``page_content`` of each document in ``docs``.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        A list with one ``page_content`` value per document, in input order.
    """
    contents = []
    for document in docs:
        contents.append(document.page_content)
    return contents

def format_metadata(docs):
    """Return each document's metadata without the ``'file_name'`` entry.

    A new dict is built for every document, so the documents' own ``metadata``
    mappings are left untouched. This makes the function safe to call more than
    once on the same documents. Documents whose metadata has no ``'file_name'``
    key are handled without error.

    Args:
        docs: Iterable of objects exposing a dict-like ``metadata`` attribute.

    Returns:
        A list of dicts, one per document in input order, each holding every
        metadata item except ``'file_name'``.
    """
    return [
        {key: value for key, value in doc.metadata.items() if key != 'file_name'}
        for doc in docs
    ]


# Pipeline: the input question is sent to the retriever (filling "context") and
# also passed through unchanged (filling "question"); both feed the prompt, whose
# result goes to the model.
# The retriever output is handed to the prompt as-is; the format_docs and
# format_metadata helpers are not part of this chain.
rag_chain = (
    {"context": retriever,
      "question": RunnablePassthrough()}
    | prompt
    | llm_model
    # With the parser disabled, the chain returns the model's message object
    # rather than a plain string.
    # | StrOutputParser()
)

In [54]:
from pprint import pprint

# Single blocking call: the chain runs once for query_str and returns the whole result.
response = rag_chain.invoke(
    input=query_str,
)

# Pretty-print the full result object, not only the answer text.
pprint(response)

AIMessage(content='Jaminan moneter pada tanggal 20 Agustus 1958 sebesar 7,30%. \n\nSource: [84 Tahun 1958](https://www.ojk.go.id/id/regulasi/Documents/Pages/UU-Republik-Indonesia-tentang-Pengubahan-Pasal-Pasal-16-dan-19-Undang-Undang-Pokok-Bank-Indonesia/UU%20Nomor%2084%20Tahun%201958.pdf)', response_metadata={'token_usage': {'completion_tokens': 107, 'prompt_tokens': 3452, 'total_tokens': 3559}, 'model_name': 'gpt-35-turbo', 'system_fingerprint': None, 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}], 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'se

In [45]:
# Stream the answer piece by piece as the model produces it.
for chunk in rag_chain.stream(query_str):
    # When the chain ends at the chat model, it streams message chunks whose text
    # is in `.content`; when it ends with a string output parser, it streams plain
    # strings. Handle both, so only the answer text is printed and never an
    # object repr.
    print(getattr(chunk, "content", chunk), end="", flush=True)

Jaminan moneter pada tanggal 20 Agustus 1958 sebesar 7,30%. 

Source: [84 Tahun 1958](https://www.ojk.go.id/id/regulasi/Documents/Pages/UU-Republik-Indonesia-tentang-Pengubahan-Pasal-Pasal-16-dan-19-Undang-Undang-Pokok-Bank-Indonesia/UU%20Nomor%2084%20Tahun%201958.pdf)

## **Build Agent**

In [46]:
# from langchain.tools.retriever import create_retriever_tool
# from langchain.agents import initialize_agent

# retriever_tool = create_retriever_tool(
#     retriever,
#     "langsmith_search",
#     "Search for information about LangSmith. For any questions about LangSmith, you must use this tool!",
# )

# tools = [retriever_tool]

# agent = initialize_agent(
#     tools=tools,
#     llm=llm_model,
#     prompt=prompt,
#     agent="zero-shot-react-description",
# )

In [47]:
# agent.invoke({"input": query_str})