# **OJK ChatBot - LangChain**

## **Setup**

In [41]:
# Load variables from a local .env file into the process environment (python-dotenv).
# The call is the cell's last expression, so its boolean return value is what the cell displays.
from dotenv import load_dotenv
load_dotenv()

True

## **Config**

In [42]:
from utils.config import get_config
from utils.questions import get_question
from utils.model_config import ModelName, get_model

# Project settings object. Later cells pass it to get_model() and PineconeIndexManager
# and index it like a mapping (config['cohere_api_key']).
config = get_config()

In [43]:
# STORE gates the Load -> Split -> Storing cells; when False they are skipped and
# the existing vector index is only loaded.
STORE = False
# Forwarded as `delete=` to PineconeIndexManager.store_vector_index (only reached when STORE is True).
DELETE = False
# Number of candidates requested from the vector similarity search (search_kwargs "k").
TOP_K = 10
# Number of documents the Cohere reranker keeps (top_n).
TOP_N = 6
# Backend passed to get_model() to obtain the LLM and the embedding model.
model_name = ModelName.AZURE_OPENAI
# Question used by the retrieval cell. NOTE(review): "k" is presumably a lookup key
# into a predefined question set; verify in utils.questions.
query_str = get_question("k")
# query_str = "Apa judul peraturan nomor 37 /SEOJK.03/2016?"

## **Define Model**

In [44]:
# Obtain the chat model (used by the RAG chain) and the embedding model (used by the
# Pinecone index manager) for the backend selected in `model_name`.
llm_model, embed_model = get_model(model_name=model_name, config=config)

## **Indexing**

### **Load**

In [45]:
from utils.documents_text_extract import extract_all_documents_in_directory

documents_dir = './data/documents'
metadata_path = './data/metadata/files_metadata.csv'

# Extraction only runs when the index is being rebuilt; with STORE False,
# `documents` is never defined in this notebook.
if STORE:
    # NOTE(review): `treshold` (sic) is the keyword exactly as written here; presumably it
    # matches the helper's signature, so confirm before correcting the spelling.
    # What the 0.98 value controls is not visible from this notebook.
    documents = extract_all_documents_in_directory(documents_dir, metadata_path, treshold=0.98)


### **Split**

In [46]:
from utils.document_split import document_splitter

# Consumes `documents` from the Load cell, so it is guarded by the same STORE flag;
# `all_splits` therefore only exists when STORE is True.
if STORE:
    all_splits = document_splitter(docs=documents)

### **Storing**

In [47]:
from utils.vector_store import PineconeIndexManager

# Manager for the 'ojk' index, bound to the embedding model chosen above.
pinecone = PineconeIndexManager(index_name='ojk', embed_model=embed_model, config=config)

# Writing to the index is optional and controlled by STORE; DELETE is forwarded as-is.
if STORE:
    pinecone.store_vector_index(docs=all_splits, delete=DELETE)

# The index is loaded in either case, after any write has finished.
vector_store = pinecone.load_vector_index()

## **Retrieval and Generation**

### **Retrieve**

In [48]:
from langchain_cohere import CohereRerank
from langchain.retrievers import ContextualCompressionRetriever

# Two-stage retrieval: similarity search fetches TOP_K candidates from the vector
# store, then the Cohere reranker keeps the TOP_N best of them.
compressor = CohereRerank(cohere_api_key=config['cohere_api_key'], top_n=TOP_N)

# Plain similarity retriever; it can be used directly as `retriever` to bypass reranking.
base_retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": TOP_K},
)

retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=base_retriever,
)

In [49]:
# Run the retriever on its own for the configured question and display the returned
# documents; each one carries a `relevance_score` in its metadata.
retrieved_docs = retriever.invoke(input=query_str)
retrieved_docs

[Document(metadata={'effective_date': '26 Februari 2008', 'file_url': 'https://www.ojk.go.id/id/regulasi/Documents/Pages/PMK-Nomor-36PMK.010-Tahun-2008-tentang-Besar-Santunan-dan-Sumbangan-Wajib-Dana-Kecelakaan-Lalu-Lintas-Jalan/menas13_1389258036.pdf', 'page_number': 4.0, 'regulation_number': '36/PMK.010/2008', 'regulation_type': 'Klasifikasi Bapepam', 'sector': 'IKNB', 'subsector': 'Asuransi', 'title': 'Peraturan Menteri Keuangan Nomor 36/PMK.010/2008 tentang Besar Santunan dan Sumbangan Wajib Dana Kecelakaan Lalu Lintas Jalan', 'relevance_score': 0.9850429}, page_content='2) Dalam hal pembayaran SWDKLLJ dilakukan setelah melewati batas\n seharusnya dibayar dengan ketentuan denda yang dikenakan paling\nbesar Rp100.000,00 (seratus ribu rupiah).\n(3) Dalam hal ketentuan mengenai batas waktu sebagaimana dimaksud\ngeografis daerah setempat, Direksi perusahaan yang ditunjuk untuk\nmenyelenggarakan Dana Kecelakaan Lalu Lintas Jalan diberi\nbesarnya denda SWDKLLJ, dengan ketentuan batas wak

### **Generate**

In [50]:
from utils.rag_chain_with_chat_history import create_chain_with_chat_history

# Prompt that rewrites a follow-up question into a standalone one using the chat history.
# Placeholders filled at run time: {chat_history} and {question}.
_TEMPLATE = """Given the following conversation and a follow-up question, \
rephrase the follow-up question to be a standalone question in its original language. 
If the follow-up question is not clear, indicate so. 
If the chat history is not relevant to the follow-up question, please ignore the chat history.

Chat History:
{chat_history}

Follow-up Question: {question}
Standalone Question: """

# Prompt for the answering step. Placeholders filled at run time: {context} (retrieved
# documents with their metadata) and {question}.
# The PDF link example uses balanced brackets and asks for a whole-number page, because
# page_number is stored as a float in the metadata (e.g. 3.0) and would otherwise be
# copied into the URL fragment as "#page=3.0".
_ANSWER_TEMPLATE = """The context information is below.
Context: 
{context}

Based on the context and the metadata information provided, \
answer the query related to banking compliance in Indonesia.
Use the context and metadata information only, without relying on prior knowledge. 
ALWAYS ANSWER IN THE USER'S LANGUAGE.

Please provide your answer in the following format, \
including the regulation number and file URL if available:

(Answer...) \n\n
Source: [metadata['regulation_number']](metadata['file_url'])

If you cannot find the regulation number, just provide the (Answer...). 
If the file_url ends with '.pdf', you can add the metadata['page_number'] in the URL like this: 

(Answer...) \n\n
Source: [metadata['regulation_number']](metadata['file_url']#page=metadata['page_number'])

Write the page number in the URL as a whole number without decimals (for example, #page=3, not #page=3.0).

(Answer...) is the answer to the question, don't write '(Answer...)' in the answer.
DO NOT PROVIDE AMBIGUOUS ANSWERS.

Question: {question}
"""


# _ANSWER_TEMPLATE = """The context information is below.
# Context: 
# {context}

# Based on the context and the metadata information provided, answer the query \
# related to banking compliance in Indonesia. 
# Use the context and metadata information only, without relying on prior knowledge. 
# ALWAYS ANSWER IN THE USER'S LANGUAGE.

# Please provide your answer in the following format, including the regulation number and file URL if available:
# [ANSWER] \n\n
# Source: [metadata['regulation_number']](metadata['file_url'])

# If you cannot find the regulation number, just provide the answer. 
# If the query is about effective date, regulation type, regulation number, \
# regulation type, sector, subsector, or title information, check the context metadata first. \
# If not found, then refer to the context page_content.

# DO NOT PROVIDE AMBIGUOUS ANSWERS.

# Question: {question}
# """

# Build the conversational RAG chain from the two prompts defined in this cell,
# the retriever and the LLM. _TEMPLATE is the question-contextualizing prompt and
# _ANSWER_TEMPLATE is the answering prompt.
chain = create_chain_with_chat_history(
    contextualize_q_prompt_str=_TEMPLATE,
    qa_system_prompt_str=_ANSWER_TEMPLATE,
    retriever=retriever,
    llm_model=llm_model,
)

In [51]:
from utils.chat_history import ChatHistory

# Conversation memory passed to get_response() in the cells below.
# NOTE(review): max_history_length=5 presumably caps how much past conversation is kept;
# verify the unit (turns vs. messages) in utils.chat_history.
chat_history = ChatHistory(max_history_length=5)

#### **Streaming**

In [52]:
# from utils.rag_chain_with_chat_history import print_answer_stream

# await print_answer_stream(chain=chain, chat_history=chat_history, question=query_str)

#### **With Question, Answer, and Context**

In [57]:
from utils.rag_chain_with_chat_history import get_response

# First turn: ask the configured question so the conversation has context on a fresh
# kernel. "Kalau mobil?" is an elliptical follow-up that only makes sense after an
# earlier turn; the saved outputs show such a turn in chat_history, but no cell in the
# notebook produced it, so Restart & Run All could not reproduce the result.
# NOTE(review): query_str is presumably the question behind that earlier turn; confirm.
initial_response = get_response(chat_history=chat_history, chain=chain, question=query_str)

# Follow-up turn: exercised against the history recorded by the first call.
response = get_response(chat_history=chat_history, chain=chain, question="Kalau mobil?")
response

{'question': 'Berapa nilai SWDKLLJ untuk mobil?',
 'answer': 'SWDKLLJ untuk mobil adalah sebagai berikut:\n\n- Mobil penumpang angkutan umum sampai dengan 1600 cc: Rp150.000,00\n- Bus dan mikro bus bukan angkutan umum: Rp150.000,00\n- Bus dan mikro bus angkutan umum, serta mobil penumpang angkutan umum lainnya di atas 1600 cc: Rp87.000,00\n- Truk, mobil tangki, mobil gandengan, mobil barang di atas 2400 cc, truk container dan sejenisnya: Rp160.000,00\n\nSumber: [36/PMK.010/2008](https://www.ojk.go.id/id/regulasi/Documents/Pages/PMK-Nomor-36PMK.010-Tahun-2008-tentang-Besar-Santunan-dan-Sumbangan-Wajib-Dana-Kecelakaan-Lalu-Lintas-Jalan/menas13_1389258036.pdf#page=4)',
 'context': '[\n  {\n    "metadata": {\n      "effective_date": "26 Februari 2008",\n      "file_url": "https://www.ojk.go.id/id/regulasi/Documents/Pages/PMK-Nomor-36PMK.010-Tahun-2008-tentang-Besar-Santunan-dan-Sumbangan-Wajib-Dana-Kecelakaan-Lalu-Lintas-Jalan/menas13_1389258036.pdf",\n      "page_number": 3.0,\n      "reg

In [58]:
# print() so the "\n" sequences in the answer render as line breaks
# (the dict repr of `response` shows them escaped).
print(response['answer'])

SWDKLLJ untuk mobil adalah sebagai berikut:

- Mobil penumpang angkutan umum sampai dengan 1600 cc: Rp150.000,00
- Bus dan mikro bus bukan angkutan umum: Rp150.000,00
- Bus dan mikro bus angkutan umum, serta mobil penumpang angkutan umum lainnya di atas 1600 cc: Rp87.000,00
- Truk, mobil tangki, mobil gandengan, mobil barang di atas 2400 cc, truk container dan sejenisnya: Rp160.000,00

Sumber: [36/PMK.010/2008](https://www.ojk.go.id/id/regulasi/Documents/Pages/PMK-Nomor-36PMK.010-Tahun-2008-tentang-Besar-Santunan-dan-Sumbangan-Wajib-Dana-Kecelakaan-Lalu-Lintas-Jalan/menas13_1389258036.pdf#page=4)


In [59]:
# `context` is a string holding the retrieved documents (metadata + page_content)
# in indented JSON-like form; print() shows it with its line breaks.
print(response['context'])

[
  {
    "metadata": {
      "effective_date": "26 Februari 2008",
      "file_url": "https://www.ojk.go.id/id/regulasi/Documents/Pages/PMK-Nomor-36PMK.010-Tahun-2008-tentang-Besar-Santunan-dan-Sumbangan-Wajib-Dana-Kecelakaan-Lalu-Lintas-Jalan/menas13_1389258036.pdf",
      "page_number": 3.0,
      "regulation_number": "36/PMK.010/2008",
      "regulation_type": "Klasifikasi Bapepam",
      "sector": "IKNB",
      "subsector": "Asuransi",
      "title": "Peraturan Menteri Keuangan Nomor 36/PMK.010/2008 tentang Besar Santunan dan Sumbangan Wajib Dana Kecelakaan Lalu Lintas Jalan",
      "relevance_score": 0.9947392
    },
    "page_content": "MENTERI KEUANGAN\nREPUBLIK INDONESIA\nmemperoleh santunan sebesar Rp 25.000.000,00 (dua puluh lima\njuta rupiah).\nb. Korban yang mengalami cacat tetap berhak memperoleh\nsantunan yang besarnya dihitung berdasarkan angka prosentase\n Korban yang memerlukan perawatan dan pengobatan berhak\nrupiah).\nPasal 3\nDalam hal korban meninggal dunia akibat

In [60]:
# Show the Human/Assistant turns currently stored in chat_history.
print(chat_history.get_formatted_history())

----------
Human: Berapa nilai SWDKLLJ dari sedan?
----------
Assistant: SWDKLLJ untuk sedan sebesar Rp140.000,00 (seratus empat puluh ribu rupiah).

Sumber: [36/PMK.010/2008](https://www.ojk.go.id/id/regulasi/Documents/Pages/PMK-Nomor-36PMK.010-Tahun-2008-tentang-Besar-Santunan-dan-Sumbangan-Wajib-Dana-Kecelakaan-Lalu-Lintas-Jalan/menas13_1389258036.pdf#page=3.0)
----------
Human: Berapa nilai SWDKLLJ untuk mobil?
----------
Assistant: SWDKLLJ untuk mobil adalah sebagai berikut:

- Mobil penumpang angkutan umum sampai dengan 1600 cc: Rp150.000,00
- Bus dan mikro bus bukan angkutan umum: Rp150.000,00
- Bus dan mikro bus angkutan umum, serta mobil penumpang angkutan umum lainnya di atas 1600 cc: Rp87.000,00
- Truk, mobil tangki, mobil gandengan, mobil barang di atas 2400 cc, truk container dan sejenisnya: Rp160.000,00

Sumber: [36/PMK.010/2008](https://www.ojk.go.id/id/regulasi/Documents/Pages/PMK-Nomor-36PMK.010-Tahun-2008-tentang-Besar-Santunan-dan-Sumbangan-Wajib-Dana-Kecelakaan-Lalu

## **Evaluation**