# **OJK ChatBot - LangChain**

## **Setup**

In [30]:
from dotenv import load_dotenv
# Load environment variables from a .env file before any configuration is read.
load_dotenv()

True

## **Config**

In [31]:
from utils.config import get_config
from utils.questions import get_question
from utils.model_config import ModelName, get_model

# Project configuration: passed to get_model and PineconeIndexManager below,
# and read for config['cohere_api_key'] when the reranker is built.
config = get_config()

In [32]:
# Run-level switches and retrieval parameters for this notebook.

# When True, the Load / Split / Storing cells extract, split and store the documents.
STORE = False
# Forwarded as `delete=` to pinecone.store_vector_index; only reached when STORE is True.
DELETE = False
# `k` passed to the base similarity-search retriever.
TOP_K = 10
# `top_n` passed to the Cohere reranker.
TOP_N = 6
# Backend handed to get_model to obtain the LLM and the embedding model.
model_name = ModelName.AZURE_OPENAI
# Query used by the retrieval and generation cells.
# NOTE(review): "e" is presumably a key into a predefined question set — confirm in utils.questions.
query_str = get_question("e")
# query_str = "Apa judul peraturan nomor 37 /SEOJK.03/2016?"

## **Define Model**

In [33]:
# Obtain the LLM (used by the RAG chain) and the embedding model (used by the
# Pinecone index manager) for the backend selected in `model_name`.
llm_model, embed_model = get_model(model_name=model_name, config=config)

## **Indexing**

### **Load**

In [34]:
from utils.documents_text_extract import extract_all_documents_in_directory

# Locations of the raw documents and of the CSV holding their metadata.
documents_dir = "./data/documents"
metadata_path = "./data/metadata/files_metadata.csv"

# Extraction only runs when the index is being (re)built.
if STORE:
    # NOTE(review): `treshold` is presumably the helper's own (misspelled)
    # keyword — confirm against utils.documents_text_extract before renaming.
    documents = extract_all_documents_in_directory(
        documents_dir,
        metadata_path,
        treshold=0.98,
    )


### **Split**

In [35]:
from utils.document_split import document_splitter

# Split the extracted documents into chunks; `documents` only exists when
# STORE is True, so the split is guarded by the same flag.
if STORE:
    all_splits = document_splitter(docs=documents)

### **Storing**

In [36]:
from utils.vector_store import PineconeIndexManager

# Manager for the 'ojk' Pinecone index, using the notebook's embedding model.
pinecone = PineconeIndexManager(index_name='ojk', embed_model=embed_model, config=config)

# Writing to the index is opt-in; `all_splits` only exists when STORE is True.
if STORE:
    pinecone.store_vector_index(docs=all_splits, delete=DELETE)

# The vector store is loaded the same way whether or not new vectors were just
# written, so the load happens once, outside the branch.
vector_store = pinecone.load_vector_index()

## **Retrieval and Generation**

### **Retrieve**

In [37]:
from langchain_cohere import CohereRerank
from langchain.retrievers import ContextualCompressionRetriever
import os

# Two-stage retrieval: a similarity search returns TOP_K candidates, which the
# Cohere reranker then narrows down to TOP_N.
compressor = CohereRerank(cohere_api_key=config['cohere_api_key'], top_n=TOP_N)

# Plain similarity retriever; use it directly as `retriever` to skip reranking.
base_retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": TOP_K},
)

retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=base_retriever,
)

In [38]:
# Run the reranking retriever on the query; the bare expression on the last
# line displays the returned Document objects.
retrieved_docs = retriever.invoke(input=query_str)
retrieved_docs

 Document(metadata={'effective_date': '11 Januari 2017', 'file_url': 'https://www.ojk.go.id/id/regulasi/Documents/Pages/Perizinan-Usaha-dan-Kelembagaan-Lembaga-Penjamin/POJK%201-2017%20Perizinan%20dan%20Kelembagaan%20Lembaga%20Penjamin.pdf', 'regulation_number': '1/POJK.05/2017', 'regulation_type': 'Peraturan OJK', 'sector': 'IKNB', 'subsector': 'Lembaga Jasa Keuangan Khusus,  Lembaga Kliring dan Penjaminan', 'title': 'Perizinan Usaha dan Kelembagaan Lembaga Penjamin', 'relevance_score': 0.0011786721}, page_content='a. \nRp25.000.000.000,00 (dua puluh lima miliar rupiah) \nuntuk UUS dari Perusahaan Penjaminan dengan \nlingkup wilayah nasional; \nb. \nRpl0.000.000.000,00 (sepuluh miliar rupiah) untuk \nUUS dari Perusahaan Penjaminan dengan lingkup \nwilayah provinsi; atau \nc. \nRp5.000.000.000,00 (lima miliar rupiah) untuk UUS \ndari \nPerusahaan \nPenjaminan \ndengan \nlingkup \nwilayah kabupaten/kota.'),
 Document(metadata={'effective_date': '1 Desember 2011', 'file_url': 'https://ww

### **Generate**

In [39]:
from utils.rag_chain_with_chat_history import create_chain_with_chat_history

# Prompt templates handed to create_chain_with_chat_history.

# Question-rewriting prompt: turns a follow-up question plus the conversation
# so far into a standalone question.
# Placeholders: {chat_history}, {question}.
_TEMPLATE = """Given the following conversation and a follow-up question, \
rephrase the follow-up question to be a standalone question in its original language. 
If the follow-up question is not clear, indicate so. If the chat history is not relevant \
to the follow-up question, please ignore the chat history.

Chat History:
{chat_history}

Follow-up Question: {question}
Standalone Question: """

# Answering prompt: instructs the model to answer from the retrieved context
# only and to cite the regulation number and file URL as a markdown link.
# Placeholders: {context}, {question}.
# NOTE(review): the metadata shown in this notebook's outputs has no page
# field, so the `#page=` value presumably has to come from the chunk text —
# confirm that the cited page numbers are reliable.
_ANSWER_TEMPLATE = """The context information is below.
Context: 
{context}

Based on the context and the metadata information provided, answer the query \
related to banking compliance in Indonesia. 
Use the context and metadata information only, without relying on prior knowledge. 
ALWAYS ANSWER IN THE USER'S LANGUAGE.

Please provide your answer in the following format, including the regulation number and file URL if available:
Answer... \n\n
Source: [metadata['regulation_number']](metadata['file_url'])

If you cannot find the regulation number, just provide the answer. 
If file_url is end with .pdf, you can add the page number in the URL like this: \
[metadata['regulation_number']](metadata['file_url']#page=pagenumber)

DO NOT PROVIDE AMBIGUOUS ANSWERS.

Question: {question}
"""

# Build the RAG chain from the two prompt templates, the reranking retriever
# and the LLM: _TEMPLATE is passed as the question-contextualizing prompt and
# _ANSWER_TEMPLATE as the QA system prompt.
chain = create_chain_with_chat_history(
    contextualize_q_prompt_str=_TEMPLATE,
    qa_system_prompt_str=_ANSWER_TEMPLATE,
    retriever=retriever,
    llm_model=llm_model,
)

In [40]:
from utils.chat_history import ChatHistory

# Conversation history object passed to get_response together with the chain.
# NOTE(review): max_history_length=5 presumably caps the number of stored turns — confirm in utils.chat_history.
chat_history = ChatHistory(max_history_length=5)

#### **Streaming**

In [41]:
from utils.rag_chain_with_chat_history import print_answer_stream

# await print_answer_stream(chain=chain, chat_history=chat_history, question=query_str)

#### **With Question, Answer, and Context**

In [42]:
from utils.rag_chain_with_chat_history import get_response

# Ask the question through the chain. The result is indexed below by
# 'answer' and 'context'; the displayed dict also carries 'question'.
# NOTE(review): get_response appears to record the turn in `chat_history`
# as well (the history printed later contains this exchange) — confirm.
response = get_response(chat_history=chat_history, chain=chain, question=query_str)
response

{'question': 'Berapa persen jaminan moneter pada tanggal 20 Agustus 1958?',
 'answer': 'Jaminan moneter pada tanggal 20 Agustus 1958 adalah sebesar 7,30%. \n\nSource: [84 Tahun 1958](https://www.ojk.go.id/id/regulasi/Documents/Pages/UU-Republik-Indonesia-tentang-Pengubahan-Pasal-Pasal-16-dan-19-Undang-Undang-Pokok-Bank-Indonesia/UU%20Nomor%2084%20Tahun%201958.pdf#page=5)',

In [43]:
# Print the answer text so its newlines render instead of showing as "\n".
print(response['answer'])

Jaminan moneter pada tanggal 20 Agustus 1958 adalah sebesar 7,30%. 

Source: [84 Tahun 1958](https://www.ojk.go.id/id/regulasi/Documents/Pages/UU-Republik-Indonesia-tentang-Pengubahan-Pasal-Pasal-16-dan-19-Undang-Undang-Pokok-Bank-Indonesia/UU%20Nomor%2084%20Tahun%201958.pdf#page=5)


In [44]:
# Print the context returned with the answer (metadata of the retrieved chunks).
print(response['context'])

[
  {
    "metadata": {
      "effective_date": "31 Desember 1958",
      "file_url": "https://www.ojk.go.id/id/regulasi/Documents/Pages/UU-Republik-Indonesia-tentang-Pengubahan-Pasal-Pasal-16-dan-19-Undang-Undang-Pokok-Bank-Indonesia/UU%20Nomor%2084%20Tahun%201958.pdf",
      "regulation_number": "84 Tahun 1958",
      "regulation_type": "Undang-Undang",
      "sector": "Perbankan",
      "subsector": "Bank Umum",
      "title": "Undang-Undang Republik Indonesia tentang Pengubahan Pasal-Pasal 16 dan 19 Undang-Undang Pokok Bank Indonesia",
      "relevance_score": 0.998835
    },
  },
  {
    "metadata": {
      "effective_date": "11 Januari 2017",
      "file_url": "https://www.ojk.go.id/id/regulasi/Documents/Pages/Perizinan-Usaha-dan-Kelembagaan-Lembaga-Penjamin/POJK%201-2017%20Perizinan%20dan%20Kelembagaan%20Lembaga%20Penjamin.pdf",
      "regulation_number": "1/POJK.05/2017",
      "regulation_type": "Peraturan OJK",
      "sector": "IKNB",
      "subsector": "Lembaga Jasa Keuangan

In [45]:
# Show the conversation recorded so far as formatted Human/Assistant turns.
print(chat_history.get_formatted_history())

----------
Human: Berapa persen jaminan moneter pada tanggal 20 Agustus 1958?
----------
Assistant: Jaminan moneter pada tanggal 20 Agustus 1958 adalah sebesar 7,30%. 

Source: [84 Tahun 1958](https://www.ojk.go.id/id/regulasi/Documents/Pages/UU-Republik-Indonesia-tentang-Pengubahan-Pasal-Pasal-16-dan-19-Undang-Undang-Pokok-Bank-Indonesia/UU%20Nomor%2084%20Tahun%201958.pdf#page=5)



## **Evaluation**