# Add reference documents

In [1]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv

load_dotenv('var.env')

True

In [2]:
# Source PDF whose text becomes the retrieval corpus.
pdf_path = "cloudeka.pdf"

# Read the PDF into LangChain documents.
loader = PyPDFLoader(file_path=pdf_path)
documents = loader.load()

# Split on newlines, targeting chunk_size=1000 with a chunk_overlap of 30 so
# text cut at a chunk boundary still appears in the neighbouring chunk.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=30,
)
split_documents = text_splitter.split_documents(documents)

# OpenAIEmbeddings client pointed at the dekallm.cloudeka.ai endpoint rather
# than the default base URL.
# NOTE(review): credentials are presumably picked up from the environment
# populated by load_dotenv('var.env') -- confirm.
embeddings = OpenAIEmbeddings(
    base_url="https://dekallm.cloudeka.ai/",
    model="baai/bge-multilingual-gemma2",
)

# Embed every chunk into a FAISS index, then persist it to the local
# "faiss_index" directory so it can be reloaded without re-embedding.
vectorstore = FAISS.from_documents(split_documents, embeddings)
vectorstore.save_local("faiss_index")

### Test a query against the vector database

Here, the reference documents most relevant to the user prompt are retrieved from the vector database. As the output below shows, each retrieved document is only a chunk (a substring) of the PDF's full content, not the whole file.

In [3]:
# Smoke-test the index: wrap the vector store as a retriever, run a sample
# question through it, and show the text of the first chunk that comes back.
retriever = vectorstore.as_retriever()
retrieved_documents = retriever.invoke("What is cloudeka?")
first_hit = retrieved_documents[0]
print(first_hit.page_content)

1
Service Portal Cloudeka
Cloudeka is a Cloud Computing platform that provides various cloud services
including computing, storage, networking, and more. Cloudeka is supported
by self-service through the dashboard service portal with features to
configure, create projects, check billing, view and create rules in the
organization. users can choose carefully from these services to develop new
applications, or run existing applications on Cloudeka. Users can choose
carefully the services to develop new applications, or to run existing
applications on Cloudeka. There are two types of projects: Prepaid and 
Postpaid.
1.Prepaid
For the Prepaid type, it is used for personal needs that use personal email
addresses and can only have one project so that the subscription period is
relatively short without a letter of contract. For payment methods using 
Virtual Account, LinkAja, OVO, Credit Card.
At the beginning of registration, the deposit must be at least IDR 50,000.00.


# Run RAG

In [4]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from dotenv import load_dotenv

load_dotenv('var.env')

True

### Initialize the embedding model and chat model API clients

In [5]:
# Both clients talk to the same endpoint, so the URL is stated once.
DEKALLM_BASE_URL = "https://dekallm.cloudeka.ai/"

# Same embedding model name as the one used when the FAISS index was built;
# queries have to be embedded in the same vector space as the stored chunks.
embeddings = OpenAIEmbeddings(
    base_url=DEKALLM_BASE_URL,
    model="baai/bge-multilingual-gemma2",
)

# Chat model that writes the final answer. temperature=0 is requested,
# max_tokens and timeout are left unset (None), and failed calls are
# retried up to 2 times.
llm = ChatOpenAI(
    base_url=DEKALLM_BASE_URL,
    model="qwen/qwen25-72b-instruct",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

### Load the vector database

In [6]:
# Reload the index that was persisted to the local "faiss_index" directory.
# allow_dangerous_deserialization=True is an explicit opt-in: LangChain guards
# this load because the saved docstore is unpickled. That is acceptable here
# only because the index was written by this notebook; do not point this at an
# index obtained from an untrusted source.
vectorstore = FAISS.load_local(
    "faiss_index",
    embeddings,
    allow_dangerous_deserialization=True,
)

# Retriever interface over the loaded index, used as the first step of the chain.
retriever = vectorstore.as_retriever()

### Create the prompt template and the helper that inserts context into it

In [7]:
# Chat prompt with two placeholders: {context} (the retrieved text) and
# {question} (the user's question). It tells the model to answer from the
# context and to reply "I cannot answer" when the context lacks the answer.
# The name is bound once, directly to the ChatPromptTemplate, instead of
# holding a raw string first and being rebound to a different type.
# The template text is kept verbatim (including whitespace) so the prompt sent
# to the model is unchanged.
prompt_template = ChatPromptTemplate.from_template(
    """  
    Given this context: {context}
    Answer this question = {question}
    You need to answer the question in the sentence as same as in the context. 
    If the answer is not in the context, answer "I cannot answer"
    """
)

def format_docs(docs):
    """Concatenate the text of retrieved documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values joined by a blank line, in input order;
        an empty string when ``docs`` is empty.
    """
    page_texts = [doc.page_content for doc in docs]
    return "\n\n".join(page_texts)

### Build and run the RAG chain

In [8]:
# Map the incoming question onto the two template variables:
#   "context"  - retrieve chunks for the question and flatten them with format_docs
#   "question" - the input itself, passed through unchanged
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Pipeline: build inputs -> fill the prompt -> call the chat model -> plain string.
rag_chain = chain_inputs | prompt_template | llm | StrOutputParser()

# Run one question through the full chain and show the model's answer.
prompt = "What is Cloudeka?"
response = rag_chain.invoke(prompt)
print(response)

Cloudeka is a Cloud Computing platform that provides various cloud services including computing, storage, networking, and more. Cloudeka is supported by self-service through the dashboard service portal with features to configure, create projects, check billing, view and create rules in the organization. Users can choose carefully from these services to develop new applications, or run existing applications on Cloudeka.
