In [1]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.chains.retrieval import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain import hub
from langchain_core.runnables import RunnablePassthrough

from dotenv import load_dotenv

In [2]:
# Load environment variables from the local env file.
# NOTE(review): presumably this supplies the API key used by the clients below - confirm.
load_dotenv(dotenv_path="var.env")

True

In [3]:
# Chat model reached through an OpenAI-compatible endpoint.
# NOTE(review): temperature=1 makes answers non-deterministic between runs;
# consider a lower value if reproducible, grounded answers are the goal.
llm = ChatOpenAI(
    base_url="https://dekallm.cloudeka.ai/",
    model="qwen/qwen25-72b-instruct",
    temperature=1,
    max_retries=2,
    max_tokens=None,
    timeout=None,
)

In [4]:
# Minimal chain: send the input to the chat model, then parse the reply to a plain string.
chain = llm.pipe(StrOutputParser())

In [5]:
# Smoke test: confirm the endpoint answers before building the RAG pipeline.
chain.invoke(input="Hello")

"Hello! It's nice to meet you. How can I assist you today? Whether you have questions, need information, or just want to chat, I'm here to help!"

In [6]:
# Read the source PDF into LangChain documents.
pdf_path = "./cloudeka.pdf"
loader = PyPDFLoader(pdf_path)
documents = loader.load()

# Split on newlines into chunks of at most ~1000 characters with a 30-character overlap.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=30,
)
split_documents = text_splitter.split_documents(documents)

In [7]:
# Embedding model reached through an OpenAI-compatible endpoint.
embeddings = OpenAIEmbeddings(
    model="baai/bge-multilingual-gemma2",
    base_url="https://dekallm.cloudeka.ai/",
    # By default OpenAIEmbeddings tokenizes with tiktoken and sends token ids.
    # This model is not an OpenAI model, so those ids would not match its own
    # tokenizer; disabling the length check sends the raw text instead.
    check_embedding_ctx_length=False,
)

# Embed every chunk and build an in-memory FAISS index over the vectors.
vectorstore = FAISS.from_documents(split_documents, embeddings)

In [8]:
# Use the vectorstore as a retriever
retriever = vectorstore.as_retriever()

# Retrieve the chunks most similar to the query
retrieved_documents = retriever.invoke("What is cloudeka?")

# Show at most the first three retrieved chunks (up to 1000 characters each).
# Slicing instead of indexing avoids an IndexError when fewer than three
# chunks are returned (for example, with a very short PDF).
for document in retrieved_documents[:3]:
    print('====================================================')
    print(document.page_content[:1000])

1
Service Portal Cloudeka
Cloudeka is a Cloud Computing platform that provides various cloud services
including computing, storage, networking, and more. Cloudeka is supported
by self-service through the dashboard service portal with features to
configure, create projects, check billing, view and create rules in the
organization. users can choose carefully from these services to develop new
applications, or run existing applications on Cloudeka. Users can choose
carefully the services to develop new applications, or to run existing
applications on Cloudeka. There are two types of projects: Prepaid and 
Postpaid.
1.Prepaid
For the Prepaid type, it is used for personal needs that use personal email
addresses and can only have one project so that the subscription period is
relatively short without a letter of contract. For payment methods using 
Virtual Account, LinkAja, OVO, Credit Card.
At the beginning of registration, the deposit must be at least IDR 50,000.00.
Make sure that the depo

In [9]:
# Persist the FAISS vector store to the local "faiss_index" folder
vectorstore.save_local(folder_path="faiss_index")

In [10]:
# Load the vector store back from the "faiss_index" folder.
# allow_dangerous_deserialization=True opts in to deserializing the stored
# index files; only do this for an index you created yourself.
new_vectorstore = FAISS.load_local(
    folder_path="faiss_index",
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)

In [11]:
# Fetch the shared retrieval-QA chat prompt from the LangChain hub.
retrieval_qa_chat_prompt = hub.pull("langchain-ai/retrieval-qa-chat")

# Chain that places the retrieved documents into the prompt and calls the LLM.
combine_docs_chain = create_stuff_documents_chain(
    llm=llm,
    prompt=retrieval_qa_chat_prompt,
)

# Full pipeline: retrieve from the reloaded index, then answer with the chain above.
retrieval_chain = create_retrieval_chain(
    retriever=new_vectorstore.as_retriever(),
    combine_docs_chain=combine_docs_chain,
)



In [12]:
# Display the composed chain's repr to inspect how its steps are wired together.
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7fce00d5f1c0>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context', 'input'], optional_variables=['chat_history'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag

In [13]:
# Display the document-combining chain's repr to inspect its steps.
combine_docs_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context', 'input'], optional_variables=['chat_history'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChunk')], typing.Annotated[langchain_core.messages.human.HumanMessageChunk, Tag(tag='HumanMessageChunk')

In [15]:
# Ask a question through the retrieval chain; the chain reads the question
# from the "input" key and the generated text is under the "answer" key.
res = retrieval_chain.invoke(input={"input": "What is Cloudeka?"})
print(res["answer"])

Cloudeka is a Cloud Computing platform that offers a variety of cloud services, including computing, storage, networking, and more. It supports self-service through a dashboard service portal, where users can configure, create projects, check billing, and manage rules within the organization. Users can leverage these services to develop new applications or run existing ones on the Cloudeka platform.


In [16]:
# Import dependencies (ChatPromptTemplate is also imported from langchain_core.prompts in the first cell)
from langchain.prompts import ChatPromptTemplate

def load_prompt():
    """Build the chat prompt template used by the custom RAG chain.

    The template has two placeholders, ``{context}`` and ``{question}``.
    The text is assembled from explicit pieces so that its leading and
    trailing whitespace, which is part of the runtime string, stays visible.

    Returns:
        The ``ChatPromptTemplate`` created from the template text.
    """
    template_text = (
        " You need to answer the question in the sentence as same as in the  pdf content. . \n"
        "        Given below is the context and question of the user.\n"
        "        context = {context}\n"
        "        question = {question}\n"
        '        if the answer is not in the pdf , answer "i donot know what the hell you are asking about"\n'
        "         "
    )
    return ChatPromptTemplate.from_template(template_text)

In [17]:
def format_docs(docs):
    """Join the ``page_content`` of every document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The page contents separated by a blank line; an empty string when
        ``docs`` is empty.
    """
    page_texts = [document.page_content for document in docs]
    return "\n\n".join(page_texts)

# Build the prompt template consumed by the custom RAG chain.
prompt = load_prompt()

In [19]:
# Inputs for the prompt: "context" is the retrieved documents joined into one
# string, and "question" is the user's input passed through unchanged.
rag_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Fill the prompt, call the chat model, and return the reply as a plain string.
rag_chain = rag_inputs | prompt | llm | StrOutputParser()

In [None]:
# Run the custom RAG chain; the raw question string is the chain input.
rag_chain.invoke(input="What is cloudeka?")

'Cendol is an iced sweet dessert that contains droplets of pandan-flavored green rice flour jelly and coconut milk, served with palm sugar syrup. Cendol is popular across Southeast Asia.'

In [47]:
# Print the raw template text of the first prompt found in the chain.
first_prompt = rag_chain.get_prompts()[0]
print(first_prompt.messages[0].prompt.template)

 You need to answer the question in the sentence as same as in the  pdf content. . 
        Given below is the context and question of the user.
        context = {context}
        question = {question}
        if the answer is not in the pdf , answer "i donot know what the hell you are asking about"
         
