In [34]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.chains.retrieval import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain import hub
from langchain_core.runnables import RunnablePassthrough

from dotenv import load_dotenv

In [3]:
# Load environment variables from the local var.env file; the call's return value is the cell output.
load_dotenv(dotenv_path='var.env')

True

In [None]:
# Chat model client pointed at a custom base_url instead of the default OpenAI endpoint.
# NOTE(review): no api_key is passed here; presumably it is read from the
# environment populated by load_dotenv -- confirm.
# NOTE(review): temperature=1 makes answers vary between reruns; consider a
# lower value if reproducible output matters.
llm = ChatOpenAI(
    base_url="https://dekallm.cloudeka.ai/",
    model="qwen/qwen25-72b-instruct",
    temperature=1,
    max_retries=2,
    max_tokens=None,
    timeout=None,
)

In [5]:
# Pipe the chat model into a string parser so the chain yields plain text.
chain = llm | StrOutputParser()

In [6]:
# Quick smoke test of the chain; the bare call leaves the reply as the cell output.
chain.invoke("Hello")

'Hello! How can I assist you today? Feel free to ask me any questions or let me know if you need help with anything specific.'

In [10]:
# Read the source PDF into LangChain documents.
pdf_path = './cendol.pdf'
loader = PyPDFLoader(file_path=pdf_path)
documents = loader.load()

# Split on newlines into chunks of up to ~1000 characters, with a 30-character
# overlap between neighbouring chunks.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=30,
)
split_documents = text_splitter.split_documents(documents)

In [25]:
# Embedding client for the same custom endpoint used by the chat model.
# check_embedding_ctx_length=False makes the client send the raw text. By
# default OpenAIEmbeddings tokenizes inputs with tiktoken and posts token-id
# arrays, which are only meaningful to OpenAI's own models -- not to a
# non-OpenAI model such as this one served behind a custom base_url.
# The chunks are ~1000 characters, so the automatic long-input splitting that
# this flag also disables is not needed here.
# NOTE: an index persisted with the previous setting should be rebuilt, since
# its vectors were computed from different inputs.
embeddings = OpenAIEmbeddings(
    model="baai/bge-multilingual-gemma2",
    base_url="https://dekallm.cloudeka.ai/",
    check_embedding_ctx_length=False,
)

# Embed every chunk and build an in-memory FAISS index over the vectors.
vectorstore = FAISS.from_documents(split_documents, embeddings)

In [22]:
# Use the vectorstore as a retriever
retriever = vectorstore.as_retriever()

# Retrieve the chunks most similar to the query
retrieved_documents = retriever.invoke("What is cendol?")

# Show the first 1000 characters of at most the top three hits.
# Slicing (instead of indexing 0..2) avoids an IndexError when the retriever
# returns fewer than three documents.
for doc in retrieved_documents[:3]:
    print('====================================================')
    print(doc.page_content[:1000])

Instruct (Cendolinst) and the second phase models
as Cendol-Chat (Cendolchat). We report the com-
plete hyperparameters used in Appendix A.
prompts, i.e., identity prompt, safety prompt, and
computational creativity prompt.
Identity Prompt Identity prompts are incorpo-
rated to provide a faithful identity of the Cendol
models. These identity prompts include the per-
sonal identity of Cendol, the etymology of the word
“cendol”, the creator information of Cendol, and
the neutrality of Cendol on various aspects, e.g.,
gender, religion, and political stance. In addition,
we also include some trivia prompts to increase the
engagingness of using Cendol. In total, we cover
125 identity prompts and to increase the representa-
tion of these prompts, we upsample the number of
identity prompts by 500 in the Cendol Collection.
Safety Prompt We manually construct safety
prompts to prevent Cendol from responding to
queries that are not appropriate according to cul-
tural norms and values in Indonesi

In [None]:
# Persist the FAISS index to the local "faiss_index" directory.
vectorstore.save_local(folder_path="faiss_index")

In [None]:
# Reload the index from disk, passing the same embeddings object used to build it.
# NOTE: allow_dangerous_deserialization=True opts in to unpickling the stored
# data. That is acceptable here only because "faiss_index" is written by this
# notebook; never load an index from an untrusted source this way.
new_vectorstore = FAISS.load_local(
    folder_path="faiss_index",
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)

In [27]:
# Pull the shared retrieval-QA chat prompt from the LangChain hub.
retrieval_qa_chat_prompt = hub.pull("langchain-ai/retrieval-qa-chat")

# Chain that places the retrieved documents into the prompt and calls the LLM.
combine_docs_chain = create_stuff_documents_chain(llm, retrieval_qa_chat_prompt)

# Full pipeline: retrieve from the reloaded index, then answer with the chain above.
retrieval_chain = create_retrieval_chain(
    retriever=new_vectorstore.as_retriever(),
    combine_docs_chain=combine_docs_chain,
)



In [28]:
# Bare expression: the notebook displays the repr of the assembled retrieval chain.
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7fda250377f0>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context', 'input'], optional_variables=['chat_history'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag

In [29]:
# Bare expression: the notebook displays the repr of the document-combining chain.
combine_docs_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context', 'input'], optional_variables=['chat_history'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChunk')], typing.Annotated[langchain_core.messages.human.HumanMessageChunk, Tag(tag='HumanMessageChunk')

In [30]:
# Run the retrieval chain on a question and print only the "answer" entry of the result.
res = retrieval_chain.invoke(
    {"input": "What is Cendol?"},
)
print(res["answer"])

Cendol is an iced sweet dessert that contains droplets of pandan-flavored green rice flour jelly and coconut milk, served with palm sugar syrup. It is a popular dessert across Southeast Asia.


In [31]:
# Import dependencies (ChatPromptTemplate is also imported from langchain_core.prompts in the first cell)
from langchain.prompts import ChatPromptTemplate

def load_prompt():
    """Build the chat prompt template used for PDF question answering.

    Returns:
        A ChatPromptTemplate with ``{context}`` and ``{question}``
        placeholders. It tells the model to answer from the PDF content
        and to reply with a fixed sentence when the answer is not there.
    """
    # Each entry is one line of the template; the whitespace inside every
    # piece is part of the template text and is kept verbatim.
    template_lines = (
        ' You need to answer the question in the sentence as same as in the  pdf content. . ',
        '        Given below is the context and question of the user.',
        '        context = {context}',
        '        question = {question}',
        '        if the answer is not in the pdf , answer "i donot know what the hell you are asking about"',
        '         ',
    )
    return ChatPromptTemplate.from_template("\n".join(template_lines))

In [32]:
def format_docs(docs):
    """Concatenate the text of several documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values joined by a blank line; an empty
        string when ``docs`` yields nothing.
    """
    page_texts = [entry.page_content for entry in docs]
    return "\n\n".join(page_texts)

# Build the QA prompt template once so the chain can reuse it.
prompt = load_prompt()

In [35]:
# Pipeline over a single input question:
#   - "context": the question goes to the retriever and the hits are flattened
#     into one string by format_docs;
#   - "question": the question is passed through unchanged.
# The filled prompt is sent to the LLM and the reply is parsed to a plain string.
# Note: this uses `retriever` (built from `vectorstore`), not the index reloaded
# from disk.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [36]:
# Ask the custom RAG chain a question; the returned string is the cell output.
rag_chain.invoke('What is Cendol?')

'Cendol is an iced sweet dessert that contains droplets of pandan-flavored green rice flour jelly and coconut milk, served with palm sugar syrup. Cendol is popular across Southeast Asia.'

In [47]:
# Print the raw template text of the first message in the chain's first prompt.
print(rag_chain.get_prompts()[0].messages[0].prompt.template)

 You need to answer the question in the sentence as same as in the  pdf content. . 
        Given below is the context and question of the user.
        context = {context}
        question = {question}
        if the answer is not in the pdf , answer "i donot know what the hell you are asking about"
         
