In [1]:
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.document_loaders import PyPDFLoader
from pydantic import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.runnables import chain
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder
from langchain_core.messages import AIMessage, HumanMessage
from IPython.display import Markdown, display

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
# Embedding model shared by every Chroma collection built in this cell.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Chat model shared by query analysis, question rewriting and answering.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0.3,
    max_tokens=1000,
)

# Conversation memory: alternating HumanMessage / AIMessage entries that
# chatbot() appends to and the history-aware retrievers read from.
chat_history = []


# general information
# News and analysis pages that back questions not tied to a single candidate.
url_list = [
    "https://apnews.com/article/sri-lanka-presidential-election-explained-wickremesinghe-e203abecd36d04fde21e927b9be8ba71",
    "https://www.electionguide.org/elections/id/4364/",
    "https://thediplomat.com/2024/08/sri-lankas-anura-dissanayake-a-strong-contender-for-president/",
    "https://www.isas.nus.edu.sg/papers/sri-lanka-election-2024-key-political-candidates-and-campaigns/",
    "https://www.thehindu.com/news/international/in-southern-sri-lanka-a-chant-for-change-this-poll-season/article68628670.ece",
    "https://groundviews.org/2024/09/04/the-hard-truth-supporting-ranil-now-could-trigger-the-next-crisis/",
    "https://groundviews.org/2024/08/29/the-jvp-and-ethnic-relations-walking-a-tightrope-to-2024-part-1/",
    "https://groundviews.org/2024/09/10/changes-in-support-for-the-leading-presidential-candidates/",
    "https://groundviews.org/2024/09/13/voters-want-more-government-spending-on-health-and-education/",
    "https://groundviews.org/2024/07/18/a-k-dissanayake-continues-to-lead-in-favourability-ratings/",
    "https://groundviews.org/2024/07/22/the-three-way-contest-to-win-the-presidency/",
    "https://www.vifindia.org/article/2024/september/05/Sri-Lanka-at-the-Crossroads-The-Presidential-Election-2024",
    "https://www.dailymirror.lk/breaking-news/Anura-rebuffs-Ranils-proposal-to-invite-IMF-for-debate/108-291616",
]

# Fetch the pages listed above.
general_loader = WebBaseLoader(url_list)
general_data = general_loader.load()

# One splitter is shared by the web pages and the manifesto PDFs so every
# collection is chunked the same way (500 characters, 100 overlap).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
general_docs = text_splitter.split_documents(general_data)

# Index the chunks in their own collection and expose a top-10 retriever.
vectorstore_general = Chroma.from_documents(
    documents=general_docs,
    embedding=embeddings,
    collection_name="general",
)
retriever_general = vectorstore_general.as_retriever(search_kwargs={"k": 10})


def _index_manifesto(pdf_path, collection_name):
    """Load one manifesto PDF, chunk it and index it in its own collection.

    Uses the shared ``text_splitter`` and ``embeddings`` defined earlier in
    this cell.

    Returns:
        A 5-tuple ``(loader, pages, chunks, vectorstore, retriever)`` where
        ``retriever`` returns the 10 nearest chunks.
    """
    loader = PyPDFLoader(pdf_path)
    pages = loader.load()
    chunks = text_splitter.split_documents(pages)
    store = Chroma.from_documents(chunks, embeddings, collection_name=collection_name)
    return loader, pages, chunks, store, store.as_retriever(search_kwargs={"k": 10})


# sajith's manifesto
(
    sajith_loader,
    sajith_data,
    sajith_docs,
    vectorstore_sajith,
    retriever_sajith,
) = _index_manifesto("Sajith_manifesto_english.pdf", "sajith_premadasa")

# akd's manifesto
(
    akd_loader,
    akd_data,
    akd_docs,
    vectorstore_akd,
    retriever_akd,
) = _index_manifesto("AKD Manifesto.pdf", "anura_kumara_dissanayake")

# ranil's manifesto
(
    ranil_loader,
    ranil_data,
    ranil_docs,
    vectorstore_ranil,
    retriever_ranil,
) = _index_manifesto("ranil_manifesto_compressed-output.pdf", "ranil_wickramasinghe")

# System instruction for the question-rewriting step: turn a follow-up
# question into one that can be understood without the chat history.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question which might reference "
    "context in the chat history, formulate a standalone question which can "
    "be understood without the chat history. Do NOT answer the question, just "
    "reformulate it if needed and otherwise return it as is."
)

# Message layout: instruction, then the prior turns, then the new question.
_contextualize_messages = [
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
]
contextualize_q_prompt = ChatPromptTemplate.from_messages(_contextualize_messages)

def _make_history_aware(base_retriever):
    """Wrap ``base_retriever`` with ``create_history_aware_retriever``.

    Every wrapper shares the module-level ``llm`` and
    ``contextualize_q_prompt``.
    """
    return create_history_aware_retriever(llm, base_retriever, contextualize_q_prompt)


history_aware_retriever_general = _make_history_aware(retriever_general)
history_aware_retriever_sajith = _make_history_aware(retriever_sajith)
history_aware_retriever_akd = _make_history_aware(retriever_akd)
history_aware_retriever_ranil = _make_history_aware(retriever_ranil)

# Candidate key -> retriever over that candidate's manifesto. The keys are the
# same identifiers the SearchAndCompare schema asks the model to emit.
retrievers = dict(
    sajith_premadasa=history_aware_retriever_sajith,
    anura_kumara_dissanayake=history_aware_retriever_akd,
    ranil_wickramasinghe=history_aware_retriever_ranil,
)

In [3]:
# All three person slots share the same field description, so it is spelled
# out once here and reused below.
_PERSON_FIELD_DESCRIPTION = (
    "Person to look things up for or persons to compare. "
    "Should be `sajith_premadasa` or `anura_kumara_dissanayake` "
    "or `ranil_wickramasinghe` or can be 'null'."
)


class SearchAndCompare(BaseModel):
    """Search for information about a person or compare informations about persons."""

    # NOTE: the docstring above and every `description` below are kept verbatim;
    # presumably they are surfaced to the model through the structured-output
    # schema, so rewording them would change behavior.

    # Routing decision: `search`, `compare` or `general`.
    queryType: str = Field(
        ...,
        description=(
            "Query type. Should be `search` or `compare` or `general`. "
            "if there's only one person name it's search, "
            "if there are many person's name it's compare, "
            "or it can be a general question which does not require any specific person"
        ),
    )

    # Text handed to the retriever(s).
    query: str = Field(..., description="Query to look up or query to compare")

    # How many persons the question involves.
    candidates: int = Field(
        ...,
        description="Number of persons to search or compare. can be 0 for general questions",
    )

    # Up to three candidate slots; the string 'null' marks an unused slot.
    person1: str = Field(..., description=_PERSON_FIELD_DESCRIPTION)
    person2: str = Field(..., description=_PERSON_FIELD_DESCRIPTION)
    person3: str = Field(..., description=_PERSON_FIELD_DESCRIPTION)

# Instruction for the classification step that fills in SearchAndCompare.
system_query = (
    "You have the ability to determine whether the user question is general, "
    "or it is related to a specific person or it is a comparison between "
    "multiple persons. if you can't find the type set it as general"
)

_query_messages = [
    ("system", system_query),
    ("human", "{question}"),
]
prompt_query = ChatPromptTemplate.from_messages(_query_messages)

# LLM constrained to return a SearchAndCompare instance.
structured_llm_query = llm.with_structured_output(SearchAndCompare)

# raw question -> {"question": ...} -> prompt -> structured classification
query_analyzer = (
    {"question": RunnablePassthrough()}
    | prompt_query
    | structured_llm_query
)

# Task instructions for the three kinds of question qa_chain can route to.
_SEARCH_INSTRUCTIONS = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know."
)
_COMPARE_INSTRUCTIONS = (
    "You are an assistant for comparing manifestos. "
    "Use the following pieces of retrieved context from different manifestos to answer "
    "the question. If you don't know the answer, say that you "
    "don't know."
)
_GENERAL_INSTRUCTIONS = (
    "You are an assistant for question-answering tasks related to srilankan election. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. "
    "or if the question is not much related to srilankan election say that "
    "this question is not related to srilankan election, as an election chatbot "
    "i can't provide you with an answer to this."
)


def _requested_candidates(response):
    """Return the known candidate keys named in ``response``, in slot order.

    Slots holding 'null' or any name without a manifesto retriever are
    dropped, and duplicates are removed while preserving order.
    """
    named = (response.person1, response.person2, response.person3)
    return list(dict.fromkeys(name for name in named if name in retrievers))


def _retrieve_context(retriever, query):
    """Run a history-aware retriever and return the chunk texts as one string."""
    docs = retriever.invoke({"input": query, "chat_history": chat_history})
    return "\n\n".join(doc.page_content for doc in docs)


def _build_prompt(instructions, context, question):
    """Assemble the answer prompt: instructions, context, history, question."""
    history_text = "\n".join(
        f"{message.type}: {message.content}" for message in chat_history
    )
    return (
        "system :\n"
        f"{instructions}"
        "\n\n"
        f"{context}"
        "\n\n"
        "chat_history :\n"
        f"{history_text}"
        "\n\n"
        "human :\n"
        f"{question}"
    )


@chain
def qa_chain(question):
    """Classify ``question``, retrieve matching context and answer it.

    The question is first classified by ``query_analyzer``:

    * ``search``  - context comes from the first recognised candidate's
      manifesto.
    * ``compare`` - context comes from every recognised candidate's
      manifesto, each section labelled with the candidate key.
    * anything else, or a search/compare that names no recognised candidate,
      uses the general news collection.

    Args:
        question: The user's question as plain text.

    Returns:
        The message returned by ``llm.invoke`` for the assembled prompt.
    """
    response = query_analyzer.invoke(question)
    # The model's person slots are not trusted: only keys present in
    # `retrievers` are used, so 'null' or a misspelt name cannot raise KeyError.
    candidates = _requested_candidates(response)

    if response.queryType == "search" and candidates:
        instructions = _SEARCH_INSTRUCTIONS
        context = _retrieve_context(retrievers[candidates[0]], response.query)
    elif response.queryType == "compare" and candidates:
        instructions = _COMPARE_INSTRUCTIONS
        # Label each section so the model knows which manifesto it came from.
        context = "\n\n".join(
            f"Context from {name}:\n"
            f"{_retrieve_context(retrievers[name], response.query)}"
            for name in candidates
        )
    else:
        instructions = _GENERAL_INSTRUCTIONS
        context = _retrieve_context(history_aware_retriever_general, response.query)

    return llm.invoke(_build_prompt(instructions, context, question))

def chatbot(question, max_turns=3):
    """Answer ``question`` and record the exchange in ``chat_history``.

    Args:
        question: The user's question as plain text.
        max_turns: Number of most recent question/answer pairs to keep in
            ``chat_history``. Defaults to 3, i.e. six messages.

    Returns:
        The answer text (``content`` of the message returned by ``qa_chain``).
    """
    result = qa_chain.invoke(question)

    chat_history.extend([
        HumanMessage(content=question),
        AIMessage(content=result.content),
    ])

    # Drop the oldest messages in place (other code holds a reference to this
    # list). Trimming by overflow rather than on an exact length also recovers
    # when the history has grown past the window for any reason.
    overflow = len(chat_history) - 2 * max_turns
    if overflow > 0:
        del chat_history[:overflow]

    return result.content

In [6]:
# Demo: ask one question, render the markdown answer, then show how many
# messages the conversation memory currently holds.
question = "sajith's education policies"
answer = chatbot(question)
display(Markdown(answer))
print(len(chat_history))

Sajith's education policies focus on several key areas:

* **Transforming Schools:** He aims to make every school a "smart school" by providing 100% electricity, water facilities, and physical resources.
* **Teacher Support:** He promises free public transportation for all teachers and plans to address salary, pension, and service issues in the education sector.
* **Digital Learning:** He prioritizes ensuring students have access to digital learning platforms in schools.
* **Religious Education:** He plans to fill teacher vacancies for religious teaching and ensure religious teaching is handled by qualified individuals. He also proposes including inter-religious education in the school curriculum.
* **Early Childhood Education (ECE):** He recognizes ECE as fundamental to developing responsible citizens and plans to establish a regulatory authority and upgrade child-friendly and modern facilities.
* **Higher Education:** He aims to improve the international rankings of Sri Lankan universities and coordinate with IIT/IIM India to establish a university in the country. He also plans to enhance university twinning initiatives.
* **"STEEAM" Education:** He plans to create a system of "STEEAM" education that prioritizes Science, Technology, English, Engineering, Arts, and Mathematics in all institutions.
* **Curriculum Development:** He will seek expert opinions from domestic and international institutions when updating school curricula.

These are just some of the key points of Sajith's education policies. You can find more details in the provided documents. 


6
