In [5]:
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter


# Web sources for the "general" (non-manifesto) knowledge base: news and
# analysis pieces about the 2024 Sri Lankan presidential election.
url_list = [
    "https://apnews.com/article/sri-lanka-presidential-election-explained-wickremesinghe-e203abecd36d04fde21e927b9be8ba71",
    "https://www.electionguide.org/elections/id/4364/",
    "https://thediplomat.com/2024/08/sri-lankas-anura-dissanayake-a-strong-contender-for-president/",
    "https://www.isas.nus.edu.sg/papers/sri-lanka-election-2024-key-political-candidates-and-campaigns/",
    "https://www.thehindu.com/news/international/in-southern-sri-lanka-a-chant-for-change-this-poll-season/article68628670.ece",
    "https://groundviews.org/2024/09/04/the-hard-truth-supporting-ranil-now-could-trigger-the-next-crisis/",
    "https://groundviews.org/2024/08/29/the-jvp-and-ethnic-relations-walking-a-tightrope-to-2024-part-1/",
    "https://groundviews.org/2024/09/10/changes-in-support-for-the-leading-presidential-candidates/",
    "https://groundviews.org/2024/09/13/voters-want-more-government-spending-on-health-and-education/",
    "https://groundviews.org/2024/07/18/a-k-dissanayake-continues-to-lead-in-favourability-ratings/",
    "https://groundviews.org/2024/07/22/the-three-way-contest-to-win-the-presidency/",
    "https://www.vifindia.org/article/2024/september/05/Sri-Lanka-at-the-Crossroads-The-Presidential-Election-2024",
    "https://www.dailymirror.lk/breaking-news/Anura-rebuffs-Ranils-proposal-to-invite-IMF-for-debate/108-291616"
]

# Load every URL into documents. NOTE(review): this performs network requests,
# so the result depends on the pages being reachable at run time.
general_loader = WebBaseLoader(url_list)
general_data = general_loader.load()

# Splitter configured for chunk_size=500 with chunk_overlap=100
# (presumably measured in characters -- confirm against the splitter's
# length function).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)

# Chunked web documents; indexed into the "general" vector store in a later cell.
general_docs = text_splitter.split_documents(general_data)

In [6]:
# Spot-check one chunk (index 20) to see what the splitter produced.
general_docs[20]

Document(metadata={'source': 'https://apnews.com/article/sri-lanka-presidential-election-explained-wickremesinghe-e203abecd36d04fde21e927b9be8ba71', 'title': "What's at stake in Sri Lanka's first presidential vote since its economic meltdown? | AP News", 'description': 'Sri Lanka will hold its presidential election Sept. 21 in a crucial vote that will decide the future course of the South Asian nation still struggling to recover from its economic collapse in 2022, which provoked mass protests and forced the former president Gotabaya Rajapaksa to flee the country an', 'language': 'en'}, page_content='10 of 20\xa0|\xa0\nProtestors burn an effigy of acting President and Prime Minister Ranil Wickremesinghe as they demand his resignation in Colombo, Sri Lanka, Tuesday, July 19, 2022. (AP Photo/Rafiq Maqbool, File)\nRead More\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n11 of 20\xa0|\xa0\nAnura Kumara Dissanayake, center, leader of opposition political party National People’s Power a

In [7]:
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Embedding model used to vectorise the web-article chunks.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Index the web-article chunks in their own Chroma collection ...
vectorstore_general = Chroma.from_documents(
    documents=general_docs,
    embedding=embeddings,
    collection_name="general",
)

# ... and expose it as a retriever that returns the 10 closest chunks.
retriever_general = vectorstore_general.as_retriever(search_kwargs=dict(k=10))

In [8]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# --- 1. Load each candidate's manifesto PDF ---------------------------------
sajith_loader = PyPDFLoader("Sajith_manifesto_english.pdf")
sajith_data = sajith_loader.load()

akd_loader = PyPDFLoader("AKD Manifesto.pdf")
akd_data = akd_loader.load()

ranil_loader = PyPDFLoader("ranil_manifesto_compressed-output.pdf")
ranil_data = ranil_loader.load()

# --- 2. Split the loaded pages into overlapping chunks ----------------------
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)

sajith_docs, akd_docs, ranil_docs = (
    text_splitter.split_documents(pages)
    for pages in (sajith_data, akd_data, ranil_data)
)

# --- 3. Embed and index: one Chroma collection per candidate ----------------
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")


def _index_manifesto(chunks, collection_name):
    """Index `chunks` in a Chroma collection; return (vectorstore, top-10 retriever)."""
    store = Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        collection_name=collection_name,
    )
    return store, store.as_retriever(search_kwargs={"k": 10})


vectorstore_sajith, retriever_sajith = _index_manifesto(sajith_docs, "sajith_premadasa")
vectorstore_akd, retriever_akd = _index_manifesto(akd_docs, "anura_kumara_dissanayake")
vectorstore_ranil, retriever_ranil = _index_manifesto(ranil_docs, "ranil_wickramasinghe")

In [9]:
# Lookup table from candidate key to that candidate's manifesto retriever.
# The keys are the same identifiers the query-routing schema asks the model
# to emit for person1/person2/person3.
retrievers = dict(
    sajith_premadasa=retriever_sajith,
    anura_kumara_dissanayake=retriever_akd,
    ranil_wickramasinghe=retriever_ranil,
)

In [10]:
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.runnables import chain

# Chat model shared by the query router and by every answering prompt.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0.3,
    max_tokens=1000,
)

In [59]:
# class ToolFinder(BaseModel):
#     """Find tools to help answer a question."""

#     tool: str = Field(
#         ...,
#         description="if it does not related to any specific person it's general, or it is search and compare. should only have either `search_and_compare` or `general`",
#     )

# system_tool = """You have the ability to determine which type of question is asked by the user. either a general question which can be answered by looking into some websites or a manifestos related question which can be answered by comparing manifestos."""
# prompt_tool = ChatPromptTemplate.from_messages(
#     [
#         ("system", system_tool),
#         ("human", "{question}"),
#     ]
# )

# structured_llm_tool = llm.with_structured_output(ToolFinder)
# tool_analyzer = {"question": RunnablePassthrough()} | prompt_tool | structured_llm_tool

In [60]:
# question = "what is blue"
# result = tool_analyzer.invoke(question)
# result.tool

In [11]:
# The three person slots share one description, so it is written once here.
_PERSON_SLOT_DESCRIPTION = (
    "Person to look things up for or persons to compare. "
    "Should be `sajith_premadasa` or `anura_kumara_dissanayake` "
    "or `ranil_wickramasinghe` or can be 'null'."
)


class SearchAndCompare(BaseModel):
    """Search for information about a person or compare informations about persons."""

    # NOTE(review): the docstring above and the field descriptions below are
    # presumably forwarded to the model as the structured-output schema, so
    # their wording is deliberately left untouched.

    # Routing decision: `search`, `compare` or `general`.
    queryType: str = Field(
        ...,
        description=(
            "Query type. Should be `search` or `compare` or `general`. "
            "if there's only one person name it's search, "
            "if there are many person's name it's compare, "
            "or it can be a general question which does not require any specific person"
        ),
    )

    # Text that gets sent to the retriever(s).
    query: str = Field(..., description="Query to look up or query to compare")

    # How many of the person slots below are meaningful.
    candidates: int = Field(
        ...,
        description="Number of persons to search or compare. can be 0 for general questions",
    )

    # Candidate slots; a slot that is not needed carries the string 'null'.
    person1: str = Field(..., description=_PERSON_SLOT_DESCRIPTION)
    person2: str = Field(..., description=_PERSON_SLOT_DESCRIPTION)
    person3: str = Field(..., description=_PERSON_SLOT_DESCRIPTION)

# System instruction for the routing step: classify the question as general,
# single-person, or multi-person comparison.
system_query = "You have the ability to determine whether the user question is general, or it is related to a specific person or it is a comparison between multiple persons."

# Two-message chat prompt: the instruction above plus the raw user question.
prompt_query = ChatPromptTemplate.from_messages(
    [("system", system_query), ("human", "{question}")]
)

# Ask the model to answer in the shape of the SearchAndCompare schema.
structured_llm_query = llm.with_structured_output(SearchAndCompare)

# question string -> {"question": ...} -> prompt -> structured routing result
query_analyzer = (
    {"question": RunnablePassthrough()}
    | prompt_query
    | structured_llm_query
)

In [62]:
# Smoke-test the router with a question that names no candidate explicitly;
# the printed result shows how it filled queryType, query and the person slots.
question = "compare educational policies of candidates"
response = query_analyzer.invoke(question)
print(response)

queryType='compare' query='educational policies' candidates=3 person1='sajith_premadasa' person2='anura_kumara_dissanayake' person3='ranil_wickramasinghe'


In [64]:
@chain
def searchAndCompareChain(question, response=None):
    """Answer a manifesto question about one candidate, or compare several.

    Args:
        question: The user's question; it is inserted verbatim into the prompt.
        response: Optional pre-computed ``SearchAndCompare`` routing result.
            A ``@chain`` runnable is invoked with a single input, so a second
            required argument could never be supplied through ``.invoke``.
            When omitted, the routing result is derived from ``question``
            with ``query_analyzer``.

    Returns:
        The result of ``llm.invoke`` for ``search`` / ``compare`` queries, or
        ``None`` for any other query type (this chain has no general path).

    Raises:
        KeyError: if the routing result names no candidate that exists in
            ``retrievers``.
    """
    if response is None:
        response = query_analyzer.invoke(question)

    if response.queryType == "search":
        preamble = (
            "You are an assistant for question-answering tasks. "
            "Use the following pieces of retrieved context to answer "
            "the question. If you don't know the answer, say that you "
            "don't know."
        )
    elif response.queryType == "compare":
        preamble = (
            "You are an assistant for comparing manifestos. "
            "Use the following pieces of retrieved context from different manifestos to answer "
            "the question. If you don't know the answer, say that you "
            "don't know."
        )
    else:
        # Not a manifesto question; nothing for this chain to do.
        return None

    # The router is told to put the string 'null' in unused slots, and a model
    # may also emit a name that has no retriever. Keep only usable names, in
    # slot order and without duplicates, instead of failing on the first lookup.
    slots = (response.person1, response.person2, response.person3)
    names = [name for name in dict.fromkeys(slots) if name in retrievers]
    if not names:
        raise KeyError(f"no known candidate in routing result: {slots!r}")
    if response.queryType == "search":
        # A search is about a single candidate.
        names = names[:1]

    # Build one labelled, plain-text section per candidate so the model can
    # tell whose manifesto each passage came from (rather than pasting the
    # repr of a list of Document objects into the prompt).
    sections = []
    for name in names:
        docs = retrievers[name].invoke(response.query)
        passages = "\n\n".join(doc.page_content for doc in docs)
        sections.append(f"[{name}]\n{passages}")
    context = "\n\n".join(sections)

    prompt = f"system :{preamble}\n\n{context}\n\nhuman :{question}"
    return llm.invoke(prompt)

In [70]:
# @chain
# def qa_chain(question):
#     response = query_analyzer.invoke(question)
#     if response.queryType == "search" or response.queryType == "compare":
#         if response.queryType == "search":
#             retriever = retrievers[response.person1]
#             retrieved_docs = retriever.invoke(response.query)

#             prompt = (
#             "system :"
#             "You are an assistant for question-answering tasks. "
#             "Use the following pieces of retrieved context to answer "
#             "the question. If you don't know the answer, say that you "
#             "don't know."
#             "\n\n"
#             "{context}"
#             "\n\n"

#             "human :"
#             "{question}"
#             ).format(context=retrieved_docs, question=question)

#             result = llm.invoke(prompt)

#             return result
    
#         elif response.queryType == "compare":
#             retriever1 = retrievers[response.person1]
#             retrieved_docs1 = retriever1.invoke(response.query)

#             if response.person2 != 'null':
#                 retriever2 = retrievers[response.person2]
#                 retrieved_docs2 = retriever2.invoke(response.query)
#             else:
#                 retrieved_docs2 = ''

#             if response.person3 != 'null':
#                 retriever3 = retrievers[response.person3]
#                 retrieved_docs3 = retriever3.invoke(response.query)
#             else:
#                 retrieved_docs3 = ''

#             # print("=====================================")
#             # print(retrieved_docs1)
#             # print("=====================================")
#             # print(retrieved_docs2)
#             # print("=====================================")
#             # print(retrieved_docs3)
#             # print("=====================================")

#             prompt = (
#             "system :"
#             "You are an assistant for comparing manifestos. "
#             "Use the following pieces of retrieved context from different manifestos to answer "
#             "the question. If you don't know the answer, say that you "
#             "don't know."
#             "\n\n"
#             "{context1}"
#             "\n\n"
#             "{context2}"
#             "\n\n"
#             "{context3}"
#             "\n\n"

#             "human :"
#             "{question}"
#             ).format(context1=retrieved_docs1, context2=retrieved_docs2, context3=retrieved_docs3, question=question)

#             result = llm.invoke(prompt)

#             return result
#     else:
#         return llm.invoke(question)

In [31]:
# System preambles for the three kinds of question. Keeping them as constants
# lets qa_chain share one prompt-building path for every branch.
_QA_SEARCH_PREAMBLE = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know."
)
_QA_COMPARE_PREAMBLE = (
    "You are an assistant for comparing manifestos. "
    "Use the following pieces of retrieved context from different manifestos to answer "
    "the question. If you don't know the answer, say that you "
    "don't know."
)
# Same wording as before, with the missing spaces between sentences restored
# and the "ass a" typo corrected so the instruction reads as intended.
_QA_GENERAL_PREAMBLE = (
    "You are an assistant for question-answering tasks related to srilankan election. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. "
    "or if the question is not much related to srilankan election say that this question is not related to srilankan election as a election chatbot i can't provide you with answer this."
)


def _qa_context_section(docs, label=None):
    """Join the text of `docs` into one block, optionally under a `[label]` heading."""
    passages = "\n\n".join(doc.page_content for doc in docs)
    return f"[{label}]\n{passages}" if label else passages


@chain
def qa_chain(question):
    """Route a question to the right knowledge base and answer it with the LLM.

    The question is first classified by ``query_analyzer``:

    * ``search``  -- answered from one candidate's manifesto retriever;
    * ``compare`` -- answered from every named candidate's manifesto retriever;
    * anything else -- answered from the general web-article retriever.

    If a ``search`` / ``compare`` result names no candidate that exists in
    ``retrievers`` (the router uses the string 'null' for unused slots and may
    emit unknown names), the question falls back to the general path instead
    of raising ``KeyError``.

    Args:
        question: The user's question; it is inserted verbatim into the prompt.

    Returns:
        The result of ``llm.invoke`` on the assembled prompt.
    """
    response = query_analyzer.invoke(question)

    # Usable candidate keys, in slot order, without duplicates.
    slots = (response.person1, response.person2, response.person3)
    known = [name for name in dict.fromkeys(slots) if name in retrievers]

    if response.queryType == "search" and known:
        preamble = _QA_SEARCH_PREAMBLE
        sources = [(known[0], retrievers[known[0]])]
    elif response.queryType == "compare" and known:
        preamble = _QA_COMPARE_PREAMBLE
        sources = [(name, retrievers[name]) for name in known]
    else:
        preamble = _QA_GENERAL_PREAMBLE
        sources = [(None, retriever_general)]

    # One plain-text section per source; manifesto sections are labelled with
    # the candidate key so the model can attribute each passage.
    context = "\n\n".join(
        _qa_context_section(retriever.invoke(response.query), label)
        for label, retriever in sources
    )

    prompt = f"system :{preamble}\n\n{context}\n\nhuman :{question}"
    return llm.invoke(prompt)

In [32]:
# Probe with a name that is not one of the three candidate keys in `retrievers`.
question = "who is sajeev"
print(qa_chain.invoke(question).content)

This question is not related to Sri Lankan elections. As an election chatbot, I can't provide you with an answer to this. 



In [37]:
# Probe with an off-topic (arithmetic) input to see how the chain responds.
question = "9 + 4"
print(qa_chain.invoke(question).content)

This question is not related to Sri Lankan elections. As an election chatbot, I can't provide you with the answer to this. 



In [16]:
# Ask about two candidates by first name; exercises manifesto-based answering.
question = "what did sajith and anura said about education"
print(qa_chain.invoke(question).content)

Here's a comparison of what Sajith and Anura said about education in their manifestos:

**Sajith:**

* **Focus on Human Capital Development:** Sajith emphasizes the importance of human capital development and sees education as a fundamental right.
* **Free Transportation for Teachers:** He proposes providing free public transportation to all teachers across Sri Lanka.
* **Addressing Teacher Issues:**  He plans to address long-standing salary, pension, and service issues in the education sector.
* **Smart Schools:**  He aims to transform every school into a smart school with 100% electricity, water facilities, and physical resources.
* **Digital Learning:** Sajith prioritizes expanding digital learning platforms in schools through the "Sakwala" program.
* **Early Childhood Education:** He recognizes the importance of Early Childhood Education (ECE) and plans to establish a regulatory authority for it.
* **Modern Technology Integration:** He aims to integrate modern technology into prima