In [42]:
from langchain_community.document_loaders import PyPDFLoader

sajith_loader = PyPDFLoader("Sajith_manifesto_english.pdf")
sajith_data = sajith_loader.load()

akd_loader = PyPDFLoader("AKD Manifesto.pdf")
akd_data = akd_loader.load()

ranil_loader = PyPDFLoader("ranil_manifesto_compressed-output.pdf")
ranil_data = ranil_loader.load()

In [43]:
ranil_data

[Document(metadata={'source': 'ranil_manifesto_compressed-output.pdf', 'page': 0}, page_content=''),
 Document(metadata={'source': 'ranil_manifesto_compressed-output.pdf', 'page': 1}, page_content=''),
 Document(metadata={'source': 'ranil_manifesto_compressed-output.pdf', 'page': 2}, page_content=''),
 Document(metadata={'source': 'ranil_manifesto_compressed-output.pdf', 'page': 3}, page_content='FiveT;iumphan[ Years\nThe\nFive-Year\nMission\nThe\nTheravada\nTrade\nEconomy\nOperation\n-Beyond2o25\nTowards\na\nProsperous\nNation\nA\nWin\nfortheMotherland\nA\nUnited\nSriLanka\n'),
 Document(metadata={'source': 'ranil_manifesto_compressed-output.pdf', 'page': 4}, page_content='06\n'),
 Document(metadata={'source': 'ranil_manifesto_compressed-output.pdf', 'page': 5}, page_content='07\n'),
 Document(metadata={'source': 'ranil_manifesto_compressed-output.pdf', 'page': 6}, page_content='08\n'),
 Document(metadata={'source': 'ranil_manifesto_compressed-output.pdf', 'page': 7}, page_content='Sr

In [44]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# split data
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)

sajith_docs = text_splitter.split_documents(sajith_data)
akd_docs = text_splitter.split_documents(akd_data)
ranil_docs = text_splitter.split_documents(ranil_data)

In [45]:
len(sajith_docs), len(akd_docs), len(ranil_docs)

(224, 122, 127)

In [46]:
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [47]:
vectorstore = Chroma.from_documents(sajith_docs, embeddings, collection_name="sajith_premadasa")
retriever_sajith = vectorstore.as_retriever(search_kwargs={"k": 10})

In [48]:
vectorstore = Chroma.from_documents(akd_docs, embeddings, collection_name="anura_kumara_dissanayake")
retriever_akd = vectorstore.as_retriever(search_kwargs={"k": 10})

In [49]:
vectorstore = Chroma.from_documents(ranil_docs, embeddings, collection_name="ranil_wickramasinghe")
retriever_ranil = vectorstore.as_retriever(search_kwargs={"k": 10})

In [79]:
from typing import List, Optional

from langchain_core.pydantic_v1 import BaseModel, Field


# class Search(BaseModel):
#     """Search for information about a person."""

#     query: str = Field(
#         ...,
#         description="Query to look up",
#     )
#     person: str = Field(
#         ...,
#         description="Person to look things up for. Should be `sajith` or `anura_kumara_dissanayake`.",
#     )


# class Compare(BaseModel):
#     """Search for information about a person."""

#     query: str = Field(
#         ...,
#         description="Query to compare",
#     )
#     persons: list = Field(
#         ...,
#         description="Persons list to compare things up for. Should be `sajith` or `anura_kumara_dissanayake`.",
#     )

class SearchAndCompare(BaseModel):
    """Search for information about a person or compare informations about persons."""

    queryType: str = Field(
        ...,
        description="Query type. Should be `search` or `compare`. if there's only one person name it's search, if there are many person's name it's compare",)

    query: str = Field(
        ...,
        description="Query to look up or query to compare",
    )

    # candidates: int = Field(
    #     ...,
    #     description="Number of persons to search or compare.",
    # )

    person1: str = Field(
        ...,
        description="Person to look things up for or persons to compare. Should be `sajith_premadasa` or `anura_kumara_dissanayake` or `ranil_wickramasinghe` or can be 'null'.",
    )
    person2: str = Field(
        ...,
        description="Person to look things up for or persons to compare. Should be `sajith_premadasa` or `anura_kumara_dissanayake` or `ranil_wickramasinghe` or can be 'null'.",
    )
    person3: str = Field(
        ...,
        description="Person to look things up for or persons to compare. Should be `sajith_premadasa` or `anura_kumara_dissanayake` or `ranil_wickramasinghe` or can be 'null'.",
    )



    # persons: str = Field(
    #     ...,
    #     description="should be in a format `[person1, person2, ...]`",
    # )

In [80]:
from langchain_core.output_parsers.openai_tools import PydanticToolsParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_google_genai import ChatGoogleGenerativeAI

# output_parser = PydanticToolsParser(tools=[Search, Compare])

system = """You have the ability to issue search queries or compare queries to get information to help answer user information."""
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{question}"),
    ]
)
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash",temperature=0.3, max_tokens=1000)
structured_llm = llm.with_structured_output(SearchAndCompare)
query_analyzer = {"question": RunnablePassthrough()} | prompt | structured_llm

In [81]:
query_analyzer.invoke("what does anura and sajith and ranil say about sustainable growth?")

SearchAndCompare(queryType='compare', query='sustainable growth', person1='anura_kumara_dissanayake', person2='sajith_premadasa', person3='ranil_wickramasinghe')

In [82]:
from langchain_core.runnables import chain

In [83]:
retrievers = {
    "sajith_premadasa": retriever_sajith,
    "anura_kumara_dissanayake": retriever_akd,
    "ranil_wickramasinghe": retriever_ranil,
}

In [84]:
@chain
def searchAndCompareChain(question):
    response = query_analyzer.invoke(question)
    print(response)

    if response.queryType == "search":
        retriever = retrievers[response.person1]
        retrieved_docs = retriever.invoke(response.query)

        prompt = (
        "system :"
        "You are an assistant for question-answering tasks. "
        "Use the following pieces of retrieved context to answer "
        "the question. If you don't know the answer, say that you "
        "don't know."
        "\n\n"
        "{context}"
        "\n\n"

        "human :"
        "{question}"
        ).format(context=retrieved_docs, question=question)

        result = llm.invoke(prompt)

        return result
    
    elif response.queryType == "compare":
        retriever1 = retrievers[response.person1]
        retrieved_docs1 = retriever1.invoke(response.query)

        if response.person2 != 'null':
            retriever2 = retrievers[response.person2]
            retrieved_docs2 = retriever2.invoke(response.query)
        else:
            retrieved_docs2 = ''

        if response.person3 != 'null':
            retriever3 = retrievers[response.person3]
            retrieved_docs3 = retriever3.invoke(response.query)
        else:
            retrieved_docs3 = ''

        # print("=====================================")
        # print(retrieved_docs1)
        # print("=====================================")
        # print(retrieved_docs2)
        # print("=====================================")
        # print(retrieved_docs3)
        # print("=====================================")

        prompt = (
        "system :"
        "You are an assistant for comparing manifestos. "
        "Use the following pieces of retrieved context from different manifestos to answer "
        "the question. If you don't know the answer, say that you "
        "don't know."
        "\n\n"
        "{context1}"
        "\n\n"
        "{context2}"
        "\n\n"
        "{context3}"
        "\n\n"

        "human :"
        "{question}"
        ).format(context1=retrieved_docs1, context2=retrieved_docs2, context3=retrieved_docs3, question=question)

        result = llm.invoke(prompt)

        return result

        
        

In [85]:
question = "who are the srilankan presidential candidates in 2024?"
result = searchAndCompareChain.invoke(question) 
# result = llm.invoke(question)
print(result.content)

queryType='search' query='Sri Lankan presidential candidates in 2024' person1='null' person2='null' person3='null'


KeyError: 'null'

In [77]:
class ToolFinder(BaseModel):
    """Find tools to help answer a question."""

    tool: str = Field(
        ...,
        description="which type of question is asked by the user. either a general question which can be answered by looking into somne websites or a manifestos related question which can be answered by comparing manifestos. should only have either `general` or `search_and_compare`",
    )

system = """You have the ability to determine which type of question is asked by the user. either a general question which can be answered by looking into somne websites or a manifestos related question which can be answered by comparing manifestos."""
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{question}"),
    ]
)
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash",temperature=0.3, max_tokens=1000)
structured_llm_tool = llm.with_structured_output(ToolFinder)
tool_analyzer = {"question": RunnablePassthrough()} | prompt | structured_llm_tool

In [78]:
question = "what did sajith said about education"
result = tool_analyzer.invoke(question)
result.tool

'general'

In [59]:
@chain
def generalChain(question):
    result = llm.invoke(question)
    return result

In [66]:
@chain
def toolFinderAndExecuterChain(question):
    tool = tool_analyzer.invoke(question)
    print(tool)

    if tool.tool == "general":
        return generalChain.invoke(question)
    elif tool.tool == "search_and_compare":
        return searchAndCompareChain.invoke(question)

question = "what is blue"
result = toolFinderAndExecuterChain.invoke(question)
print(result.content)

tool='general'
Blue is a color! 

It's one of the primary colors, meaning you can't make it by mixing other colors. 

Here are some things that are blue:

* **The sky**
* **The ocean**
* **Blueberries**
* **Jeans**
* **Some flowers**

Blue can also represent different things:

* **Sadness**
* **Peace**
* **Trust**
* **Loyalty**

What else would you like to know about blue? 



In [69]:
question = "ranil about education"
result = toolFinderAndExecuterChain.invoke(question)
print(result.content)

tool='general'
Please provide me with more context about what you'd like to know about Ranil and education. 

For example, are you interested in:

* **Ranil Wickremesinghe's** views on education as the **President of Sri Lanka**?
* **Ranil Wickremesinghe's** **personal experiences** with education?
* **Ranil Wickremesinghe's** **policies** on education as a **former Prime Minister**?
* **Ranil Wickremesinghe's** **opinions** on specific aspects of education, such as **curriculum**, **funding**, or **access**?

Once you give me more information, I can provide you with a more relevant and helpful response. 

