In [110]:
from langchain_community.document_loaders import PyPDFLoader

# Manifesto PDFs, expected next to the notebook.
SAJITH_MANIFESTO_PDF = "Sajith_manifesto_english.pdf"
AKD_MANIFESTO_PDF = "AKD Manifesto.pdf"

# Each loaded item is a Document carrying `source` and `page` metadata.
sajith_loader = PyPDFLoader(SAJITH_MANIFESTO_PDF)
sajith_data = sajith_loader.load()

akd_loader = PyPDFLoader(AKD_MANIFESTO_PDF)
akd_data = akd_loader.load()

In [111]:
# Spot-check one loaded entry of the AKD manifesto to see what the loader produced.
akd_data[12]

Document(metadata={'source': 'AKD Manifesto.pdf', 'page': 12}, page_content='11\nConcept Papers for Public Consultation ² Government Digitization: Initiate digitization projects in major government \ninstitutions to improve efficiency and service delivery, aiming to create a more \ntransparent and responsive public sector. This will include the deployment of \ndigital platforms for citizen services, streamlining administrative processes, and \nenhancing data management systems.\nA special stream of support will be made available to Sri Lankan software \ncompanies, new startups and joint-ventures with global leaders to be part of this \nprogram. \n ² Policy and Regulatory Framework: identify the issues in our existing policies \nand bring in the new policies that are required to attract FDIs, intellectual property \nrights, privacy & data, labor laws, bankruptcy, venture capital,  etc. \n ² National AI Policy: Develop and implement a national AI policy, focusing \non creating a national

In [112]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunking settings shared by both manifestos: consecutive chunks overlap so
# text that straddles a chunk boundary appears in both neighbours.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 100

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# Split the loaded documents of each manifesto into retrieval-sized chunks.
sajith_docs = text_splitter.split_documents(sajith_data)
akd_docs = text_splitter.split_documents(akd_data)

In [113]:
# Number of chunks produced for each manifesto (sajith, akd).
len(sajith_docs), len(akd_docs)

(224, 122)

In [114]:
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Single embedding model instance, reused for both vector stores built below.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [115]:
# Index the Sajith chunks in their own collection and expose a retriever that
# asks for 10 results per query.
# NOTE(review): re-running this cell in the same kernel may add the same chunks
# to the existing "sajith" collection again — confirm before relying on reruns.
vectorstore = Chroma.from_documents(
    documents=sajith_docs,
    embedding=embeddings,
    collection_name="sajith",
)
retriever_sajith = vectorstore.as_retriever(search_kwargs=dict(k=10))

In [116]:
# Index the AKD chunks in a separate collection. `vectorstore` is rebound here;
# the retriever created from the previous store keeps its own reference to it.
vectorstore = Chroma.from_documents(
    documents=akd_docs,
    embedding=embeddings,
    collection_name="anura_kumara_dissanayake",
)
retriever_akd = vectorstore.as_retriever(search_kwargs=dict(k=10))

In [117]:
from typing import List, Optional

from langchain_core.pydantic_v1 import BaseModel, Field


# class Search(BaseModel):
#     """Search for information about a person."""

#     query: str = Field(
#         ...,
#         description="Query to look up",
#     )
#     person: str = Field(
#         ...,
#         description="Person to look things up for. Should be `sajith` or `anura_kumara_dissanayake`.",
#     )


# class Compare(BaseModel):
#     """Search for information about a person."""

#     query: str = Field(
#         ...,
#         description="Query to compare",
#     )
#     persons: list = Field(
#         ...,
#         description="Persons list to compare things up for. Should be `sajith` or `anura_kumara_dissanayake`.",
#     )

# Description shared by the three candidate slots. The wording is part of the
# schema handed to the model, so it is kept verbatim.
_PERSON_SLOT_DESCRIPTION = (
    "Person to look things up for or persons to compare. "
    "Should be `sajith` or `anura_kumara_dissanayake` or `ranil` or can be 'null'."
)


class SearchAndCompare(BaseModel):
    """Search for information about a person or compare informations about persons."""

    # NOTE(review): the docstring above and the field descriptions below are
    # presumably forwarded to the model as the schema text when this class is
    # used for structured output — treat them as prompt text, not as comments.

    # Routing flag: which branch of the downstream chain should run.
    queryType: str = Field(..., description="Query type. Should be `search` or `compare`.")

    # The topic to retrieve on, stripped of candidate names by the model.
    query: str = Field(..., description="Query to look up or query to compare")

    # How many of the person slots below are actually populated.
    candidates: int = Field(..., description="Number of persons to search or compare.")

    # Up to three candidate slots; unused ones carry the string 'null'.
    person1: str = Field(..., description=_PERSON_SLOT_DESCRIPTION)
    person2: str = Field(..., description=_PERSON_SLOT_DESCRIPTION)
    person3: str = Field(..., description=_PERSON_SLOT_DESCRIPTION)



    # persons: str = Field(
    #     ...,
    #     description="should be in a format `[person1, person2, ...]`",
    # )

In [141]:
from langchain_core.output_parsers.openai_tools import PydanticToolsParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_google_genai import ChatGoogleGenerativeAI

# Parser for tool-call output. It must reference SearchAndCompare: the earlier
# Search / Compare models exist only as commented-out code, so naming them here
# raised NameError on a fresh kernel (Restart & Run All).
# Not used by `query_analyzer` below, which relies on with_structured_output.
output_parser = PydanticToolsParser(tools=[SearchAndCompare])

# System message for the routing step; the user's raw question fills {question}.
system = """You have the ability to issue search queries or compare queries to get information to help answer user information."""
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{question}"),
    ]
)

# One chat model instance: used for query analysis here and for answering in
# the chain defined later in the notebook.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash",temperature=0.3, max_tokens=1000)

# Constrain the model's reply to the SearchAndCompare schema.
structured_llm = llm.with_structured_output(SearchAndCompare)

# question string -> {"question": ...} -> chat prompt -> SearchAndCompare instance
query_analyzer = {"question": RunnablePassthrough()} | prompt | structured_llm

In [119]:
# Sanity-check the analyzer on a two-candidate question before wiring it into the chain.
query_analyzer.invoke("what does anura and sajith say about sustainable growth?")

SearchAndCompare(queryType='compare', query='sustainable growth', candidates=2, person1='anura_kumara_dissanayake', person2='sajith', person3='null')

In [120]:
from langchain_core.runnables import chain

In [121]:
# Lookup from the person names the analyzer emits to the matching retriever.
# Keys must match the names listed in the SearchAndCompare field descriptions;
# `ranil` is named there but has no retriever registered here.
retrievers = dict(
    sajith=retriever_sajith,
    anura_kumara_dissanayake=retriever_akd,
)

In [138]:
# Instruction header placed before the retrieved context in every answer prompt.
_ANSWER_PROMPT_HEADER = (
    "system :"
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know."
)


def _retrieve_context(person, query):
    """Return the retrieved documents for `person`, or '' when none are available.

    An empty string is returned for any name without an entry in `retrievers`.
    That covers the 'null' placeholder as well as names the schema allows but
    that have no retriever registered (e.g. `ranil`), instead of a KeyError.
    """
    retriever = retrievers.get(person)
    if retriever is None:
        return ''
    return retriever.invoke(query)


def _build_answer_prompt(contexts, question):
    """Join the instruction header, each context block and the question with blank lines."""
    sections = [_ANSWER_PROMPT_HEADER]
    sections.extend(str(context) for context in contexts)
    sections.append("human :" + str(question))
    return "\n\n".join(sections)


@chain
def searchAndCompareChain(question):
    """Answer a question about one candidate, or compare up to three.

    The question is first routed through `query_analyzer`, which returns a
    SearchAndCompare instance. For a `search` only `person1` is consulted; for
    a `compare` all three person slots are consulted, and slots without a
    registered retriever contribute an empty context.

    Args:
        question: The user's natural-language question.

    Returns:
        Whatever `llm.invoke` returns for the assembled prompt.

    Raises:
        ValueError: If the analyzer returns nothing, or a query type other
            than `search` / `compare` (previously this silently returned None).
    """
    response = query_analyzer.invoke(question)
    if response is None:
        raise ValueError("Query analysis returned no structured result for: %r" % (question,))

    # Tolerate stray whitespace / capitalisation in the model's routing flag.
    query_type = (response.queryType or "").strip().lower()

    if query_type == "search":
        people = [response.person1]
    elif query_type == "compare":
        people = [response.person1, response.person2, response.person3]
    else:
        raise ValueError(
            "Unsupported queryType %r; expected 'search' or 'compare'." % (response.queryType,)
        )

    contexts = [_retrieve_context(person, response.query) for person in people]
    return llm.invoke(_build_answer_prompt(contexts, question))

        
        

In [142]:
# End-to-end example: route the question, retrieve from both manifestos, and
# print only the text of the model's reply.
question = "what does anura and sajith say about sustainable growth?"
result = searchAndCompareChain.invoke(question) 
print(result.content)

The document mentions that Anura's manifesto focuses on sustainable energy sources and minimizing the impact of energy production on the environment. Sajith's manifesto focuses on a green economy and environmental sustainability as foundational principles for economic growth and development. 

