In [1]:
from langchain_community.document_loaders import PyPDFLoader

# Source PDFs, given relative to the notebook's working directory.
SAJITH_PDF = "Sajith_manifesto_english.pdf"
AKD_PDF = "AKD Manifesto.pdf"

# Load each manifesto into a sequence of Document objects; the loader handles
# are kept as their own names so they stay available to later cells.
sajith_loader = PyPDFLoader(SAJITH_PDF)
sajith_data = sajith_loader.load()

akd_loader = PyPDFLoader(AKD_PDF)
akd_data = akd_loader.load()

In [4]:
# Spot-check one entry of the loaded AKD manifesto to see what a Document holds.
akd_data[12]

Document(metadata={'source': 'AKD Manifesto.pdf', 'page': 12}, page_content='11\nConcept Papers for Public Consultation ² Government Digitization: Initiate digitization projects in major government \ninstitutions to improve efficiency and service delivery, aiming to create a more \ntransparent and responsive public sector. This will include the deployment of \ndigital platforms for citizen services, streamlining administrative processes, and \nenhancing data management systems.\nA special stream of support will be made available to Sri Lankan software \ncompanies, new startups and joint-ventures with global leaders to be part of this \nprogram. \n ² Policy and Regulatory Framework: identify the issues in our existing policies \nand bring in the new policies that are required to attract FDIs, intellectual property \nrights, privacy & data, labor laws, bankruptcy, venture capital,  etc. \n ² National AI Policy: Develop and implement a national AI policy, focusing \non creating a national

In [23]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunking parameters handed to the splitter; consecutive chunks share
# CHUNK_OVERLAP units of text so content cut at a boundary appears in both.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 100

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# The same splitter is applied to both manifestos so their chunks are comparable.
sajith_docs = text_splitter.split_documents(sajith_data)
akd_docs = text_splitter.split_documents(akd_data)

In [24]:
# Number of chunks produced for each manifesto, as (Sajith, AKD).
len(sajith_docs), len(akd_docs)

(224, 122)

In [25]:
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Embedding model shared by both vector stores built below.
# NOTE(review): presumably needs Google API credentials (e.g. GOOGLE_API_KEY)
# in the environment — confirm before a fresh Restart & Run All.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [26]:
# Index the Sajith manifesto chunks in their own Chroma collection.
# - The store is bound to a candidate-specific name so the AKD cell cannot
#   overwrite this handle.
# - Position-based ids make a re-run of this cell write the same records again
#   rather than adding a second copy of every chunk under fresh random ids.
# NOTE(review): if the chunking changes between runs, ids beyond the new chunk
# count are left behind in the collection — restart the kernel in that case.
vectorstore_sajith = Chroma.from_documents(
    sajith_docs,
    embeddings,
    ids=[f"sajith-{i}" for i in range(len(sajith_docs))],
    collection_name="sajith",
)

# Retriever configured with k=10 results per query.
retriever_sajith = vectorstore_sajith.as_retriever(search_kwargs={"k": 10})

In [34]:
# Index the AKD manifesto chunks in their own Chroma collection.
# - The store is bound to a candidate-specific name instead of rebinding the
#   generic `vectorstore`, so neither store handle shadows the other.
# - Position-based ids make a re-run of this cell write the same records again
#   rather than adding a second copy of every chunk under fresh random ids.
# NOTE(review): if the chunking changes between runs, ids beyond the new chunk
# count are left behind in the collection — restart the kernel in that case.
vectorstore_akd = Chroma.from_documents(
    akd_docs,
    embeddings,
    ids=[f"akd-{i}" for i in range(len(akd_docs))],
    collection_name="anura_kumara_dissanayake",
)

# Retriever configured with k=10 results per query.
retriever_akd = vectorstore_akd.as_retriever(search_kwargs={"k": 10})

In [35]:
from typing import List, Optional

from langchain_core.pydantic_v1 import BaseModel, Field


class Search(BaseModel):
    """Search for information about a person."""

    # NOTE: this class is passed to `llm.with_structured_output(Search)`, so the
    # docstring above and the field descriptions below are presumably part of
    # what the model sees — treat them as prompt text, not as ordinary comments.

    # Search text that is forwarded to the selected retriever.
    query: str = Field(..., description="Query to look up")

    # Selects whose manifesto to search; the two values named in the description
    # are the keys of the `retrievers` mapping. Typed as plain `str`, so the
    # value is not validated here.
    person: str = Field(
        ...,
        description="Person to look things up for. Should be `sajith` or `anura_kumara_dissanayake`.",
    )

In [36]:
from langchain_core.output_parsers.openai_tools import PydanticToolsParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_google_genai import ChatGoogleGenerativeAI

# NOTE(review): `output_parser` is not referenced by any visible cell; kept in
# case another cell relies on it.
output_parser = PydanticToolsParser(tools=[Search])

# Prompt for the routing step: a fixed system instruction plus the raw question.
system = """You have the ability to issue search queries to get information to help answer user information."""
prompt = ChatPromptTemplate.from_messages([("system", system), ("human", "{question}")])

# One chat model serves both steps: wrapped for structured `Search` output
# here, and called directly by the answering chain.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0.3,
    max_tokens=500,
)
structured_llm = llm.with_structured_output(Search)

# question -> prompt -> structured model output (a `Search` instance).
query_analyzer = {"question": RunnablePassthrough()} | prompt | structured_llm

In [42]:
# Check that the analyzer maps the shorthand "akd" to a retriever key and extracts the query.
query_analyzer.invoke("what does akd say about sustainable growth?")

Search(query='sustainable growth', person='anura_kumara_dissanayake')

In [43]:
from langchain_core.runnables import chain

In [44]:
# Routing table from the `person` value produced by the query analyzer to the
# retriever over that candidate's manifesto.
retrievers = dict(
    sajith=retriever_sajith,
    anura_kumara_dissanayake=retriever_akd,
)

In [45]:
@chain
def custom_chain(question):
    """Route a question to the right manifesto and return the retrieved chunks.

    Args:
        question: Natural-language question that names the candidate.

    Returns:
        The documents returned by the selected retriever for the analyzed query.

    Raises:
        KeyError: If the query analysis yields no result, or names a person
            that has no entry in `retrievers`.
    """
    analysis = query_analyzer.invoke(question)

    # `Search.person` is an unconstrained string chosen by the model, and the
    # structured call may yield nothing at all, so check before indexing and
    # fail with a message that says what went wrong.
    person = getattr(analysis, "person", None)
    if person not in retrievers:
        raise KeyError(
            f"query analysis returned person={person!r}; "
            f"expected one of {sorted(retrievers)}"
        )

    return retrievers[person].invoke(analysis.query)

In [49]:
# Run routing + retrieval end to end: "anura" should resolve to the AKD retriever.
custom_chain.invoke("what does anura say about education?")

[Document(metadata={'page': 25, 'source': 'AKD Manifesto.pdf'}, page_content='Concept Papers for Public Consultation246.Engineering Education\nEngineering education should be reformed and restructured for producing \nindustry related practitioners.\nBy elucidating the objectives within each industry, the NPF aims to create a \nconducive regulatory environment that fosters innovation, investment, competition, \nand societal welfare while addressing sector-specific challenges and opportunities.\nPolicy Directions, Strategies and Actions\nWired Communication Industry'),
 Document(metadata={'page': 10, 'source': 'AKD Manifesto.pdf'}, page_content='equity.\n ² International Collaboration: Foster international partnerships to access \nadvanced technologies, best practices, and new markets.\n ² Infrastructure Enhancement: Develop world-class infrastructure to support the \ngrowth and competitiveness of the IT industry.'),
 Document(metadata={'page': 27, 'source': 'AKD Manifesto.pdf'}, page_co

In [79]:
@chain
def qna_chain(question):
    """Answer a question about one candidate's manifesto from retrieved chunks.

    Args:
        question: Natural-language question that names the candidate.

    Returns:
        The chat model's reply message; the answer text is in its `.content`.
    """
    # Reuse the routing + retrieval chain instead of repeating its steps here.
    retrieved_docs = custom_chain.invoke(question)

    # Give the model only the chunk text. Formatting the list itself would put
    # each Document's repr (metadata dict, escaped newlines) into the prompt.
    context = "\n\n".join(doc.page_content for doc in retrieved_docs)

    instructions = (
        "You are an assistant for question-answering tasks. "
        "Use the following pieces of retrieved context to answer "
        "the question. If you don't know the answer, say that you "
        "don't know."
    )
    system_message = instructions + "\n\n" + context

    # Send real system/human chat messages rather than one string containing
    # literal "system :" / "human :" labels, which the model treats as plain text.
    return llm.invoke([("system", system_message), ("human", question)])

In [80]:
# Ask a full question; `print` is used so the answer text renders with real line breaks.
result = qna_chain.invoke("what does sajith say about sustainable growth?")
print(result.content)

Sajith Premadasa's manifesto states that sustainable growth requires a two-part approach: reforming the economy to make it more competitive and globally connected, and ensuring economic justice and equity through a strong social safety net. He also emphasizes the importance of promoting a green economy and environmental sustainability as foundational principles for economic growth and development. 

