In [1]:
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.document_loaders import PyPDFLoader
from pydantic import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.runnables import chain
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder
from langchain_core.messages import AIMessage, HumanMessage
from IPython.display import Markdown, display

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [7]:
# Models shared by the rest of the notebook.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash",temperature=0.3, max_tokens=1000)
chat_history = []

# Indexing configuration, kept in one place so every collection is built the same way.
PERSIST_DIRECTORY = "local_chroma_db"
CHUNK_SIZE = 500
CHUNK_OVERLAP = 100

text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)


def build_vectorstore(loader, collection_name):
    """Load, chunk and index one document source into a persisted Chroma collection.

    Args:
        loader: A document loader exposing ``load()`` (here ``WebBaseLoader`` or
            ``PyPDFLoader``).
        collection_name: Name of the Chroma collection inside ``PERSIST_DIRECTORY``.

    Returns:
        A ``(data, docs, vectorstore)`` tuple: the raw loaded documents, the
        chunks produced by ``text_splitter``, and the Chroma store holding them.
    """
    data = loader.load()
    docs = text_splitter.split_documents(data)
    # Deterministic, position-based ids: without explicit ids every run of this
    # cell stores a fresh copy of each chunk under a new random id, so the
    # persisted collection fills up with duplicates. With stable ids a re-run
    # overwrites the same entries instead.
    # NOTE(review): entries written by earlier runs (random ids), or left over
    # when a source shrinks to fewer chunks, are not removed here -- delete
    # PERSIST_DIRECTORY once if a clean index is needed.
    ids = [f"{collection_name}-{position}" for position in range(len(docs))]
    vectorstore = Chroma.from_documents(
        docs,
        embeddings,
        ids=ids,
        collection_name=collection_name,
        persist_directory=PERSIST_DIRECTORY,
    )
    return data, docs, vectorstore


# general information
url_list = [
    "https://apnews.com/article/sri-lanka-presidential-election-explained-wickremesinghe-e203abecd36d04fde21e927b9be8ba71",
    "https://www.electionguide.org/elections/id/4364/",
    "https://thediplomat.com/2024/08/sri-lankas-anura-dissanayake-a-strong-contender-for-president/",
    "https://www.isas.nus.edu.sg/papers/sri-lanka-election-2024-key-political-candidates-and-campaigns/",
    "https://www.thehindu.com/news/international/in-southern-sri-lanka-a-chant-for-change-this-poll-season/article68628670.ece",
    "https://groundviews.org/2024/09/04/the-hard-truth-supporting-ranil-now-could-trigger-the-next-crisis/",
    "https://groundviews.org/2024/08/29/the-jvp-and-ethnic-relations-walking-a-tightrope-to-2024-part-1/",
    "https://groundviews.org/2024/09/10/changes-in-support-for-the-leading-presidential-candidates/",
    "https://groundviews.org/2024/09/13/voters-want-more-government-spending-on-health-and-education/",
    "https://groundviews.org/2024/07/18/a-k-dissanayake-continues-to-lead-in-favourability-ratings/",
    "https://groundviews.org/2024/07/22/the-three-way-contest-to-win-the-presidency/",
    "https://www.vifindia.org/article/2024/september/05/Sri-Lanka-at-the-Crossroads-The-Presidential-Election-2024",
    "https://www.dailymirror.lk/breaking-news/Anura-rebuffs-Ranils-proposal-to-invite-IMF-for-debate/108-291616"
]
general_loader = WebBaseLoader(url_list)
general_data, general_docs, vectorstore_general = build_vectorstore(general_loader, "general")

# sajith's manifesto
sajith_loader = PyPDFLoader("Sajith_manifesto_english.pdf")
sajith_data, sajith_docs, vectorstore_sajith = build_vectorstore(sajith_loader, "sajith_premadasa")

# akd's manifesto
akd_loader = PyPDFLoader("AKD Manifesto.pdf")
akd_data, akd_docs, vectorstore_akd = build_vectorstore(akd_loader, "anura_kumara_dissanayake")

# ranil's manifesto
ranil_loader = PyPDFLoader("ranil_manifesto_compressed-output.pdf")
ranil_data, ranil_docs, vectorstore_ranil = build_vectorstore(ranil_loader, "ranil_wickramasinghe")

In [8]:
# Open the "sajith_premadasa" collection from the on-disk Chroma directory,
# using the same embedding model for queries.
vectorstore_sajith_loaded = Chroma(
    collection_name="sajith_premadasa",
    embedding_function=embeddings,
    persist_directory="local_chroma_db",
)

# Retriever over that collection; k=10 is forwarded as the search kwargs.
retriever_sajith_loaded = vectorstore_sajith_loaded.as_retriever(
    search_kwargs=dict(k=10),
)

In [9]:
# Smoke-test the reloaded retriever with a single-word query and display the hits.
sajith_education_hits = retriever_sajith_loaded.invoke("education")
sajith_education_hits

[Document(metadata={'page': 20, 'source': 'Sajith_manifesto_english.pdf'}, page_content='21\nEDUCATION, \nVOCATIONAL \nTRAINING & YOUTH\nEducation is a fundamental right. Human \ncapital development is our priority. \n• In recognition of their crucial role in \ndeveloping the nation’s human capital, \nthe government will provide free public \ntransportation to all teachers across Sri \nLanka. \n• Formulating a work plan by making \npolicy decisions before January 2025 to \naddress long drawn-out salary, pension \nand service issues in the education \nsector.\n• Developing a roadmap to transform'),
 Document(metadata={'page': 20, 'source': 'Sajith_manifesto_english.pdf'}, page_content='and service issues in the education \nsector.\n• Developing a roadmap to transform \nevery school into a smart school by \nproviding 100% electricity, water \nfacilities, and physical resources to all \nschools in Sri Lanka. \n• Teacher vacancies for the teaching of \nreligion will be filled. Religious te