In [1]:
from langchain_community.document_loaders import PyPDFLoader
# Parse the PDF into LangChain Document objects (the displayed output below
# shows one Document per page, tagged with its source file and page index).
loader = PyPDFLoader(file_path="my.pdf")
data = loader.load()

In [2]:
# Echo the loaded Document list to inspect each page's metadata and text.
# NOTE(review): this renders every page in full; a slice such as data[:2]
# may be enough for a sanity check — confirm before sharing the notebook.
data

[Document(metadata={'source': 'my.pdf', 'page': 0}, page_content='Openai Pr oject s:\n1\nOpenai Projects:\nHereʼs a breakdown of each line in your code:\nimport logging\nImports the logging module for handling log messages, useful for debugging \nand tracking events during program execution.\nfrom aiogram import Bot, Dispatcher, executor, types\nImports key components from the aiogram library, including:\nBot for interacting with the Telegram Bot API.\nDispatcher for managing bot event handlers.\nexecutor for running the bot.\ntypes for defining message and event types.\nfrom dotenv import load_dotenv\nImports load_dotenv from dotenv to load environment variables from a .env file.\nimport os'),
 Document(metadata={'source': 'my.pdf', 'page': 1}, page_content='Openai Pr oject s:\n2\nImports the os module for interacting with the operating system, particularly \nto access environment variables.\nload_dotenv()\nLoads environment variables from a .env file into the scriptʼs environment.\np

In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Break the loaded pages into smaller chunks for embedding. Only the target
# chunk size (1000) is set here; every other splitter option keeps its
# library default.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
)
docs = text_splitter.split_documents(documents=data)

In [4]:
from langchain_chroma import Chroma 
from langchain_google_genai import GoogleGenerativeAIEmbeddings

In [5]:
from dotenv import load_dotenv
# Read variables from a local .env file into the process environment before
# any Google client is constructed (presumably this is where the API key
# lives — confirm against your .env).
load_dotenv()

# Embedding client for the notebook.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
)

# Smoke test: embed one short query and preview its first five components.
vector = embeddings.embed_query("learn Python")
vector[:5]

[0.029753541573882103,
 -0.0452604703605175,
 0.007054654415696859,
 -0.04593554139137268,
 0.04200207069516182]

In [7]:
# Embed every chunk in `docs` and index the vectors in a Chroma store.
# Reuse the `embeddings` client built in the earlier cell instead of
# constructing a second identical one: the model name then lives in a single
# place, so index-time and query-time embeddings cannot drift apart.
# NOTE(review): no persist_directory is passed, so the index is presumably
# in-memory only and rebuilt on every run — confirm that is intended.
vectorstore = Chroma.from_documents(documents=docs, embedding=embeddings)

In [8]:
# Wrap the vector store as a retriever: plain similarity search, returning
# the 10 best-matching chunks per query.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 10},
)

In [9]:
# Run the retriever on its own (no LLM involved) to see which chunks it
# returns for a sample question.
retrieved_docs = retriever.invoke(
    "What is Python?",
)

In [10]:
# Echo the retrieved chunks to judge how relevant they are to the question.
# NOTE(review): with k=10 this prints up to ten full chunks; consider
# previewing only a few — confirm.
retrieved_docs

[Document(metadata={'page': 5, 'source': 'my.pdf'}, page_content='Openai Pr oject s:\n6\nasync def clear(message: types.Message):\n    """\n    A handler to clear the previous conversation and context.\n    """\n    clear_past()\n    await message.reply("I\'ve cleared the past conversation a\nnd context.")\nCreates a handler for the /clear command, which calls the clear_past() \nfunction to clear the conversation context.\nReason: This provides the user with a way to reset the conversation \nhistory through the /clear command.\npython\nCopy code\n@dispatcher.message_handler(commands=[\'start\'])\nasync def welcome(message: types.Message):\n    """\n    This handler receives messages with `/start` command.\n    """\n    await message.reply("Hi\\nI am Tele Bot!\\Created by Bappy. \nHow can I assist you?")\nDefines a handler for the /start command, sending a welcome message to \nthe user when they initiate a conversation with the bot.\nReason: The /start command serves as a standard entry

In [11]:
from langchain_google_genai import ChatGoogleGenerativeAI


# Chat model used for answer generation: Gemini 1.5 Pro with a low sampling
# temperature and a 500-token cap (presumably on the generated output —
# confirm against the client documentation).
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0.3,
    max_tokens=500,
)

In [12]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

In [18]:
# System instructions for the answer-generation step.
# `{context}` is the placeholder the stuff-documents chain fills with the
# retrieved chunks.
# The user's question is intentionally NOT templated into this system text:
# the chat template already sends it as the human message via `{input}`, so
# repeating it here would send the question twice and place raw user text in
# the system role.
system_prompt = """You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer (although you might not have any context in this case). If you don't know the answer, say that you don't know. Use three sentences maximum and keep the answer concise.

\n\n
{context}"""

In [19]:
# Two-message chat template: the system instructions defined above, followed
# by the user's question, which is supplied at call time as `input`.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
])

In [20]:
# Chain that formats the retrieved documents into the prompt and passes the
# result to the chat model.
question_answering_chain = create_stuff_documents_chain(
    llm=llm,
    prompt=prompt,
)

In [21]:
# Full RAG pipeline: retrieve chunks for the question, then hand them to the
# question-answering chain.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answering_chain,
)

In [23]:
# Ask the sample question end to end and show only the generated answer text.
response = rag_chain.invoke({"input": "What is Python?"})
response["answer"]

'Python is a high-level, general-purpose programming language known for its readability and a large standard library.  It supports multiple programming paradigms, including structured (particularly procedural), object-oriented, and functional programming.  Python is commonly used for web development, data science, scripting, and more.\n'

In [24]:
# Marker printed when execution reaches the last cell of the notebook.
print("The End")

The End
