# RAG application built on Gemini

In [55]:
from langchain_community.document_loaders import PyPDFLoader # type: ignore

# Load the source PDF from the notebook's working directory.
loader = PyPDFLoader("BITSpilaniNEW.pdf")
data = loader.load()  # returns a list of Documents, one per PDF page (page number is kept in metadata)

In [56]:
# Sanity check: number of Documents produced by the loader.
len(data)

10

In [57]:
from langchain.text_splitter import RecursiveCharacterTextSplitter # type: ignore

# Split the loaded pages into chunks of at most 500 characters so each chunk
# can be embedded and retrieved on its own.
# NOTE(review): chunk_overlap is not set, so the splitter's library default
# applies — confirm that default is appropriate for 500-character chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500)
docs = text_splitter.split_documents(data)


# Report how many chunks the split produced.
print("Total number of documents: ",len(docs))

Total number of documents:  54


In [58]:
# Inspect one arbitrary chunk to eyeball its metadata and text.
docs[7]

Document(metadata={'source': 'BITSpilaniNEW.pdf', 'page': 1}, page_content='who choose Biology as an alternate to Mathematics for applying to B.Pharm. programme must \nchoose from  \nChemistry/Biotechnology or any other vocational subjects as the third subject.  \n Candidates who have not taken Chemistry in 10+2 curricula will be required to complete bridge \ncourse in Chemistry  \nin their first semester.  \n Candidate should have a minimum aggregate score of 1160 (out of maximum of 1600) in SAT \n(reasoning) (new')

In [59]:
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings

from dotenv import load_dotenv
# Read variables from a local .env file into the process environment.
# NOTE(review): presumably this supplies the Google API key used by the
# embedding and chat clients — confirm the expected variable name.
load_dotenv() 

# Embedding client used to vectorise text.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
# Smoke test: embed a short string and show the first five components.
vector = embeddings.embed_query("hello, wod!")
vector[:5]

[0.024535702541470528,
 -0.0018522338941693306,
 -0.024097757413983345,
 -0.061664726585149765,
 0.005196405109018087]

In [60]:
# Build the Chroma vector store from the chunked documents.
# Reuse the `embeddings` client created earlier rather than constructing a
# second, identical GoogleGenerativeAIEmbeddings instance.
# Stable, position-based ids let the store overwrite existing entries when this
# cell is re-run in the same kernel instead of appending duplicate chunks.
# NOTE(review): relies on Chroma upserting on matching ids — confirm for the
# installed langchain_chroma version.
chunk_ids = [f"chunk-{i}" for i in range(len(docs))]
vectorstore = Chroma.from_documents(documents=docs, embedding=embeddings, ids=chunk_ids)

In [61]:
# Expose the vector store as a retriever that returns the 10 most similar
# chunks for a query.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 10})

# Try the retriever on a sample question before wiring it into the chain.
retrieved_docs = retriever.invoke("Why enroll at BITS?")


In [62]:
# Should equal the k configured on the retriever.
len(retrieved_docs)

10

In [63]:
# Show the text of the second retrieved chunk (index 1) for the sample query.
print(retrieved_docs[1].page_content)

Why enroll at BITS?  
 The highest ranked non -government institute as per QS Asia 2020 and QS India 2020.  
 Top class research -focused faculty.  
 Purely merit -based student admissions.  
 Three Indian campuses at Pilani, Goa and Hyderabad and one International campus at Dubai.  
 Over 16,500 on -campus students.  
 Option for six -month industrial training for students through a unique programme called ‘Practice 
School’.


In [64]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model that generates the final answer from the retrieved context.
# temperature=0.3 is a low sampling temperature; the response is capped at
# 500 output tokens.
# `max_output_tokens` is the field name declared by ChatGoogleGenerativeAI;
# `max_tokens` is only an alias in newer releases and may be silently ignored
# by older ones, so the canonical name is used here.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", temperature=0.3, max_output_tokens=500)

In [65]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System instructions for the answering model, written one sentence per line.
# The trailing {context} placeholder is left unformatted here so the prompt
# template can fill it in later.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n\n{context}"
)

# Two-message chat template: the system instructions (with their {context}
# placeholder) followed by the user's question in {input}.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

In [66]:
# Chain that passes documents plus the question through `prompt` to `llm`.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
# Full RAG pipeline: `retriever` supplies the documents, then the
# question-answering chain produces the answer.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [67]:
# Ask the end-to-end chain a sample question and print only the generated
# answer from the response.
question = "Why enroll at BITS?"
response = rag_chain.invoke({"input": question})
print(response["answer"])

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised InternalServerError: 500 An internal error has occurred. Please retry or report in https://developers.generativeai.google/guide/troubleshooting.


BITS is a highly ranked institution with a focus on research and merit-based admissions. It offers multiple campuses, including an international campus in Dubai, and a unique six-month industrial training program. 

