In [None]:
import os
from dotenv import load_dotenv
# Load the variables defined in the .env file into the process environment,
# so that os.environ["GOOGLE_API_KEY"] is available to the cells below.
load_dotenv()



### Run the code below to check that the variables defined in the `.env` file were loaded into this program

In [None]:
# Sanity check that load_dotenv() made the key available.
# os.environ.get() returns None for a missing variable instead of raising
# KeyError, so both outcomes (set / not set) are reported clearly.
if os.environ.get('GOOGLE_API_KEY'):
    print("API key is set")
else:
    print("GOOGLE_API_KEY is missing or empty - check your .env file")

### The imports required from the LangChain framework for chunking, creating embeddings, reading the PDF, and calling the chat model are declared here. The Chroma vector store, used for similarity search and retrieval, is imported in a later cell.

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter  # splits documents into chunks
from langchain_google_genai import GoogleGenerativeAIEmbeddings      # converts text into vectors, i.e. creates embeddings
from langchain.chat_models import init_chat_model                    # creates the chat model (Gemini)
from langchain_community.document_loaders import PyPDFLoader        # reads the PDF file
#from langchain_core.vectorstores import Chroma


In [None]:
# Identifier of the chat model used throughout the notebook ("<provider>:<model name>").
CHAT_MODEL_ID = 'google_genai:gemini-2.5-flash-lite'
model = init_chat_model(CHAT_MODEL_ID)

### This is how you get your LLM to answer a question: call the `invoke` method and read the reply from the `.content` attribute of the returned response. This cell only tests that the LLM is working; it is not part of the RAG project (FYI only).

In [None]:
# Smoke test only (not part of the RAG pipeline): send one prompt to the chat model.
response = model.invoke(input="What is AI?")
response.content  # last expression of the cell, so Jupyter displays the reply text

### Now let's load our PDF by giving the path to the PDF file and then using the loader's `load` method to read it.

In [None]:
# Path of the PDF to load. Set the RAG_PDF_PATH environment variable (e.g. in the
# .env file) to point at your own copy; otherwise the original location is used.
pdf_path = os.environ.get(
    "RAG_PDF_PATH",
    "/Users/vidyashreerayar/Downloads/Learning and courses/My Projects/RAG/Ansh_Lamba_Tutorial/Festo_File_Overview.pdf",
)
loader = PyPDFLoader(pdf_path)          # PDF loader object that reads the file at pdf_path
docs = loader.load()                    # Load the PDF's content via the loader object
#docs                                  # uncomment to display the loaded documents

### The PDF is now loaded and our next step is to create chunks of this document

In [None]:
CHUNK_SIZE = 500      # maximum number of characters per chunk
CHUNK_OVERLAP = 50    # characters shared by neighbouring chunks, to preserve context
splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
chunks = splitter.split_documents(docs)   # split the loaded documents into overlapping text chunks
#len(chunks)                              # uncomment to get the total number of generated chunks
#chunks[0:20]                             # uncomment to preview the first 20 chunks

### Now we convert our text into embeddings/vectors using the embedding model of Google Gen AI

In [None]:
# Embedding model used to turn text into vectors; the API key is read from the environment.
# NOTE(review): batch_size appears to be documented as an argument of embed_documents(),
# not of the constructor — confirm that passing it here actually has an effect.
embedding_model = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=os.environ["GOOGLE_API_KEY"],
    batch_size=5
)

### For learning purposes only: below is how you can convert any text into embeddings (i.e. vectors), using the method `embed_query` for a single short text and `embed_documents` for a large amount of text or multiple documents

#### embed_documents is used to generate embeddings for multiple texts (such as document chunks) so they can be stored in a vector database.

#### embed_query is used to generate an embedding for a single search query, optimized for similarity matching against the stored document embeddings.

In [None]:
# Learning example: convert the single word "what" into an embedding (a vector)
# and print how many numbers that vector contains.
embeddings = embedding_model.embed_query("what")
embedding_length = len(embeddings)
print(embedding_length)
#embeddings                                        # uncomment to display the generated vector

In [None]:
from langchain_chroma import Chroma

### As the next step, we create a vector store through LangChain called Chroma (ChromaDB). LangChain does all the work for you: all you have to provide is the variable holding the chunks created in the earlier steps as the first input (`documents`), and the variable that holds your embedding model as the second input (`embedding`).

In [None]:
small_chunks = chunks[:100]   # only the first 100 chunks are embedded and stored

# Directory where Chroma persists the vector store on disk. Override it with the
# RAG_VECTORSTORE_DIR environment variable (e.g. in the .env file); the default is
# the original location.
persist_directory = os.environ.get(
    "RAG_VECTORSTORE_DIR",
    "/Users/vidyashreerayar/Downloads/Learning and courses/My Projects/RAG/Ansh_Lamba_Tutorial/Vectorstore",
)

# NOTE(review): re-running this cell against the same directory presumably adds the
# same chunks to the stored collection again — confirm before relying on re-runs.
vectorstore = Chroma.from_documents(
    documents=small_chunks,
    embedding=embedding_model,
    persist_directory=persist_directory,
)

### We now do a semantic search using the `similarity_search` method of the vector store. Along with the query you want to ask, you give the number of top matches to retrieve from your PDF, say `k=n`, so that it returns the top n matching chunks. We can then use these results as context in the next step, when we talk to the LLM.

In [None]:
# Retrieve the 2 stored chunks most similar to the question; they serve as context for the LLM.
context = vectorstore.similarity_search(query="what is main.cpp", k=2)

### The next step is to **talk to the LLM**: as a user of this RAG, you instruct the LLM to refer to the `context` variable we created above, which holds the chunks retrieved from the PDF. You can do this by passing the `context` variable in an f-string.

In [None]:
# Ask the chat model the question, handing it the retrieved chunks as context.
# The chat model created with init_chat_model is bound to the name `model`;
# no variable named `llm` exists in this notebook, so using it raises NameError.
response = model.invoke(f"What is main.cpp, You can refer to the context:{context}")
print(response.content)

### If we restart the kernel, an in-memory vector store vanishes, because it is deleted every time the memory is refreshed. To keep history and give memory to your RAG bot, we store the vector store in a directory on disk, called the persist directory. After that, all you have to do is the querying step, because the chunks and their embeddings are stored in the persist directory, i.e. in a database.