In [None]:
import os
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers
from langchain_pinecone import PineconeVectorStore
from dotenv import load_dotenv

# Imports necessary libraries for loading documents, managing environment variables, and building the RAG chain.

In [None]:
def load_pdf(data):
    """Load the PDF files of a directory into documents.

    Args:
        data: Path of the directory handed to ``DirectoryLoader``; files
            matching the glob ``*.pdf`` are parsed with ``PyPDFLoader``.

    Returns:
        The result of ``DirectoryLoader.load()`` for the matched files.
    """
    pdf_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

# Defines a function to scan the specified directory and extract text from all PDF files found.

In [None]:
# Read the PDFs stored in the local "data/" folder.
extracted_data = load_pdf(data="data/")

# Calls the load_pdf function to process and extract raw text data from all PDFs located in the "data/" directory.

In [None]:
#Create text chunks
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into smaller chunks ready for embedding.

    Args:
        extracted_data: Documents to split (e.g. the output of ``load_pdf``).
        chunk_size: ``chunk_size`` forwarded to the splitter. Defaults to 500,
            the value this notebook has always used.
        chunk_overlap: ``chunk_overlap`` forwarded to the splitter.
            Defaults to 20.

    Returns:
        The chunks returned by
        ``RecursiveCharacterTextSplitter.split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(extracted_data)

# Splits the extracted text into smaller, overlapping chunks (500 chars) to prepare them for embedding.

In [None]:
# Chunk the loaded documents, then print the chunk count as a quick sanity check.
text_chunks = text_split(extracted_data=extracted_data)
print("length of my chunk:", len(text_chunks))

# Executes the chunking process and prints the total count of resulting text segments to verify the split.

In [None]:
#download embedding model

def download_hugging_face_embedddings(model_name="sentence-transformers/all-MiniLM-L6-v2"):
    """Create the Hugging Face embedding model used to vectorize text.

    Args:
        model_name: Hugging Face Hub id of the sentence-transformers model.
            The default uses the published casing of the repo id
            (``all-MiniLM-L6-v2``); the previous spelling ``all-MinilM-L6-v2``
            did not match it.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with ``model_name``.
    """
    return HuggingFaceEmbeddings(model_name=model_name)

# Initializes and downloads the specific Hugging Face model used to convert text chunks into vector embeddings.

In [None]:
# Build the embedding model once; the Pinecone cells below reuse `embeddings`.
embeddings = download_hugging_face_embedddings()

# Calls the function to download and initialize the embedding model, storing it in the 'embeddings' variable.

In [None]:
# Pull variables from a local .env file (if one exists) into the process
# environment so the Pinecone key does not have to be exported by hand.
load_dotenv()

pinecone_key = os.getenv('PINECONE_API_KEY')
if not pinecone_key:
    # Fail early with an actionable message instead of a bare KeyError.
    raise RuntimeError(
        "PINECONE_API_KEY is not set; export it or add it to a .env file "
        "before running this cell."
    )
index_name = "test-chatbot"

# Run this cell only once initially: every run uploads the chunks again.
# Afterwards, connect with PineconeVectorStore.from_existing_index instead.
# The API key is not passed explicitly; presumably the Pinecone integration
# reads PINECONE_API_KEY from the environment -- confirm against its docs.
docsearch = PineconeVectorStore.from_texts(
    [t.page_content for t in text_chunks],
    embeddings,
    index_name=index_name,
)

# Retrieves the API key and uploads the vectorized text chunks into the specified Pinecone index.

In [None]:
# Attach to the already-populated index; nothing is uploaded here.
docsearch = PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embeddings,
)

# Connects to an already populated Pinecone index to enable searching without re-uploading the data.

In [None]:
# Prompt text for the QA chain. `{context}` and `{question}` are format
# placeholders; the model is told to answer only from the supplied context
# and to admit when it does not know.
prompt_template ="""
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer

Context: {context}
Question: {question}

Only return helpful answer below and nothing else
Helpful answer:
"""

#Defines the prompt structure that guides the LLM to answer questions based strictly on the provided context.

In [None]:
# Turn the raw template string into a PromptTemplate and package it the way
# the retrieval chain expects its extra keyword arguments.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)
chain_type_kwargs = dict(prompt=PROMPT)

# Initializes the custom prompt object and packages it into a dictionary to configure the retrieval chain.

In [None]:
# Local Llama 2 chat model loaded through CTransformers, with the generation
# settings (token limit, temperature) passed via `config`.
llm = CTransformers(
    model="model/llama-2-7b-chat.ggmlv3.q4_0.bin",
    model_type="llama",
    config=dict(max_new_tokens=512, temperature=0.8),
)

# Loads the local quantized Llama 2 model and configures generation parameters like token limit and creativity (temperature).

In [None]:
# Assemble the QA chain: retrieve 2 results per query (k=2), pass them to the
# LLM with the custom prompt ("stuff" chain type), and return the source
# documents alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=docsearch.as_retriever(search_kwargs=dict(k=2)),
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)

# Constructs the main Question-Answering chain that connects the LLM, the retriever (Pinecone), and the prompt to answer queries.

In [None]:
# Interactive question/answer loop.
# Type "exit" or "quit" (any casing), send EOF, or interrupt the kernel to
# stop; without an exit path this cell would never finish.
while True:
    try:
        user_input = input("Input Prompt:")
    except (EOFError, KeyboardInterrupt):
        # Leave quietly instead of ending the cell with a traceback.
        break

    question = user_input.strip()
    if not question:
        # Nothing to ask; prompt again rather than querying the chain.
        continue
    if question.lower() in ("exit", "quit"):
        break

    result = qa.invoke({"query": question})
    print("Response :", result["result"])

# Starts an interactive loop to continuously accept user input, run the QA chain, and print the answer.