## First, let us import all the packages that are required to build the model.

In [1]:
# Authenticate with the Hugging Face Hub using the token kept in Kaggle secrets.
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient

user_secrets = UserSecretsClient()
hf_token = user_secrets.get_secret("HF_TOKEN")
login(token=hf_token)

In [2]:
!pip install langchain_huggingface langchain_community gradio 

Collecting langchain_huggingface
  Downloading langchain_huggingface-0.3.1-py3-none-any.whl.metadata (996 bytes)
Collecting langchain_community
  Downloading langchain_community-0.3.27-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain-core<1.0.0,>=0.3.70 (from langchain_huggingface)
  Downloading langchain_core-0.3.74-py3-none-any.whl.metadata (5.8 kB)
Collecting huggingface-hub>=0.33.4 (from langchain_huggingface)
  Downloading huggingface_hub-0.34.4-py3-none-any.whl.metadata (14 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.10.1-py3-none-any.whl.metadata (3.4 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain_community)
  Downloading httpx_sse-0.4.1-py3-none-any.whl.metadata (9.4 kB)
Collecting python-dotenv>=0.21.0 (from pydantic-settings<3.0.0,>=2.4.0->langchain_community)
  Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)
Downloading langchain_huggingface-0.3.1-py3-none-any.whl (27 kB)
Downloa

In [3]:
import gradio as gr
import os
import tempfile
from typing import List, Any, Optional
import warnings
warnings.filterwarnings("ignore")
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.document_loaders import (
    PyPDFLoader, 
    TextLoader, 
    CSVLoader,
    UnstructuredWordDocumentLoader,
    UnstructuredPowerPointLoader,
    UnstructuredExcelLoader
)
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

2025-08-14 06:59:48.721863: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1755154789.016415      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1755154789.105104      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [28]:
class MultiRagSystem:
    """Retrieval-augmented question answering over user-uploaded files.

    Intended call order: ``create_llm`` -> ``process_files`` -> ``create_vdb``
    -> ``create_retrieval_chain`` -> ``query``. Each step reports its outcome
    as a human-readable status string rather than raising, so the strings can
    be displayed directly by the caller.
    """

    def __init__(self):
        self.vectorstores = None   # FAISS store built by create_vdb
        self.retriever = None      # retriever derived from the vector store
        self.llm = None            # HuggingFacePipeline built by create_llm
        self.embeddings = None     # embedding model built by setup_embeddings
        self.qa_chain = None       # RetrievalQA chain built by create_retrieval_chain
        self.chunks = []           # split documents accumulated by process_files

        self.setup_embeddings()

    def setup_embeddings(self):
        """Create the embedding model and store it in ``self.embeddings``.

        Returns:
            None on success, or an error message string on failure. The
            message is also printed because ``__init__`` discards the return
            value, which would otherwise hide the failure.
        """
        print("Trying to Initialize the Embedding model")
        try:
            model_name = 'sentence-transformers/all-MiniLM-L6-v2'
            self.embeddings = HuggingFaceEmbeddings(model_name = model_name)
            print("The Embedding model has been successfully created!! ")
        except Exception as e:
            message = f"There was an error occured while creating the embedding model: {e}"
            print(message)
            return message

    def create_llm(self,model_name):
        """Load ``model_name`` as a text-generation pipeline into ``self.llm``.

        Args:
            model_name: Model identifier passed to ``from_pretrained``.

        Returns:
            A status string describing success or the error encountered.
        """
        print("Trying to create Your model")
        try:
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForCausalLM.from_pretrained(model_name)
            pipe = pipeline(
                'text-generation',
                tokenizer = tokenizer,
                model = model,
                max_new_tokens = 512,
                do_sample = True,
                temperature = 0.7
            )
            self.llm = HuggingFacePipeline(pipeline = pipe)
            return f"Your model {model_name} has been created successfully"
        except Exception as e1:
            return f"There was an error in creating the model: {e1}"

    @staticmethod
    def _load_documents(path):
        """Load ``path`` with the loader matching its (case-insensitive) extension.

        Returns the list of loaded documents, or None when the extension is
        not supported. Loader classes are only referenced in the matching
        branch, so an unsupported file never touches them.
        """
        extension = os.path.splitext(str(path))[1].lower()
        if extension == '.txt':
            return TextLoader(path).load()
        if extension == '.pdf':
            return PyPDFLoader(path).load()
        if extension == '.csv':
            return CSVLoader(path).load()
        if extension in ('.doc', '.docx'):
            return UnstructuredWordDocumentLoader(path).load()
        if extension in ('.ppt', '.pptx'):
            return UnstructuredPowerPointLoader(path).load()
        if extension in ('.xls', '.xlsx'):
            return UnstructuredExcelLoader(path).load()
        return None

    def process_files(self, files):
        """Load the uploaded files and append their split chunks to ``self.chunks``.

        Args:
            files: Iterable of uploaded files; each item is either a path
                string or an object exposing the path as ``.name``.

        Returns:
            A status string. Unsupported files are skipped and listed in the
            status; if nothing could be loaded, a message saying so is
            returned instead of a success message.
        """
        if not files:
            return "Please try uploading a file"

        documents = []
        skipped = []
        try:
            for file in files:
                # Tolerate plain path strings as well as objects exposing `.name`.
                filename = getattr(file, 'name', file)
                loaded = self._load_documents(filename)
                if loaded is None:
                    skipped.append(os.path.basename(str(filename)))
                    continue
                documents.extend(loaded)
                print(f"Loaded {len(loaded)} documents from {filename}")
        except Exception as e2:
            return f"Error Loading the File path: {e2}"

        if not documents:
            return "No supported documents could be loaded from the uploaded files"

        try:
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
            self.chunks.extend(text_splitter.split_documents(documents))
            status = "Your document is loaded successfully and its chunks were split"
            if skipped:
                status += f" (skipped unsupported files: {', '.join(skipped)})"
            return status
        except Exception as e3:
            return f"There is an error in splitting the documents: {e3}"

    def create_vdb(self):
        """Build a FAISS vector store from ``self.chunks`` and derive a retriever.

        Returns:
            A status string describing success, the missing prerequisite, or
            the error encountered.
        """
        if not self.chunks:
            return "You documents are not preprocessed"
        if not self.embeddings:
            return "The embedding model is not initialized, so the vector database cannot be created"
        try:
            # Keep the store on the instance so it is not lost after this call.
            self.vectorstores = FAISS.from_documents(self.chunks,self.embeddings)
            self.retriever = self.vectorstores.as_retriever()
            return "Sucessfully the Vector Database has been created"
        except Exception as e4:
            return f"The is an error in creating the Vector DataBase: {e4}"

    def create_retrieval_chain(self):
        """Combine ``self.llm`` and ``self.retriever`` into ``self.qa_chain``.

        Returns:
            A status string describing success, the missing prerequisite, or
            the error encountered.
        """
        if not self.llm:
            return "Your LLM has not been created still please complete the preprocessing steps"
        if not self.retriever:
            return "Cant Fetch data from the vector database"
        try:
            self.qa_chain = RetrievalQA.from_chain_type(
                llm = self.llm,
                retriever = self.retriever,
                return_source_documents = True,
                verbose = True
            )
            return "Your Q and A Retrieval Model has been created Successfully"
        except Exception as e5:
            return f"You Q and A Retrieval Model has not been created. Please try completing the preprocessing steps: {e5}"

    def query(self,msg,chat_history):
        """Answer ``msg`` with the QA chain and append up to three source excerpts.

        Args:
            msg: The user's question.
            chat_history: Accepted for interface compatibility; not used.

        Returns:
            A ``(response, [])`` tuple on every path, including the early
            exit and the error path, so callers can always unpack two values.
        """
        if not self.qa_chain:
            return "Your Retirval QA chain has not Built yet complete building that",[]
        try:
            result = self.qa_chain.invoke({'query':msg})
            answer = result['result']
            source_docs = result.get('source_documents',[])

            source = []
            for i,docs in enumerate(source_docs[:3]):
                source_info = f"**Source{i+1} :**{docs.metadata.get('source','Unknown')}..\n"
                source_info += f"**Document Contents: ** {docs.page_content[:200]}...\n"
                source.append(source_info)
            response = f"**Bot's Reply: ** {answer}"
            if source:
                response += "\n**Sources:**\n" + "\n".join(source)
            return response,[]
        except Exception as e6:
            return f"Error while Querying: {e6}",[]
        
                
            
    
        

In [29]:
# Shared RAG system instance used by the callback functions defined below;
# constructing it immediately attempts to load the embedding model.
rag_model = MultiRagSystem()

Trying to Initialize the Embedding model
The Embedding model has been successfully created!! 


In [30]:
def build_model(model_name):
    """Load the requested LLM into the shared ``rag_model``.

    Args:
        model_name: Model identifier typed by the user. Surrounding
            whitespace is removed; an empty or missing value falls back to
            'microsoft/phi-1_5'.

    Returns:
        The status string produced by ``rag_model.create_llm``.
    """
    # Use the stripped name itself, so " gpt2 " is not passed on with spaces.
    model_name = (model_name or "").strip()
    if not model_name:
        model_name = 'microsoft/phi-1_5'
    return rag_model.create_llm(model_name)

In [31]:
def load_files(files):
    """Pass the uploaded files to ``rag_model.process_files`` and return its status.

    Returns a prompt to upload files when ``files`` is empty or missing.
    """
    if files:
        return rag_model.process_files(files)
    return "Upload Some files first"

In [32]:
def create_vector_database():
    """Build the vector database on the shared ``rag_model`` and return its status."""
    vdb_status = rag_model.create_vdb()
    return vdb_status

In [33]:
def create_qa_retrieval_chain():
    """Build the retrieval QA chain on the shared ``rag_model`` and return its status."""
    chain_status = rag_model.create_retrieval_chain()
    return chain_status

In [34]:
def ask_your_bot(msg,chat_history):
    """Ask the shared ``rag_model`` a question and return only the reply text.

    Args:
        msg: The user's question.
        chat_history: Conversation so far; forwarded to ``rag_model.query``.

    Returns:
        The response string.
    """
    result = rag_model.query(msg,chat_history)
    # query() may return either a (response, extra) tuple or a bare status
    # string; unpacking a bare string into two names would raise ValueError.
    if isinstance(result, tuple):
        return result[0]
    return result

In [35]:
!pip install langchain_huggingface faiss-cpu

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




In [36]:
# Gradio UI: the left column walks through the setup steps (load model, upload
# files, build vector DB, build QA chain); the right column is the chat window.
with gr.Blocks() as app:
    gr.Markdown("# Starting up the Multi Files Rag Model")
    with gr.Row():
        with gr.Column(scale = 1):
            gr.Markdown("## Set up the Pre-requisites of the models")
            with gr.Group():
                gr.Markdown("## Setting up the LLM Models")
                model_input = gr.Textbox(
                    label = 'Enter the Model name to load the model: ',
                    placeholder = 'Model names like gpt-2, microsoft/phi-1_5'
                )
                submit_model = gr.Button("Load Model")
                # Pass the text as `label`; the first positional argument of
                # Textbox is the initial value, not the label.
                model_load_status = gr.Textbox(label = "Model Loaded Staus", interactive = False)
            with gr.Group():
                gr.Markdown("## Upload the documents....")
                files = gr.Files(
                    label = 'Upload text,csv,pdf,ppt,pptx,doc,docx files',
                    file_count = 'multiple',
                    # '.txt' is handled by process_files, so it must be selectable here.
                    file_types = ['.txt','.pdf','.csv','.doc','.docx','.ppt','.pptx','.xls','.xlsx']
                )
                submit_files = gr.Button("Submit Files")
                files_submit_status = gr.Textbox(label = 'Files uploaded status',interactive = False)
            with gr.Group():
                gr.Markdown("## Create a Vector Database")
                vector_btn = gr.Button("Click to Create the FAISS VectorBase")
                vector_status = gr.Textbox(label = 'Status of Creation of the Vector Base', interactive = False)
            with gr.Group():
                gr.Markdown("## Setting up QA-Chain")
                qa_chain_btn = gr.Button("Click to Create the Retrieval QA chain")
                qa_status = gr.Textbox(label = 'Status of the QA Chian', interactive = False)
        with gr.Column(scale = 2):
            gr.Markdown("## 🤖 Chat Bot Interface...")
            chat_bot = gr.Chatbot(
                label = 'Hii This is your Multi Rag Chat Bot',
                height = 400,
                show_label = True,
                container = True
            )
            with gr.Row():
                msg = gr.Textbox(
                    label = 'Enter Your question here: ',
                    placeholder = 'Ask your Bot a Question about the document that you have uploaded',
                    scale = 4
                )
                submit_msg = gr.Button("Ask Bot")
            clear_btn = gr.Button("Clear Chat")
    with gr.Row():
        gr.Markdown("## 📚 Example Questions")
        gr.Examples(
            examples=[
                ["What is the main topic discussed in the documents?"],
                ["Can you summarize the key points from the uploaded files?"],
                ["What are the important dates or numbers mentioned?"],
                ["Explain the methodology described in the documents."],
                ["What conclusions can be drawn from the data?"]
            ],
            inputs=msg
        )

    # Wire each setup button to its callback and status box.
    submit_model.click(fn = build_model,inputs = [model_input],outputs = [model_load_status])
    submit_files.click(fn = load_files,inputs=[files],outputs = [files_submit_status])
    vector_btn.click(fn = create_vector_database,inputs = [],outputs = [vector_status])
    qa_chain_btn.click(fn = create_qa_retrieval_chain,inputs = [],outputs = [qa_status])

    def respond(msg,chat_history):
        """Append the bot's answer to the chat and clear the input box.

        Returns a ``("", chat_history)`` pair matching the ``[msg, chat_bot]``
        outputs; blank questions leave the history unchanged.
        """
        # Guard against a missing history so append() below cannot fail.
        chat_history = chat_history or []
        if not msg.strip():
            return "",chat_history
        response = ask_your_bot(msg,chat_history)
        chat_history.append((msg,response))
        return "",chat_history

    submit_msg.click(fn = respond,inputs = [msg,chat_bot],outputs = [msg,chat_bot])
    msg.submit(fn = respond,inputs = [msg,chat_bot],outputs = [msg,chat_bot])
    clear_btn.click(fn = lambda:[],outputs = [chat_bot])
                
            

In [37]:
# Start the Gradio app only when this code runs as the main module.
if __name__ == "__main__":
    app.launch()

* Running on local URL:  http://127.0.0.1:7863
It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

* Running on public URL: https://59760882d77c9fad14.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Trying to create Your model


Device set to use cpu


Loaded 18 documents from /tmp/gradio/34a5d1ef92e4347fad2ae57454256121c1433588afd6d0743e1c251d66589cc3/unetefficientnet.pdf


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
