<a href="https://colab.research.google.com/github/utkarshg1/Test1/blob/main/rag_pipeline_gradio.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install gradio langchain langchain-google-genai langchain-cohere chromadb pypdf cohere



In [2]:
import gradio as gr
from langchain_google_genai import GoogleGenerativeAI
from langchain_cohere import CohereEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import PyPDFLoader
import tempfile
import os
from google.colab import userdata

# Copy each API key from the Colab secret store into the process environment
# (presumably where the Gemini and Cohere clients below pick them up).
for _secret_name in ("GOOGLE_API_KEY", "COHERE_API_KEY"):
    os.environ[_secret_name] = userdata.get(_secret_name)

# Gemini generates the answers; Cohere produces the embeddings for retrieval.
llm = GoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0.3,
)
embeddings = CohereEmbeddings(
    model="embed-english-v3.0",
)

# Retrieval chain shared by the UI callbacks; stays None until a PDF has been
# processed.
qa_chain = None

def process_pdf(pdf_file):
    """Build the global retrieval chain from an uploaded PDF.

    Args:
        pdf_file: Value delivered by the file-upload input: either a path
            string or an object exposing the path as ``.name``. ``None``
            when nothing has been uploaded.

    Returns:
        A status message for the UI: the number of chunks indexed on
        success, or a "❌"-prefixed message when no file was supplied or no
        text could be extracted. On the failure paths the previously built
        ``qa_chain`` (if any) is left untouched.
    """
    global qa_chain

    # Function-scope import: only the collection naming below needs it.
    import uuid

    # Clicking "Process PDF" without selecting a file passes None.
    if pdf_file is None:
        return "❌ Please upload a PDF first!"

    # Accept both a plain path and a file-like wrapper carrying the path.
    pdf_path = pdf_file.name if hasattr(pdf_file, "name") else pdf_file

    # Load the PDF and split it into overlapping chunks.
    documents = PyPDFLoader(pdf_path).load()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
    )
    chunks = text_splitter.split_documents(documents)

    # Scanned / image-only PDFs can yield no text; there is nothing to embed.
    if not chunks:
        return "❌ No extractable text found in this PDF."

    # Use a fresh collection per upload. With the default collection name,
    # repeated in-process calls can land in the same collection, so answers
    # for a new PDF could cite chunks from a previously uploaded one.
    vectorstore = Chroma.from_documents(
        chunks,
        embeddings,
        collection_name=f"pdf_{uuid.uuid4().hex}",
    )

    # "stuff" chain: the top-3 retrieved chunks are placed in a single prompt.
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
        return_source_documents=True,
    )

    return f"✅ Processed {len(chunks)} chunks from PDF"

def query_rag(question):
    """Answer a question with the retrieval chain and list its sources.

    Args:
        question: The user's question text.

    Returns:
        The chain's answer followed by a "Sources" section giving, for each
        retrieved chunk, its page number and the first 100 characters of its
        text. Returns a "❌"-prefixed message instead when the question is
        blank or no PDF has been processed yet.
    """
    # Reject blank input before anything else so an empty submit never
    # reaches the embedding / LLM calls.
    if not question or not question.strip():
        return "❌ Please enter a question!"

    if qa_chain is None:
        return "❌ Please upload a PDF first!"

    result = qa_chain.invoke({"query": question})

    source_lines = []
    for i, doc in enumerate(result["source_documents"], 1):
        page = doc.metadata.get("page", "N/A")
        # PyPDFLoader records zero-based page indices; show the 1-based
        # number a reader would see in a PDF viewer.
        if isinstance(page, int):
            page += 1
        source_lines.append(f"{i}. Page {page}: {doc.page_content[:100]}...\n")

    return result["result"] + "\n\n---\nSources:\n" + "".join(source_lines)

# Gradio interface: upload and index a PDF, then ask questions about it.
with gr.Blocks() as demo:
    gr.Markdown(value="# 📄 RAG Pipeline with LangChain, Gemini & Cohere")

    # The file picker and its trigger button sit side by side.
    with gr.Row():
        pdf_input = gr.File(
            label="Upload PDF",
            file_types=[".pdf"],
        )
        upload_btn = gr.Button(value="Process PDF")

    # Read-only box showing the message returned by process_pdf.
    status = gr.Textbox(
        label="Status",
        interactive=False,
    )

    question = gr.Textbox(
        label="Ask a question",
        placeholder="What is this document about?",
    )
    answer = gr.Markdown(label="Answer")
    submit_btn = gr.Button(value="Get Answer")

    # Event wiring: each button feeds one input into its callback and shows
    # the returned string in one output component.
    upload_btn.click(fn=process_pdf, inputs=pdf_input, outputs=status)
    submit_btn.click(fn=query_rag, inputs=question, outputs=answer)

# debug=True keeps the cell running so errors and logs stay visible.
demo.launch(debug=True)

It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://177717de07448d352a.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://177717de07448d352a.gradio.live


