<a href="https://colab.research.google.com/github/wairiukoirwine/E-citizen-ai/blob/main/ecitizen_chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# Install dependencies if needed:
# pip install sentence-transformers faiss-cpu PyPDF2 gradio

import gradio as gr
import PyPDF2
from sentence_transformers import SentenceTransformer, util
import numpy as np
import faiss

# Load embedding model
# Loaded once at module level; both upload_pdf() and answer_query() call
# model.encode() on this shared instance.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Global variables
# These are rebound by upload_pdf() and read by answer_query().
# Paragraph strings extracted from the uploaded PDF. answer_query() uses the
# position returned by the index search to look up the paragraph text here.
pdf_paragraphs = []
# Result of model.encode() over pdf_paragraphs (None until a PDF is uploaded).
pdf_embeddings = None
# faiss.IndexFlatL2 built from the paragraph embeddings (None until a PDF is
# uploaded).
pdf_index = None

# Function to load PDF and process paragraphs
def upload_pdf(file):
    """Extract paragraphs from an uploaded PDF and build a search index.

    The text of every page is split on blank lines into paragraphs, each
    paragraph is embedded with the module-level ``model``, and the vectors
    are stored in a FAISS ``IndexFlatL2``. The module globals
    ``pdf_paragraphs``, ``pdf_embeddings`` and ``pdf_index`` are replaced
    only after everything has been built, so a failure part-way through
    cannot leave the paragraph list and the index out of sync.

    Args:
        file: The value delivered by the Gradio file component. Either an
            object exposing the path as ``.name`` or the path itself;
            ``None`` is tolerated.

    Returns:
        A status message for the "Upload Status" textbox.
    """
    global pdf_paragraphs, pdf_embeddings, pdf_index

    if file is None:
        return "No file received. Please upload a PDF."

    # Accept both a temp-file style object (path in ``.name``) and a plain
    # path string, since the shape of the upload value depends on the
    # Gradio version/configuration in use.
    pdf_path = getattr(file, "name", file)
    reader = PyPDF2.PdfReader(pdf_path)

    # Collect into a local list first; globals are swapped in at the end.
    paragraphs = []
    for page in reader.pages:
        text = page.extract_text()
        if text:
            # Split text into paragraphs by double newline
            paragraphs.extend(p.strip() for p in text.split("\n\n") if p.strip())

    if not paragraphs:
        # Nothing to embed (e.g. a scanned/image-only PDF). Encoding an empty
        # list yields an array without a second dimension, which would crash
        # the index construction below, so reset state and report instead.
        pdf_paragraphs = []
        pdf_embeddings = None
        pdf_index = None
        return "No extractable text was found in the PDF. Please upload a text-based PDF."

    # Compute embeddings
    embeddings = model.encode(paragraphs, convert_to_tensor=False)
    embeddings_np = np.array(embeddings, dtype='float32')

    # Build FAISS index
    index = faiss.IndexFlatL2(embeddings_np.shape[1])
    index.add(embeddings_np)

    # Publish the new state in one go so readers never see a half-updated mix.
    pdf_paragraphs = paragraphs
    pdf_embeddings = embeddings
    pdf_index = index

    return "PDF uploaded and processed. Bot is ready to answer questions accurately."

# Function to answer user query
def answer_query(user_query, chat_history):
    """Answer a question with the closest paragraph from the uploaded PDF.

    The query is embedded with the module-level ``model`` and the single
    nearest paragraph (L2 distance) is looked up in ``pdf_index``.

    Args:
        user_query: Text typed by the user.
        chat_history: List of ``(user_message, bot_message)`` tuples. It is
            appended to in place and also returned.

    Returns:
        A ``("", chat_history)`` tuple: the empty string clears the input
        textbox and the history is what the chat display renders.
    """
    global pdf_paragraphs, pdf_index

    # Ignore blank submissions instead of embedding an empty string and
    # replying with an arbitrary paragraph.
    if not user_query or not user_query.strip():
        return "", chat_history

    if not pdf_paragraphs or pdf_index is None:
        chat_history.append((f"User: {user_query}", "Bot: Please upload the PDF first."))
        return "", chat_history

    # Encode query and search
    query_emb = model.encode([user_query], convert_to_tensor=False)
    _, neighbours = pdf_index.search(np.array(query_emb, dtype='float32'), k=1)
    best = int(neighbours[0][0])

    # FAISS reports -1 when it has no result; also guard against an index
    # that does not match the current paragraph list, so a bad position
    # never silently selects the wrong paragraph via negative indexing.
    if 0 <= best < len(pdf_paragraphs):
        reply = f"Bot: {pdf_paragraphs[best]}"
    else:
        reply = "Bot: Sorry, I could not find a relevant answer in the PDF."

    chat_history.append((f"User: {user_query}", reply))
    return "", chat_history

# Gradio interface
# Build the UI. Components are declared in this order inside the Blocks
# context: title, question row, PDF upload + status box, chat display.
with gr.Blocks() as demo:
    # Conversation history, starting as an empty list. It is passed to
    # answer_query() as an input below but is not one of the outputs.
    chat_history = gr.State([])

    gr.Markdown("## eCitizen Smart Q&A Bot (PDF-based)")

    # Question box and Send button share one row.
    with gr.Row():
        txt_input = gr.Textbox(label="Your Question", placeholder="Ask about eCitizen services...")
        send_btn = gr.Button("Send")

    # Upload widget restricted to .pdf files; upload_status shows the message
    # returned by upload_pdf().
    upload_pdf_file = gr.File(label="Upload eCitizen PDF", file_types=[".pdf"])
    upload_status = gr.Textbox(label="Upload Status")

    chat_display = gr.Chatbot(label="Chat History")

    # Send: answer_query's first return value goes to the textbox, the second
    # to the chat display.
    send_btn.click(answer_query, [txt_input, chat_history], [txt_input, chat_display])
    # Upload: process the PDF and show the resulting status message.
    upload_pdf_file.upload(upload_pdf, upload_pdf_file, upload_status)

# Start the Gradio app with default launch settings.
demo.launch()


  chat_display = gr.Chatbot(label="Chat History")
  chat_display = gr.Chatbot(label="Chat History")


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://0085e59f818c183fef.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


