<a href="https://colab.research.google.com/github/wairiukoirwine/E-citizen-ai/blob/main/ecitizen_chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:

!pip install sentence-transformers faiss-cpu PyPDF2 gradio

import gradio as gr
import PyPDF2
from sentence_transformers import SentenceTransformer, util
import numpy as np
import faiss

# Sentence-embedding model shared by PDF indexing and query encoding.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Module-level retrieval state; upload_pdf() fills these in and
# answer_query() reads them.
pdf_paragraphs = []                # paragraph strings extracted from the PDF
pdf_embeddings = pdf_index = None  # embeddings and FAISS index (unset until upload)

# Function to load PDF and process paragraphs
def upload_pdf(file):
    """Extract paragraphs from an uploaded PDF and build the search index.

    The text of every page is split on blank lines ("\\n\\n"); each non-empty
    chunk is embedded with the module-level ``model`` and added to a FAISS
    L2 index. On success the module globals ``pdf_paragraphs``,
    ``pdf_embeddings`` and ``pdf_index`` are replaced together.

    Args:
        file: The value delivered by the Gradio file component. It may be
            ``None``, an object exposing the path as ``.name``, or a plain
            path string (which of these depends on the Gradio version --
            confirm against the installed release).

    Returns:
        A status message for the "Upload Status" textbox.
    """
    global pdf_paragraphs, pdf_embeddings, pdf_index

    if file is None:
        return "Please upload a PDF file first."

    # Accept both file-like wrappers (``.name``) and bare path strings.
    pdf_path = getattr(file, "name", file)
    reader = PyPDF2.PdfReader(pdf_path)

    # Collect into a local list so the globals are only touched once the
    # outcome is known and never end up half-updated.
    paragraphs = []
    for page in reader.pages:
        text = page.extract_text()
        if text:
            # Split text into paragraphs by double newline
            paragraphs.extend(p.strip() for p in text.split("\n\n") if p.strip())

    if not paragraphs:
        # Nothing to embed (e.g. a scanned, image-only PDF). Building the
        # index would fail because the embedding matrix has no second
        # dimension, so clear the state and report the problem instead.
        pdf_paragraphs = []
        pdf_embeddings = None
        pdf_index = None
        return "No extractable text was found in the PDF. Please upload a text-based PDF."

    # Compute embeddings
    embeddings = model.encode(paragraphs, convert_to_tensor=False)
    embeddings_np = np.array(embeddings, dtype='float32')

    # Build FAISS index
    index = faiss.IndexFlatL2(embeddings_np.shape[1])
    index.add(embeddings_np)

    # Publish the new state in one step so paragraphs and index always match.
    pdf_paragraphs = paragraphs
    pdf_embeddings = embeddings
    pdf_index = index

    return "PDF uploaded and processed. Bot is ready to answer questions accurately."

# Function to answer user query
def answer_query(user_query, chat_history):
    """Answer a question with the closest paragraph of the uploaded PDF.

    The query is embedded with the module-level ``model`` and the single
    nearest paragraph (k=1) is looked up in ``pdf_index``.

    Args:
        user_query: Text typed by the user.
        chat_history: List of ``(user_message, bot_message)`` tuples. It is
            appended to in place; the click handler does not write the
            session state back, so the history relies on this mutation.

    Returns:
        A tuple ``("", chat_history)``: the empty string clears the input
        textbox and the list is shown in the chat display.
    """
    global pdf_paragraphs, pdf_index

    # A blank question would still be embedded and matched to an arbitrary
    # paragraph, so ignore it without adding a chat entry.
    if not (user_query or "").strip():
        return "", chat_history

    # Both the paragraphs and the index are needed; checking only one of
    # them would crash if indexing stopped after the paragraphs were set.
    if not pdf_paragraphs or pdf_index is None:
        chat_history.append((f"User: {user_query}", "Bot: Please upload the PDF first."))
        return "", chat_history

    # Encode query and search
    query_vec = np.array(model.encode([user_query], convert_to_tensor=False), dtype='float32')
    _distances, neighbours = pdf_index.search(query_vec, k=1)
    best = int(neighbours[0][0])

    # FAISS uses -1 for "no neighbour"; also guard against an index that is
    # out of step with the paragraph list.
    if 0 <= best < len(pdf_paragraphs):
        reply = f"Bot: {pdf_paragraphs[best]}"
    else:
        reply = "Bot: Sorry, I could not find a relevant passage in the PDF."

    chat_history.append((f"User: {user_query}", reply))
    return "", chat_history

# Gradio interface
# Build the UI. Components are rendered in the order they are created
# inside the Blocks context, so the statement order below is the page layout.
with gr.Blocks() as demo:
    # Per-session list of (user_message, bot_message) tuples. It is passed
    # to answer_query as an input only; answer_query appends to it in place.
    chat_history = gr.State([])

    gr.Markdown("## eCitizen Smart Q&A Bot (PDF-based)")

    # Question box and send button share one row.
    with gr.Row():
        txt_input = gr.Textbox(label="Your Question", placeholder="Ask about eCitizen services...")
        send_btn = gr.Button("Send")

    # PDF upload widget plus a textbox that shows upload_pdf's status message.
    upload_pdf_file = gr.File(label="Upload eCitizen PDF", file_types=[".pdf"])
    upload_status = gr.Textbox(label="Upload Status")

    chat_display = gr.Chatbot(label="Chat History")

    # answer_query returns ("", history): the first value clears the
    # question box, the second is rendered in the chat display.
    send_btn.click(answer_query, [txt_input, chat_history], [txt_input, chat_display])
    # Index the PDF as soon as it is uploaded and show the returned status.
    upload_pdf_file.upload(upload_pdf, upload_pdf_file, upload_status)

# Start the Gradio server for this app.
demo.launch()

Collecting faiss-cpu
  Downloading faiss_cpu-1.13.1-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (7.6 kB)
Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Downloading faiss_cpu-1.13.1-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (23.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m76.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m14.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyPDF2, faiss-cpu
Successfully installed PyPDF2-3.0.1 faiss-cpu-1.13.1


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

  chat_display = gr.Chatbot(label="Chat History")
  chat_display = gr.Chatbot(label="Chat History")


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://f3c216cf82186996b5.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


