In [None]:
# %%
# Install the third-party packages imported in the next cell.
# NOTE: the leading "!" is IPython/Jupyter shell syntax, so this line only
# works when run as a notebook cell, not as a plain Python script.
!pip install PyPDF2 gradio faiss-cpu sentence-transformers langchain python-docx groq

In [None]:
import gradio as gr
import os
import tempfile
from PyPDF2 import PdfReader
from docx import Document
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from langchain.text_splitter import RecursiveCharacterTextSplitter
from groq import Groq

# Groq API key. Read from the GROQ_API_KEY environment variable so the secret
# does not have to be written into (and accidentally committed with) this file.
# Falls back to an empty string, which matches the previous placeholder value.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
# === Groq API setup ===
client = Groq(api_key=GROQ_API_KEY)

# Module-level state shared by load_documents() and chat_with_docs():
#   doc_chunks   - text chunks of the currently indexed documents
#   chat_history - list of {"role": ..., "content": ...} message dicts
#   index        - FAISS index over the chunk embeddings (None until upload)
doc_chunks = []
chat_history = []
index = None
# Sentence embedding model used for both document chunks and user queries.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")


def load_documents(docs):
    """Extract text from uploaded files, chunk it, and rebuild the search index.

    Args:
        docs: Iterable of uploaded files. Each item may be an object exposing
            the file path as ``.name`` or a plain path string. ``None``
            (nothing uploaded yet) is treated like an empty list.

    Returns:
        A status string for the UI: a success message, an error message naming
        the file that could not be read, or a notice that no text was found.

    Side effects:
        On success, replaces the module-level ``doc_chunks`` and ``index``.
        On any failure path they are left untouched.
    """
    global doc_chunks, index
    pieces = []

    # `docs or []` keeps a click on "Process" with no files from crashing.
    for doc in docs or []:
        # Accept both file-like objects (with .name) and bare path strings.
        path = getattr(doc, "name", doc)
        ext = os.path.splitext(path)[1].lower()
        try:
            if ext == ".pdf":
                reader = PdfReader(path)
                # Guard against pages that yield no text (None or empty),
                # e.g. scanned/image-only pages.
                pieces.extend(page.extract_text() or "" for page in reader.pages)
            elif ext == ".docx":
                pieces.extend(para.text for para in Document(path).paragraphs)
            elif ext == ".txt":
                # Explicit encoding: the platform default is locale-dependent.
                # Undecodable bytes are replaced rather than failing the upload.
                with open(path, "r", encoding="utf-8", errors="replace") as f:
                    pieces.append(f.read())
            # Files with any other extension are skipped.
        except Exception as e:
            return f"❌ Error reading {path}: {e}"

    # Same layout as before: every page/paragraph/file is newline-terminated.
    all_text = "".join(f"{piece}\n" for piece in pieces)

    if not all_text.strip():
        return "❌ No text could be extracted."

    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    new_chunks = splitter.split_text(all_text)
    # FAISS expects float32 vectors.
    embeddings = np.asarray(model.encode(new_chunks), dtype="float32")
    new_index = faiss.IndexFlatL2(embeddings.shape[1])
    new_index.add(embeddings)

    # Publish chunks and index together so they never get out of sync if
    # embedding or indexing raises part-way through.
    doc_chunks = new_chunks
    index = new_index
    return "✅ Documents uploaded and indexed successfully!"


def chat_with_docs(message):
    """Answer a user question using the indexed documents and recent chat.

    Retrieves the document chunks closest to ``message`` from the FAISS index,
    builds a single prompt from the recent conversation, the retrieved context
    and the question, and sends it to the Groq chat completion API.

    Args:
        message: The user's question as typed into the UI.

    Returns:
        The conversation as a list of ``{"role": ..., "content": ...}`` dicts.
        When no documents are indexed, the returned list carries a reminder to
        upload documents but the stored history is not modified. A blank
        message returns the stored history unchanged.

    Side effects:
        Appends the question and the answer (or the API error text) to the
        module-level ``chat_history``.
    """
    global doc_chunks, chat_history, index

    # Nothing to ask: do not spend an embedding + API call on a blank message.
    if not message or not message.strip():
        return chat_history

    if not doc_chunks or index is None:
        return chat_history + [
            {"role": "user", "content": message},
            {"role": "assistant", "content": "❌ Please upload documents first."}
        ]

    query_embedding = model.encode([message])
    # Never ask for more neighbours than there are chunks: FAISS pads missing
    # results with -1, which as a list index would silently pick the last chunk.
    top_k = min(5, len(doc_chunks))
    _, neighbor_ids = index.search(np.array(query_embedding), k=top_k)
    context = "\n".join(
        doc_chunks[i] for i in neighbor_ids[0] if 0 <= i < len(doc_chunks)
    )

    # History entries are dicts, so read the fields by key. (Unpacking a dict
    # as `role, content` would yield its key names, not the message.)
    history_lines = []
    for turn in chat_history[-5:]:
        speaker = "User" if turn["role"] == "user" else "Assistant"
        history_lines.append(f"{speaker}: {turn['content']}\n")

    full_context = (
        "You are a helpful assistant. Use the context from uploaded documents and conversation history to answer.\n\n"
        + "".join(history_lines)
        + f"\nContext:\n{context}\n\nUser: {message}\nAssistant:"
    )

    try:
        response = client.chat.completions.create(
            model="mistral-saba-24b",
            messages=[{"role": "user", "content": full_context}]
        )
        answer = response.choices[0].message.content.strip()
    except Exception as e:
        # Surface API failures in the chat instead of breaking the UI callback.
        answer = f"❌ Error from Groq API: {e}"

    chat_history.append({"role": "user", "content": message})
    chat_history.append({"role": "assistant", "content": answer})
    return chat_history


# Gradio UI: a narrow left column for uploading/processing documents and a
# wider right column holding the chat window and the question box.
with gr.Blocks(theme=gr.themes.Soft(), title="📚 Classmate AI: Document Chatbot") as demo:
    gr.Markdown("# 📚 Classmate: Your AI Study Buddy")
    gr.Markdown("Upload multiple documents and start asking questions in a chatbot style.")

    with gr.Row():
        with gr.Column(scale=1):
            files = gr.File(file_types=[".pdf", ".docx", ".txt"], file_count="multiple", label="📂 Upload Documents")
            upload_status = gr.Textbox(label="", interactive=False)

            upload_button = gr.Button("📥 Process Documents")
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="💬 Chat with your Docs", type="messages")
            user_input = gr.Textbox(show_label=False, placeholder="Ask a question and press Enter")

    # Indexing is triggered explicitly; its status string lands in the textbox.
    upload_button.click(fn=load_documents, inputs=[files], outputs=[upload_status])

    # Pressing Enter sends the question; once the answer is rendered, clear
    # the input box so the next question does not start with the old text.
    submit_event = user_input.submit(fn=chat_with_docs, inputs=user_input, outputs=chatbot)
    submit_event.then(fn=lambda: "", inputs=None, outputs=user_input)

demo.launch()
