# In [None]:
import gradio as gr
import torch
import base64
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from transformers import AutoTokenizer, pipeline, BitsAndBytesConfig, AutoModelForCausalLM

# =================== 1. Base64 Image Loading ===================
def image_to_base64(path):
    """Encode an image file as a ``data:`` URI usable in an ``<img src>``.

    The MIME type is guessed from the file extension so that non-PNG images
    (JPEG, GIF, ...) are labelled correctly. When the extension is missing,
    unknown, or not an image type, ``image/png`` is used.

    Args:
        path: Filesystem path of the image to embed.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    import mimetypes  # local import keeps this block self-contained

    mime_type, _ = mimetypes.guess_type(path)
    if mime_type is None or not mime_type.startswith("image/"):
        # Fall back to the historical hard-coded type.
        mime_type = "image/png"

    with open(path, "rb") as f:
        payload = base64.b64encode(f.read()).decode("ascii")
    return f"data:{mime_type};base64,{payload}"

# Embed both local avatar images once, when this cell/module runs, so every
# rendered chat turn can reuse the same data URIs (user first, then bot).
user_avatar, bot_avatar = (
    image_to_base64(avatar_file) for avatar_file in ("man.png", "bot.png")
)

# =================== 2. Load FAISS Vector Store ===================
# Sentence-embedding model used to query the saved index.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# NOTE(security): allow_dangerous_deserialization=True permits loading the
# pickled part of the saved index. Only point this at an index directory that
# you created yourself or otherwise trust.
vectorstore = FAISS.load_local(
    "qa_index_cleaned",
    embedding_model,
    allow_dangerous_deserialization=True,
)

# Ask the store for the 6 best-matching chunks per query.
retriever = vectorstore.as_retriever(search_kwargs=dict(k=6))

# =================== 3. Load Quantized Qwen Model ===================
# Checkpoint to load, plus the bitsandbytes settings: 4-bit weights with
# float16 as the compute dtype.
model_name = "unsloth/qwen2-1.5b-bnb-4bit"
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    load_in_4bit=True,
)

# NOTE: trust_remote_code=True allows code shipped with the checkpoint to run
# locally; keep it only for repositories you trust.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
    trust_remote_code=True,
)

# Sampling-based generation that returns only the newly generated text
# (return_full_text=False), capped at 512 new tokens.
generate_text = pipeline(
    task="text-generation",
    tokenizer=tokenizer,
    model=model,
    max_new_tokens=512,
    return_full_text=False,
    do_sample=True,
    temperature=0.7,
    repetition_penalty=1.1,
)

# Wrap the transformers pipeline so LangChain can use it as an LLM.
llm = HuggingFacePipeline(pipeline=generate_text)

# =================== 4. Build RAG Chain ===================
# Retrieval-augmented QA chain: documents come from `retriever`, answers from
# `llm`, and the retrieved documents are returned alongside each answer.
qa_chain = RetrievalQA.from_chain_type(
    chain_type="stuff",
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
)

# Global chat history: one (user_input, answer, sources) tuple per turn, as
# appended by chatbot_fn. It is a single module-level list, so it is shared by
# everything running in this process rather than kept per browser session.
chat_history: list = []

# =================== 5. Q&A Function ===================
def rag_answer(query, max_source_chars=300):
    """Answer a question with the RAG chain and return source excerpts.

    Args:
        query: The user's question, passed to ``qa_chain`` under the
            ``"query"`` key.
        max_source_chars: Maximum number of characters kept from each source
            document. Longer documents are cut at this length and suffixed
            with ``"..."``. Defaults to 300.

    Returns:
        A ``(answer, sources)`` tuple: ``answer`` is the chain's ``"result"``
        value and ``sources`` is a list with one (possibly truncated) excerpt
        per returned source document.
    """
    result = qa_chain({"query": query})
    answer = result["result"]

    sources = []
    for doc in result["source_documents"]:
        excerpt = doc.page_content
        if len(excerpt) > max_source_chars:
            # Mark the cut so readers know the excerpt is incomplete.
            excerpt = excerpt[:max_source_chars] + "..."
        sources.append(excerpt)
    return answer, sources

# =================== 6. Generate HTML Chat Content ===================
def _render_chat_html(history):
    """Render stored chat turns as one HTML fragment of chat bubbles.

    All dynamic text (question, answer, source excerpts) is HTML-escaped so
    that characters such as ``<`` or ``&`` display literally instead of being
    interpreted as markup.

    Args:
        history: Iterable of ``(user_text, bot_text, sources)`` tuples, where
            ``sources`` is a list of source-excerpt strings.

    Returns:
        The concatenated HTML for every turn, or ``""`` for empty history.
    """
    # Import the function directly rather than the `html` module.
    from html import escape

    turns = []
    for user_text, bot_text, turn_sources in history:
        sources_html = ""
        if turn_sources:
            source_items = "".join(
                f"<li style='margin-bottom:6px;'>{escape(src)}</li>"
                for src in turn_sources
            )
            # Built without whitespace between tags: this markup sits inside a
            # `white-space:pre-wrap` bubble, where stray newlines/indentation
            # would be rendered visibly.
            sources_html = (
                '<details style="margin-top:10px;">'
                '<summary style="cursor:pointer; font-weight:bold;">'
                f"View source documents ({len(turn_sources)})</summary>"
                '<ul style="padding-left:20px; margin-top:8px; color:#555;">'
                f"{source_items}</ul>"
                "</details>"
            )
        turns.append(f"""
        <div style="display:flex; align-items:flex-start; margin-bottom:10px;">
          <img src="{user_avatar}" style="width:40px; height:40px; border-radius:50%; margin-right:10px;"/>
          <div style="background:#DCF8C6; padding:10px; border-radius:10px; max-width:70%; white-space:pre-wrap;">{escape(user_text)}</div>
        </div>
        <div style="display:flex; align-items:flex-start; justify-content:flex-end; margin-bottom:20px;">
          <div style="background:#F1F0F0; padding:10px; border-radius:10px; max-width:70%; white-space:pre-wrap;">{escape(bot_text)}{sources_html}</div>
          <img src="{bot_avatar}" style="width:40px; height:40px; border-radius:50%; margin-left:10px;"/>
        </div>
        """)
    # Join once instead of growing a string with += on every turn.
    return "".join(turns)


def chatbot_fn(user_input):
    """Answer one user message and return the whole conversation as HTML.

    A non-blank message is answered via ``rag_answer`` and appended to the
    module-level ``chat_history``. A blank or whitespace-only message is
    ignored so that no retrieval/generation is run for it; the existing
    conversation is simply re-rendered.

    Args:
        user_input: Text submitted from the input box.

    Returns:
        HTML markup for every turn recorded so far.
    """
    # `chat_history` is only mutated (never rebound) here, so no `global`
    # declaration is required.
    if user_input and user_input.strip():
        answer, sources = rag_answer(user_input)
        chat_history.append((user_input, answer, sources))
    return _render_chat_html(chat_history)

# =================== 7. Clear Chat History ===================
def clear_chat():
    """Forget every stored turn and blank the chat panel.

    Returns:
        An empty string, used as the new content of the HTML chat display.
    """
    global chat_history
    # Rebind to a fresh list rather than mutating the old one in place.
    chat_history = list()
    blank_markup = ""
    return blank_markup

# =================== 8. Launch Gradio UI ===================
with gr.Blocks() as demo:
    # Page title.
    gr.Markdown(value="# Careers Chatbot")

    # Widgets: rendered conversation, question box, and reset button.
    chat_display = gr.HTML()
    msg = gr.Textbox(
        label="Input",
        placeholder="Type your question and press enter...",
    )
    clear = gr.Button(value="Clear Chat")

    # Enter in the textbox answers the question and redraws the conversation;
    # the button wipes the history and empties the display.
    msg.submit(fn=chatbot_fn, inputs=msg, outputs=chat_display)
    clear.click(fn=clear_chat, outputs=chat_display)

# Serve on the loopback interface only, on a fixed port.
demo.launch(server_port=7865, server_name="127.0.0.1")
