pip install gradio

In [2]:
import gradio as gr
import torch
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from transformers import AutoTokenizer, pipeline, BitsAndBytesConfig, AutoModelForCausalLM

# ========== 1. Load the FAISS vector store ==========
# Embedding model handed to the index loader below.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
# NOTE(review): allow_dangerous_deserialization=True is an explicit opt-in to
# loading the serialized index from disk; only point this at an index
# directory you created yourself or otherwise trust.
vectorstore = FAISS.load_local(
    folder_path="qa_index_cleaned",
    embeddings=embedding_model,
    allow_dangerous_deserialization=True,
)
# Retriever configured with k=6 in its search kwargs.
retriever = vectorstore.as_retriever(search_kwargs=dict(k=6))

# ========== 2. Load the quantized Qwen model ==========
model_name = "unsloth/qwen2-1.5b-bnb-4bit"

# Quantization settings: 4-bit loading with float16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    load_in_4bit=True,
)

# Tokenizer and model both come from the same checkpoint name.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
    trust_remote_code=True,
)

# Text-generation pipeline that the LangChain wrapper below delegates to.
generate_text = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    # Sampling configuration.
    do_sample=True,
    temperature=0.7,
    repetition_penalty=1.1,
    # Output configuration.
    max_new_tokens=512,
    return_full_text=False,
)

# Wrap the pipeline so it can be used as the chain's LLM.
llm = HuggingFacePipeline(pipeline=generate_text)

# ========== 3. Build the RAG chain ==========
# Retrieval QA chain of type "stuff" that also returns the source documents
# (read back as result["source_documents"] in rag_answer).
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

# Global chat history: one (user_input, answer, sources) tuple per turn,
# appended by chatbot_fn and rebound to an empty list by clear_chat.
chat_history = []

# ========== 4. Question-answering function ==========
def rag_answer(query):
    """Run ``query`` through ``qa_chain`` and return the answer with snippets.

    Args:
        query: The question text, passed to the chain under the "query" key.

    Returns:
        A ``(answer, sources)`` pair: ``answer`` is the chain's "result"
        value and ``sources`` is a list with one string per returned source
        document, holding the first 300 characters of its ``page_content``
        followed by "..." when the content is longer than that.
    """
    snippet_limit = 300

    response = qa_chain({"query": query})
    reply = response["result"]

    snippets = []
    for document in response["source_documents"]:
        content = document.page_content
        preview = content[:snippet_limit]
        # Mark previews that were actually cut short.
        if len(content) > snippet_limit:
            preview += "..."
        snippets.append(preview)

    return reply, snippets

# ========== 5. Build the HTML chat transcript ==========
def chatbot_fn(user_input):
    """Answer ``user_input`` and return the whole conversation as HTML.

    The question is passed to ``rag_answer``; the resulting
    ``(question, answer, sources)`` triple is appended to the module-level
    ``chat_history`` and every stored turn is then rendered again.

    The question, the answer and each source snippet are HTML-escaped before
    being interpolated, so markup typed by the user or contained in the
    generated or retrieved text is displayed literally instead of being
    injected into the page.

    Args:
        user_input: The question text submitted by the user.

    Returns:
        A single HTML string with one user bubble and one bot bubble per
        stored turn. The bot bubble carries a collapsible list of source
        snippets when the turn has any.
    """
    global chat_history
    # Function-scope import: the file has no top-level ``import html``.
    from html import escape

    answer, sources = rag_answer(user_input)
    chat_history.append((user_input, answer, sources))

    user_avatar = "https://i.imgur.com/9neE8kD.png"
    bot_avatar = "https://i.imgur.com/62HqZa9.png"

    rendered_turns = []
    # ``turn_sources`` keeps the loop from shadowing ``sources`` above.
    for user_text, bot_text, turn_sources in chat_history:
        sources_html = ""
        if turn_sources:
            sources_items = "".join(
                f"<li style='margin-bottom:6px;'>{escape(str(src))}</li>"
                for src in turn_sources
            )
            sources_html = f"""
            <details style="margin-top:10px;">
                <summary style="cursor:pointer; font-weight:bold;">📄 查看来源文档 ({len(turn_sources)})</summary>
                <ul style="padding-left:20px; margin-top:8px; color:#555;">{sources_items}</ul>
            </details>
            """
        # Only the untrusted text is escaped; ``sources_html`` is markup
        # assembled above from already-escaped snippets.
        rendered_turns.append(f"""
        <div style="display:flex; align-items:flex-start; margin-bottom:10px;">
          <img src="{user_avatar}" style="width:40px; height:40px; border-radius:50%; margin-right:10px;"/>
          <div style="background:#DCF8C6; padding:10px; border-radius:10px; max-width:70%; white-space:pre-wrap;">{escape(str(user_text))}</div>
        </div>
        <div style="display:flex; align-items:flex-start; justify-content:flex-end; margin-bottom:20px;">
          <div style="background:#F1F0F0; padding:10px; border-radius:10px; max-width:70%; white-space:pre-wrap;">{escape(str(bot_text))}{sources_html}</div>
          <img src="{bot_avatar}" style="width:40px; height:40px; border-radius:50%; margin-left:10px;"/>
        </div>
        """)

    return "".join(rendered_turns)

# ========== 6. Clear the chat history ==========
def clear_chat():
    """Reset the module-level ``chat_history`` and blank the chat display.

    Returns:
        An empty string, used as the new content of the HTML output.
    """
    global chat_history
    # Rebind to a fresh list rather than mutating the existing one.
    chat_history = list()
    return ""

# ========== 7. Launch the Gradio UI ==========
with gr.Blocks() as demo:
    # Page heading.
    gr.Markdown("# ChatGPT 风格 RAG 问答助手")

    # Components: transcript area, question box, reset button.
    chat_display = gr.HTML()
    msg = gr.Textbox(label="输入", placeholder="请输入问题，回车发送...")
    clear = gr.Button("清空对话")

    # Submitting the textbox renders the updated transcript into the HTML area.
    msg.submit(fn=chatbot_fn, inputs=msg, outputs=chat_display)
    # The button resets the history and writes clear_chat's return value
    # (an empty string) into the HTML area.
    clear.click(fn=clear_chat, outputs=chat_display)

# Bind to 127.0.0.1 on port 7865.
demo.launch(server_port=7865, server_name="127.0.0.1")


  from .autonotebook import tqdm as notebook_tqdm
  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
Device set to use cuda:0
  llm = HuggingFacePipeline(pipeline=generate_text)


* Running on local URL:  http://127.0.0.1:7865
* To create a public link, set `share=True` in `launch()`.


