In [None]:
# Cell 2: 导入与环境配置
import os
import asyncio
from pathlib import Path
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.vectorstores import FAISS
# 修复：使用新的导入路径
from langchain_core.documents import Document
from dotenv import load_dotenv, find_dotenv

# 1. Load environment variables. find_dotenv() locates the .env file and
#    override=True lets its values replace variables that are already set.
load_dotenv(find_dotenv(), override=True)

# 2. File ID and directories used for this debugging session.
#    The index directory follows the layout <DATA_ROOT>/<file_id>/index_faiss.
DEBUG_FILE_ID = "debug_rag_001"
DATA_ROOT = Path("data")
INDEX_PATH = DATA_ROOT / DEBUG_FILE_ID / "index_faiss"

# 3. 创建 Mock 索引数据的函数
def create_mock_index():
    """Build and persist a small FAISS index of sample course snippets.

    Five hard-coded text snippets are wrapped in ``Document`` objects (with a
    1-based ``page`` number in the metadata), embedded on CPU with the model
    named by the ``EMBEDDING_MODEL_NAME`` environment variable (default
    ``BAAI/bge-small-zh-v1.5``) and saved under ``INDEX_PATH``.

    Creation is skipped only when a saved index file is already present.
    Checking for the file rather than the directory means that an empty or
    half-written directory left behind by a failed run is rebuilt instead of
    being mistaken for a usable index.
    """
    # FAISS.save_local stores the index as "<index_name>.faiss"; the default
    # index_name is "index".
    index_file = INDEX_PATH / "index.faiss"
    if index_file.exists():
        print(f"Index already exists at {INDEX_PATH}, skipping creation.")
        return

    print("Creating mock FAISS index for debugging...")
    # Sample course content used as the retrieval corpus.
    texts = [
        "本课程主要讲解 Python 高级编程。",
        "在 RAG 系统中，Retrieval 指的是检索环节，Generation 指的是生成环节。",
        "DeepSeek 是一个强大的开源大模型，尤其在编码和数学领域表现优异。",
        "FAISS 是 Facebook 开源的向量检索库，用于高效的相似度搜索。",
        "要启动本项目，请运行 `python server.py`，默认端口为 8000。"
    ]
    docs = [
        Document(page_content=text, metadata={"page": page})
        for page, text in enumerate(texts, start=1)
    ]

    # Embedding model; vectors are normalized at encode time.
    emb = HuggingFaceBgeEmbeddings(
        model_name=os.getenv("EMBEDDING_MODEL_NAME", "BAAI/bge-small-zh-v1.5"),
        model_kwargs={'device': 'cpu'},
        encode_kwargs={'normalize_embeddings': True}
    )

    vs = FAISS.from_documents(docs, embedding=emb)
    INDEX_PATH.mkdir(parents=True, exist_ok=True)
    vs.save_local(str(INDEX_PATH))
    print("Mock index created successfully.")

# Run the index creation when this cell executes. Any failure is printed
# rather than raised, so the cell itself never aborts with a traceback.
try:
    create_mock_index()
except Exception as e:
    print(f"Error creating mock index: {e}")

In [None]:
# Cell 3: 核心逻辑定义
from typing import List, Dict, Any, Tuple, AsyncGenerator
from collections import defaultdict
from langchain.chat_models import init_chat_model

# --- 1. 状态管理 ---
# In-memory chat transcripts keyed by session id. Each transcript is a list
# of {"role": ..., "content": ...} dicts in the order they were appended.
_sessions: dict[str, list[dict]] = defaultdict(list)


def get_history(session_id: str) -> list[dict]:
    """Return the stored message list for *session_id*.

    A known session yields its live list (the same object that
    ``append_history`` mutates). An unknown session yields a fresh empty list
    and is not registered in the store.
    """
    # A membership test is used instead of indexing because indexing a
    # defaultdict would create an entry for the missing key.
    if session_id in _sessions:
        return _sessions[session_id]
    return []


def append_history(session_id: str, role: str, content: str) -> None:
    """Append one message to *session_id*, creating the session if needed."""
    message = {"role": role, "content": content}
    _sessions[session_id].append(message)


def clear_history(session_id: str) -> None:
    """Forget everything stored for *session_id*; unknown ids are ignored."""
    if session_id in _sessions:
        del _sessions[session_id]

# --- 2. Configuration constants (kept in sync with rag_agent.py) ---
MODEL_NAME = "deepseek-ai/DeepSeek-V3.2"  # model id passed to init_chat_model
SILICON_BASE_URL = "https://api.siliconflow.cn/v1"  # passed as openai_api_base
TEMPERATURE = 0  # temperature used by _get_llm
K = 3  # number of hits requested from the vector store in retrieve()
SCORE_TAU_TOP1 = 0.45  # _score_ok passes when the first score is <= this value
SCORE_TAU_MEAN3 = 0.60  # _score_ok passes when the mean of the first three scores is <= this value

# --- 3. Prompt templates ---
# SYSTEM_INSTRUCTION is sent verbatim as the system message. The other three
# are str.format templates: GRADE_PROMPT and ANSWER_WITH_CONTEXT take
# {context} and {question}; ANSWER_NO_CONTEXT takes {question}.
SYSTEM_INSTRUCTION = "你是九天老师团队开发的多模态 PDF 检索 RAG 聊天机器人..."
GRADE_PROMPT = "\nTask: Assess the relevance of a retrieved document to a user question.\n\nRetrieved document:\n{context}\n\nUser question: {question}\n\nReturn 'yes' if relevant, otherwise 'no'."
ANSWER_WITH_CONTEXT = "请使用提供的上下文回答用户的问题...\n{context}...\nQuestion: {question}"
# The {question} placeholder is required: answer_stream formats this template
# with question=..., and without the placeholder the user's question would be
# silently dropped from the prompt sent to the model.
ANSWER_NO_CONTEXT = "当前未找到与课程资料直接相关的片段...\nQuestion: {question}"

# --- 4. 辅助函数 ---
def _get_llm():
    """Create the chat model used to generate answers.

    The model is built through ``init_chat_model`` with the OpenAI provider,
    pointed at ``SILICON_BASE_URL`` and configured with ``MODEL_NAME`` and
    ``TEMPERATURE``. A new instance is created on every call.
    """
    settings = {
        "model": MODEL_NAME,
        "model_provider": "openai",
        "openai_api_base": SILICON_BASE_URL,
        "temperature": TEMPERATURE,
    }
    return init_chat_model(**settings)

def _get_grader():
    """Create the chat model used to grade retrieved context for relevance.

    Same provider, endpoint and model as the answering LLM, but the
    temperature is fixed at 0 here instead of following ``TEMPERATURE``.
    """
    grader = init_chat_model(
        temperature=0,
        model_provider="openai",
        openai_api_base=SILICON_BASE_URL,
        model=MODEL_NAME,
    )
    return grader

def _get_embeddings():
    """Create the BGE embedding model used to load and query the index.

    The model name comes from the ``EMBEDDING_MODEL_NAME`` environment
    variable and falls back to ``BAAI/bge-small-zh-v1.5``. Inference runs on
    CPU and embeddings are normalized at encode time. A new instance is
    created on every call.
    """
    model_name = os.getenv("EMBEDDING_MODEL_NAME", "BAAI/bge-small-zh-v1.5")
    model_options = {'device': 'cpu'}
    encode_options = {'normalize_embeddings': True}
    return HuggingFaceBgeEmbeddings(
        model_name=model_name,
        model_kwargs=model_options,
        encode_kwargs=encode_options,
    )

def _vs_dir(file_id: str) -> str:
    """Return the FAISS index directory for *file_id* as a string path."""
    index_dir = DATA_ROOT.joinpath(file_id, "index_faiss")
    return str(index_dir)

def _load_vs(file_id: str) -> FAISS:
    """Load the FAISS vector store saved for *file_id*.

    Raises:
        FileNotFoundError: if the index directory for *file_id* does not exist.
    """
    index_dir = _vs_dir(file_id)
    if os.path.exists(index_dir):
        # NOTE(security): allow_dangerous_deserialization=True permits
        # pickle-based loading of the stored docstore. Only load index
        # directories that this project wrote itself.
        return FAISS.load_local(
            index_dir,
            _get_embeddings(),
            allow_dangerous_deserialization=True,
        )
    raise FileNotFoundError(f"FAISS index not found at {index_dir}")

def _score_ok(scores: List[float]) -> bool:
    """Decide from raw retrieval scores whether the hits look relevant.

    The check passes when the first score is at most ``SCORE_TAU_TOP1`` or
    the mean of the first three scores (fewer if fewer are given) is at most
    ``SCORE_TAU_MEAN3``. An empty score list never passes.
    """
    if not scores:
        return False

    leading = scores[:3]
    top1 = scores[0]
    # len(leading) equals min(3, len(scores)) for a non-empty list.
    mean3 = sum(leading) / len(leading)
    print(f"[Debug] Scores: {scores}, Top1: {top1}, Mean3: {mean3}")

    if top1 <= SCORE_TAU_TOP1:
        return True
    return mean3 <= SCORE_TAU_MEAN3

# Status line printed once the definitions in this cell have been executed.
print("Service functions defined with SiliconFlow config.")

In [None]:
# Cell 4: 调试检索 (retrieve)
async def retrieve(question: str, file_id: str) -> tuple[list[dict], str]:
    """Retrieve the top-K snippets for *question* and decide whether to use them.

    The vector store for *file_id* is loaded and queried for ``K`` hits. The
    raw scores are first checked with ``_score_ok``; only when that check
    fails is the grader LLM asked whether the retrieved context is relevant.

    Args:
        question: The user's question, used as the similarity-search query.
        file_id: Identifier of the indexed file whose vector store is queried.

    Returns:
        A ``(citations, context_text)`` pair. ``citations`` has one dict per
        hit with ``citation_id``, ``snippet``, ``score`` and ``rank``.
        ``context_text`` is the numbered snippets joined by blank lines, or
        ``""`` when nothing was retrieved or the grader did not answer "yes".
        Citations are returned even when the context is rejected.

    Raises:
        FileNotFoundError: propagated from ``_load_vs`` when no index exists.
    """
    print(f"--- Retrieving for: {question} ---")
    vs = _load_vs(file_id)
    hits = vs.similarity_search_with_score(question, k=K)

    # With no hits there is nothing to grade, so skip the grader LLM call
    # (it would otherwise be asked to judge an empty context).
    if not hits:
        return [], ""

    citations = []
    ctx_snippets = []
    scores = []

    for i, (doc, score) in enumerate(hits, start=1):
        # Guard against a missing page_content once, before both the debug
        # preview and the snippet are derived from it.
        text = doc.page_content or ""
        print(f"Hit {i}: Score={score:.4f} | Content={text[:30]}...")
        snippet_short = text.strip()
        citations.append({
            "citation_id": f"{file_id}-c{i}",
            "snippet": snippet_short,
            "score": float(score),
            "rank": i
        })
        ctx_snippets.append(f"[{i}] {snippet_short}")
        scores.append(float(score))

    context_text = "\n\n".join(ctx_snippets)

    # Fast path: the scores alone are good enough, no LLM grading needed.
    if _score_ok(scores):
        return citations, context_text

    print("Invoking Grader LLM...")
    grader = _get_grader()
    grade_prompt = GRADE_PROMPT.format(context=context_text, question=question)
    decision = await grader.ainvoke([{"role": "user", "content": grade_prompt}])
    content = decision.content or ""
    print(f"Grader Output: {content}")

    # The grader is instructed to answer 'yes' or 'no'.
    ok_by_llm = "yes" in content.lower()
    return citations, context_text if ok_by_llm else ""

# Smoke test: run retrieve() against the mock index with a question that the
# sample snippets cover, and report which branch the result corresponds to.
# res1 is the (citations, context_text) tuple; a non-empty context_text means
# the context was accepted. Errors are printed instead of raised.
try:
    print(">>> Test Case 1: Relevant Question")
    res1 = await retrieve("RAG 系统里的 Generation 是什么意思？", DEBUG_FILE_ID)
    print(f"Result Branch: {'Found Context' if res1[1] else 'No Context'}\n")
except Exception as e:
    print(f"Error in Test Case 1: {e}")

In [None]:
# Cell 5: 调试流式回答 (answer_stream)
async def answer_stream(
    question: str,
    citations: list[dict],
    context_text: str,
    branch: str,
    session_id: str | None = None
) -> AsyncGenerator[dict, None]:
    """Stream the answer to *question* as a sequence of event dicts.

    Events are yielded in this order:

    * ``{"type": "citation", "data": <citation>}`` for each citation, only
      when *branch* is ``"with_context"``;
    * ``{"type": "token", "data": <text>}`` for each non-empty chunk from the
      LLM (a streaming failure is reported as one token starting ``"Error: "``);
    * ``{"type": "done", "data": {"used_retrieval": <bool>}}`` exactly once.

    Args:
        question: The user's question.
        citations: Citation dicts to emit before the answer.
        context_text: Retrieved context inserted into the prompt.
        branch: ``"with_context"`` to answer from *context_text*; any other
            value selects the no-context prompt.
        session_id: When given, earlier messages of that session are replayed
            to the LLM and the completed turn is appended to its history.
    """
    if branch == "with_context" and citations:
        for c in citations:
            yield {"type": "citation", "data": c}

    llm = _get_llm()
    history_msgs = get_history(session_id) if session_id else []

    # The context prompt is used only when there is context to show.
    if branch == "with_context" and context_text:
        user_prompt = ANSWER_WITH_CONTEXT.format(question=question, context=context_text)
    else:
        user_prompt = ANSWER_NO_CONTEXT.format(question=question)

    msgs = [{"role": "system", "content": SYSTEM_INSTRUCTION}]
    msgs.extend(history_msgs)
    msgs.append({"role": "user", "content": user_prompt})

    final_text_parts = []
    stream_failed = False
    try:
        async for chunk in llm.astream(msgs):
            delta = getattr(chunk, "content", None)
            if delta:
                final_text_parts.append(delta)
                yield {"type": "token", "data": delta}
    except Exception as e:
        stream_failed = True
        yield {"type": "token", "data": f"Error: {e}"}

    # Record the turn only when streaming completed. Storing a failed turn
    # would put an empty or truncated assistant message into the history that
    # is replayed to the model on every later turn of this session.
    if session_id and not stream_failed:
        # The raw question is stored, not the expanded prompt, so replayed
        # history does not carry old retrieved context.
        append_history(session_id, "user", question)
        append_history(session_id, "assistant", "".join(final_text_parts))

    yield {"type": "done", "data": {"used_retrieval": branch == "with_context"}}

async def test_pipeline(question):
    """Run retrieval and streaming for *question* and print the result.

    The branch is ``"with_context"`` when retrieval returned a non-empty
    context and ``"no_context"`` otherwise. Token events are printed as they
    arrive, followed by a ``[Done]`` marker. Citation events are ignored.
    History is kept under the fixed session id ``"test_sess"``.
    """
    print(f"\n====== User Question: {question} ======")
    citations, ctx_text = await retrieve(question, DEBUG_FILE_ID)
    branch = "no_context" if not ctx_text else "with_context"

    print("------ Streaming Response ------")
    events = answer_stream(question, citations, ctx_text, branch, "test_sess")
    async for event in events:
        kind = event["type"]
        if kind == "done":
            print("\n[Done]")
            continue
        if kind == "token":
            print(event["data"], end="", flush=True)

# End-to-end run of retrieval plus streamed answer for one sample question.
# Top-level await relies on the notebook's running event loop. Any error is
# printed instead of raised.
try:
    await test_pipeline("RAG 中的 Retrieval 是什么？")
except Exception as e:
    print(f"Total Pipeline Error: {e}")