In [None]:
import os

import numpy as np
from neo4j import GraphDatabase
from zhipuai import ZhipuAI

# Neo4j connection settings. Each value can be overridden through an
# environment variable of the same name so that credentials do not have to be
# edited into the source; the literals are the previous hard-coded values and
# remain the fallback when the variable is unset.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "neo4j123456")

class VectorSearcher:
    """Run vector-similarity lookups against the Neo4j 'introduction_vectors' index.

    The instance owns a Neo4j driver created from the module-level
    NEO4J_URI / NEO4J_USER / NEO4J_PASSWORD settings. Call ``close()`` when
    done, or use the instance as a context manager, to release the driver.
    """

    def __init__(self):
        """Open an unencrypted driver connection using the module-level settings."""
        self.driver = GraphDatabase.driver(
            NEO4J_URI,
            auth=(NEO4J_USER, NEO4J_PASSWORD),
            encrypted=False
        )

    def close(self):
        """Close the underlying Neo4j driver."""
        self.driver.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    @staticmethod
    def _as_float_list(vector):
        """Flatten ``vector`` (list, tuple or NumPy array) into a list of Python floats."""
        return np.asarray(vector, dtype=float).ravel().tolist()

    def search_leader(self, query_vector, top_k=3, college="昌新国际艺术学院"):
        """Return faculty whose introduction is closest to ``query_vector``.

        :param query_vector: embedding of the query; any sequence of numbers
            (including the NumPy array returned by ``text_to_vector``).
        :param top_k: number of nearest introduction nodes requested from the
            vector index.
        :param college: value matched against ``college.name``; defaults to
            the college that was previously hard-coded in the query.
        :return: list of dicts with keys ``name``, ``position``, ``intro`` and
            ``similarity``, ordered by descending similarity. An empty list
            is returned when the query fails (the error is printed).

        NOTE(review): the college filter is applied after the index has
        already returned its ``top_k`` nodes, so fewer than ``top_k`` rows can
        come back when some neighbours belong to other colleges.
        """
        cypher = """
        CALL db.index.vector.queryNodes(
            'introduction_vectors',  // 索引名称
            $top_k,                  // 返回结果数
            $query_vector            // 查询向量
        )
        YIELD node AS intro, score
        MATCH (intro)<-[:HAS_INTRODUCTION]-(t:Faculty)-[:WORKS_IN]->(college)
        WHERE college.name = $college
        RETURN t.name AS name, 
            t.position AS position,
            intro.content AS intro,
            score AS similarity
        ORDER BY similarity DESC
        """
        try:
            with self.driver.session() as session:
                result = session.run(
                    cypher,
                    # Send plain Python floats / int rather than NumPy values.
                    query_vector=self._as_float_list(query_vector),
                    top_k=int(top_k),
                    college=college
                )
                return [dict(record) for record in result]
        except Exception as e:
            # Best-effort lookup: report the failure and return no matches.
            print(f"查询失败: {str(e)}")
            return []

    @staticmethod
    def text_to_vector(text):
        """Embed ``text`` with the ZhipuAI ``embedding-3`` model.

        The API key is read from the ``ZHIPUAI_API_KEY`` environment variable
        (empty string when unset, matching the previous hard-coded value).

        :param text: input text to embed.
        :return: NumPy array holding the embedding requested with
            ``dimensions=1024``.
            NOTE(review): presumably this must match the dimension of the
            'introduction_vectors' index; confirm against the index definition.
        """
        api_key = os.environ.get("ZHIPUAI_API_KEY", "")
        client = ZhipuAI(api_key=api_key)
        response = client.embeddings.create(
            model="embedding-3",
            dimensions=1024,
            input=[text]
        )
        return np.array(response.data[0].embedding)

if __name__ == "__main__":
    # Demo: embed a sample query, look up the closest faculty introductions
    # and print them.
    searcher = VectorSearcher()
    try:
        # Build the query vector for the sample query.
        query_text = "中国画学科带头人"
        query_vector = searcher.text_to_vector(query_text)

        # Run the similarity search.
        results = searcher.search_leader(query_vector)

        # Print the matches.
        print(f"与「{query_text}」最匹配的{len(results)}位教师：")
        for i, res in enumerate(results, 1):
            print(f"\n第{i}名：{res['name']}（相似度：{res['similarity']:.4f}）")
            print(f"职称：{res['position']}")
            # A node without content yields None; slice an empty string instead.
            intro = res['intro'] or ""
            print(f"简介片段：{intro[:50]}...")
    finally:
        # Release the Neo4j driver even when embedding or printing fails.
        searcher.driver.close()

与「中国画学科带头人」最匹配的3位教师：

第1名：陈孟昕（相似度：0.8618）
职称：
简介片段：陈孟昕, 云南大学昌新国际艺术学院中国画学科带头人, 1957年4月生于河北邢台市, 原为湖北美术学...

第2名：王林旭（相似度：0.8553）
职称：
简介片段：王林旭, 云南大学昌新国际艺术学院特聘教授, 国家有突出贡献文化艺术专家, 国务院政府特殊津贴, 人...

第3名：张东华（相似度：0.8547）
职称：
简介片段：张东华, 云南大学昌新国际艺术学院特聘教授、研究生导师, 1967年生于浙江嵊州市, 中国思想与绘画...


In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import jieba
import numpy as np

# ======== Configuration ========
# Directory the fine-tuned classifier and its tokenizer are loaded from.
MODEL_PATH = "./intentionmodel/results4english"
# File the ONNX export is written to.
ONNX_PATH = "./intentionmodel/model.onnx"
# Class index -> intent name.
LABEL_MAP = dict(enumerate(("vector_db", "neo4j", "other")))
# Tokenizer max length (original note: keep consistent with training).
MAX_LENGTH = 64

# ======== Model and tokenizer initialisation ========
# Prefer the GPU when one is available, otherwise run on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the tokenizer from local files only
# (original note: domain vocabulary still needs to be added).
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_PATH,
    local_files_only=True,
)

# Load the classifier from local files only and move it to the chosen device.
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_PATH,
    local_files_only=True,
)
model = model.to(device)

# ======== ONNX export ========
def export_onnx():
    """Export the module-level ``model`` to ``ONNX_PATH``.

    A sample query is tokenized to ``MAX_LENGTH`` tokens and used as the
    example input. The exported graph takes ``input_ids`` and
    ``attention_mask`` (batch and sequence axes declared dynamic) and yields
    ``logits`` (batch axis declared dynamic). Prints the destination path
    once the export has finished.
    """
    # Switch to evaluation mode before exporting.
    model.eval()

    # Tokenize a placeholder query to obtain example inputs.
    encoded = tokenizer(
        "example query",
        max_length=MAX_LENGTH,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )

    # Move both example tensors to the same device as the model.
    sample_args = tuple(
        encoded[key].to(device) for key in ("input_ids", "attention_mask")
    )

    # Both inputs share the same dynamic batch / sequence axes.
    batch_and_seq = {0: "batch_size", 1: "sequence_length"}
    axes = {
        "input_ids": dict(batch_and_seq),
        "attention_mask": dict(batch_and_seq),
        "logits": {0: "batch_size"},
    }

    torch.onnx.export(
        model,
        sample_args,
        ONNX_PATH,
        input_names=["input_ids", "attention_mask"],
        output_names=["logits"],
        dynamic_axes=axes,
        opset_version=13,
        do_constant_folding=True,
    )
    print(f"ONNX模型已导出到：{ONNX_PATH}")

# ======== Prediction ========
def predict(text):
    """Classify ``text`` with the module-level model and tokenizer.

    :param text: the query to classify.
    :return: dict with ``text`` (the input), ``intent`` (the ``LABEL_MAP``
        name of the highest-probability class), ``confidence`` (that class's
        softmax probability as a float) and ``details`` (intent name ->
        probability for every index in ``range(len(LABEL_MAP))``).
    """
    # Tokenize to a fixed MAX_LENGTH and move the batch to the model's device.
    encoded = tokenizer(
        text,
        max_length=MAX_LENGTH,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )
    encoded = encoded.to(device)

    # Forward pass without gradient tracking.
    with torch.no_grad():
        logits = model(**encoded).logits

    # Softmax over the class axis; keep the first (only) row as a NumPy vector.
    probs = torch.softmax(logits, dim=-1).cpu().numpy()[0]
    best = np.argmax(probs)

    per_intent = {}
    for idx in range(len(LABEL_MAP)):
        per_intent[LABEL_MAP[idx]] = float(probs[idx])

    return {
        "text": text,
        "intent": LABEL_MAP[best],
        "confidence": float(probs[best]),
        "details": per_intent,
    }

# ======== Interactive test ========
if __name__ == "__main__":
    # Export the ONNX model first.
    export_onnx()

    print("\n输入'q'退出测试")
    while True:
        try:
            text = input("\n请输入问题：").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C ends the session without a traceback.
            break

        if text.lower() == 'q':
            break
        if not text:
            # Nothing to classify; ask again.
            continue

        result = predict(text)
        print(f"\n预测结果：{result['intent']}（置信度：{result['confidence']:.2%}）")
        print("详细概率：")
        for k, v in result["details"].items():
            print(f"  {k}: {v:.2%}")

  from .autonotebook import tqdm as notebook_tqdm


OSError: 页面文件太小，无法完成操作。 (os error 1455)