In [None]:
# 导入库
from openai import OpenAI
import json
from pyvis.network import Network
import networkx as nx

# 设置API
api_key = ""
api_base = ""
client = OpenAI(api_key=api_key, base_url=api_base)

# 读取文本文件
with open("test_fiction.txt", "r", encoding="utf-8") as f:
    text = f.read()


In [None]:
# 构建知识图谱
def split_text(text: str, chunk_size: int = 40000) -> list:
    return [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)]

def build_knowledge_graph(client: OpenAI, text: str) -> dict:
    chunks = split_text(text)
    all_entities = []
    all_relations = []

    for chunk in chunks:
        try:
            response = client.chat.completions.create(
                model="gpt-4",
                messages=[
                    {
                        "role": "system", 
                        "content": "从文本中提取实体和关系，输出严格JSON格式：{\"entities\": [{\"name\": \"实体名\", \"type\": \"类型\"}], \"relations\": [{\"source\": \"实体1\", \"target\": \"实体2\", \"type\": \"关系类型\"}]}"
                    },
                    {"role": "user", "content": chunk}
                ],
                response_format={"type": "json_object"},
                temperature=0.3
            )
            chunk_data = json.loads(response.choices[0].message.content)

            all_entities.extend([e for e in chunk_data["entities"] if e not in all_entities])
            all_relations.extend([r for r in chunk_data["relations"] if r not in all_relations])
        except Exception as e:
            print(f"处理文本块时出错: {str(e)}")
            continue

    return {"entities": all_entities, "relations": all_relations}




# 可视化知识图谱
def visualize_graph(graph_data):
    net = Network(height="600px", width="100%", notebook=True)
    
    # 添加节点
    for i, entity in enumerate(graph_data["entities"]):
        net.add_node(i, label=entity["name"], title=entity["type"])
    
    # 添加边
    for relation in graph_data["relations"]:
        source_idx = next(i for i, e in enumerate(graph_data["entities"]) if e["name"] == relation["source"])
        target_idx = next(i for i, e in enumerate(graph_data["entities"]) if e["name"] == relation["target"])
        net.add_edge(source_idx, target_idx, title=relation["type"])
    
    net.show("knowledge_graph.html")
    return net

# 搜索功能
def search_graph(graph_data, query):
    results = {"entities": [], "relations": []}
    
    # 实体搜索
    for entity in graph_data["entities"]:
        if query.lower() in entity["name"].lower():
            results["entities"].append(entity)
    
    # 关系搜索
    for relation in graph_data["relations"]:
        if query.lower() in relation["type"].lower():
            results["relations"].append(relation)
    
    return results

In [18]:
# 构建知识图谱
graph_data = build_knowledge_graph(client,text)
print("知识图谱构建完成!")

# 可视化
net = visualize_graph(graph_data)
print("知识图谱已可视化，查看 knowledge_graph.html")

处理文本块时出错: Error code: 403 - {'error': {'message': 'user quota is not enough (request id: 20250717154146493555227JgR5qJye)', 'type': 'new_api_error', 'param': '', 'code': 'insufficient_user_quota'}}
处理文本块时出错: Error code: 403 - {'error': {'message': 'user quota is not enough (request id: 20250717154146641052168AHZrbSa1)', 'type': 'new_api_error', 'param': '', 'code': 'insufficient_user_quota'}}
处理文本块时出错: Error code: 403 - {'error': {'message': 'user quota is not enough (request id: 20250717154146714037108xOWkPisN)', 'type': 'new_api_error', 'param': '', 'code': 'insufficient_user_quota'}}
知识图谱构建完成!


KeyError: 'target'

In [19]:
while True:
    query = input("\n输入搜索内容 (输入 'exit' 退出): ")
    if query.lower() == "exit":
        break
        
    results = search_graph(graph_data, query)
    
    print("\n搜索结果:")
    if results["entities"]:
        print("实体:")
        for entity in results["entities"]:
            print(f"- {entity['name']} ({entity['type']})")
    
    if results["relations"]:
        print("\n关系:")
        for relation in results["relations"]:
            print(f"- {relation['source']} → {relation['type']} → {relation['target']}")
    
    if not results["entities"] and not results["relations"]:
        print("未找到相关结果")



搜索结果:
实体:
- 叶文洁 (人物)

搜索结果:
未找到相关结果

搜索结果:
实体:
- 刘慈欣 (人物)
- 三体 (书籍)
- 中国 (地点)
- 1967年 (时间)
- 红色联合 (组织)
- 四．二八兵团 (组织)
- 叶哲泰 (人物)
- 绍琳 (人物)
- 叶文洁 (人物)
- 汪淼 (人物)
- 纳米中心 (地点)
- 同仁医院 (地点)
- 幽灵倒计时 (现象)
- 李瑶 (人物)
- 豆豆 (人物)
- 申玉菲 (人物)
- 丁仪 (人物)
- 杨冬 (人物)
- 沙瑞山 (人物)
- 大史 (人物)
- 墨子 (人物)
- 周文王 (人物)
- 纣王 (人物)
- 伏羲 (人物)
- 三体 (游戏)
- 潘寒 (人物)
- 徐冰冰 (人物)
- 魏成 (人物)
- 爱因斯坦 (人物)
- 牛顿 (人物)
- 冯·诺伊曼 (人物)
- 秦始皇 (人物)

关系:
- 叶哲泰 → 夫妻 → 绍琳
- 叶哲泰 → 父女 → 叶文洁
- 绍琳 → 母女 → 叶文洁
- 汪淼 → 工作于 → 纳米中心
- 汪淼 → 经历 → 幽灵倒计时


KeyError: 'target'