In [None]:
from utils import *
from os.path import join
import os

# Load Knowledge Graph
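Load the knowledge graph: reuse the cached graph and node map if they already exist; otherwise build them from the raw link file with the `process_knowledge_graph` function.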

In [None]:
# UniBioMap knowledge graph: directory layout
#   link_root -> directory holding the raw link file
#   data_root -> directory holding the processed artifacts cached by this notebook
link_root = "database/unibiomap"
data_root = "database/processed"

# Raw input (.tsv link file)
link_path = join(link_root, "unibiomap.links.tsv")

# Cached outputs; the output directory is created up front
os.makedirs(data_root, exist_ok=True)
graph_path = join(data_root, "unibiomap_simp.dgl")
node_map_path = join(data_root, "node_map.json")

def nodemap2idmap(node_map):
    """Invert every inner mapping of a two-level dictionary.

    Args:
        node_map: dict of dicts, ``{outer_key: {inner_key: inner_value}}``
            (presumably ``{node_type: {node_name: node_id}}`` -- confirm
            against ``process_knowledge_graph``).

    Returns:
        A new dict with the same outer keys whose inner dicts have keys and
        values swapped. If an inner dict contains duplicate values, the entry
        iterated last wins.
    """
    id_map = {}
    for group, forward in node_map.items():
        id_map[group] = {value: key for key, value in forward.items()}
    return id_map

# Reuse the cached graph and node map when both files are present;
# otherwise build them from the raw link file and cache the result.
cache_ready = all(os.path.exists(p) for p in (node_map_path, graph_path))

if cache_ready:
    print("Found existing data files, loading...")
    with open(node_map_path, "r") as fh:
        node_map = json.load(fh)
    # load_graphs returns the list of stored graphs first; only one was saved
    graph_list = dgl.load_graphs(graph_path)[0]
    graph = graph_list[0]
else:
    print("Data files not found, processing from raw links...")
    # Fetch the raw link file first if it is missing
    if not os.path.exists(link_path):
        download_raw_kg(link_path)

    # simplify_edge=True ignores the fine-grained relations and reduces each
    # relation type to <head entity type>_<tail entity type>, e.g. compound_protein
    graph, node_map = process_knowledge_graph(link_path, simplify_edge=True)

    # Persist both artifacts so that later runs take the cached branch above
    os.makedirs(data_root, exist_ok=True)
    dgl.save_graphs(graph_path, [graph])
    with open(node_map_path, "w") as fh:
        json.dump(node_map, fh)

# Inverse lookup of node_map (inner keys and values swapped)
id_map = nodemap2idmap(node_map)
print("Done")


# Get Subgraph
Use the `subgraph_by_node` function to obtain the subgraph for specific nodes.

In [None]:
# Accession of the query protein. It is defined once so that the sampled node
# and the output directory below always refer to the same entity.
query_protein = "P05091"

# Query nodes used to sample the subgraph, keyed by node type
sample_dict = {
    "protein": [query_protein],
}
# Sampling depth (passed to `subgraph_by_node` as `depth`)
depth = 1
# Directory under which the subgraph outputs are saved
sub_save_root = f"database/sub/{query_protein}"

In [None]:
# Build the subgraph around the query nodes with `subgraph_by_node`.
# The mapping returned for the subgraph is bound to its own name instead of
# overwriting `node_map`: `node_map` belongs to the full `graph`, and this
# cell passes both of them in, so rebinding it would make the cell fail or
# misbehave when re-run (e.g. after changing `sample_dict` or `depth`).
sub_g, new2orig, sub_node_map = subgraph_by_node(graph, sample_dict, node_map, depth=depth)

# Print basic information about the subgraph
print(sub_g)

# Collect the entities and triplets of the subgraph.
# `id_map` now refers to the subgraph; the cells below rely on that.
id_map = nodemap2idmap(sub_node_map)
entities, triplets = report_subgraph(sub_g, id_map, save_root=sub_save_root)

# Visualize Subgraph
Use `networkx` and `pyvis` to render the subgraph as a static figure and as an interactive view, respectively.

In [None]:
# Display configuration
# Number of nodes to display per node type; -1 means no limit
display_limits = dict(
    complex=10,
    compound=10,
    disease=10,
    genetic_disorder=10,
    go=10,
    pathway=10,
    phenotype=10,
    protein=10,
)
# Nodes that must always be displayed: node type -> list of node names
# (the names must match the names used in id_map).
# Entries for other node types can be added as needed.
must_show = dict(protein=["P05091"])
# Whether self-loops are removed
remove_self_loop = True

G = convert_subgraph_to_networkx(
    sub_g,
    id_map,
    display_limits,
    must_show,
    remove_self_loop,
)

In [None]:
# Visualization settings
# Colour used for each node type
color_map = dict(
    complex='#FFA07A',
    compound='#98FB98',
    disease='#FFD700',
    genetic_disorder='#FF69B4',
    go='#87CEEB',
    pathway='#DDA0DD',
    phenotype='#808080',
    protein='#FF6347',
)

# Settings for the static (matplotlib) figure
node_size, font_size = 500, 10
font_color, edge_color = "black", "gray"

In [None]:
import networkx as nx
import matplotlib.pyplot as plt

# Static rendering of the NetworkX graph with matplotlib
plt.figure(figsize=(12, 12))
pos = nx.spring_layout(G, seed=42)  # node positions; the seed is fixed at 42

# One colour per node, looked up from the node's 'group' attribute;
# groups missing from color_map fall back to gray
node_colors = [
    color_map.get(attrs['group'], 'gray')
    for _, attrs in G.nodes(data=True)
]

# Each node's 'label' attribute is used as its text label
labels = nx.get_node_attributes(G, 'label')

nx.draw(
    G,
    pos,
    labels=labels,
    node_size=node_size,
    node_color=node_colors,
    font_size=font_size,
    font_color=font_color,
    edge_color=edge_color,
)
# plt.title("Subgraph Visualization")  # uncomment to add a title
plt.show()

In [None]:
from pyvis.network import Network
from IPython.display import IFrame, display

# Interactive visualization with pyvis.
# NOTE(review): cdn_resources='in_line' should embed the JS/CSS resources in
# the generated HTML file -- confirm against the installed pyvis version.
net = Network(height='750px', width='100%', notebook=True, cdn_resources='in_line', bgcolor='#ffffff', font_color='black')
net.from_nx(G)

# Make sure the output directory exists before pyvis writes the HTML file;
# this cell must not depend on an earlier cell having created it.
os.makedirs(sub_save_root, exist_ok=True)
html_root = join(sub_save_root, "dgl_subgraph.html")

# net.show() writes the HTML file. In notebook mode it returns an IFrame,
# which has to be displayed explicitly because the call is not the last
# expression of the cell.
view = net.show(html_root)
if view is not None:
    display(view)
print(f"Subgraph visualization saved in {html_root}")