In [4]:
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import h5py

# Load the biomarker results from 'fmri_biomarkers.csv' (a relative path, so it
# is resolved against the current working directory) and report the row count.
fmri_biomarkers = pd.read_csv('fmri_biomarkers.csv')
print(f"加载了 {len(fmri_biomarkers)} 个fMRI biomarkers")

# Map a flat index onto (row, col) coordinates of a square matrix
def index_to_matrix_coords(index, matrix_size=200):
    """Split a flat index into a ``(row, col)`` pair.

    The index is read in row-major order for a matrix with ``matrix_size``
    columns: the quotient of the division is the row and the remainder is
    the column.
    """
    return divmod(index, matrix_size)

# Read ROI names from the HDF5 data file, falling back to generic ROI ids
def _roi_name_to_str(value):
    """Convert one HDF5 entry into a plain ``str``.

    Must be called while the file is still open: an h5py dataset can no
    longer be read once the file it belongs to has been closed.
    """
    if isinstance(value, h5py.Dataset):
        value = value[()]  # pull the stored value into memory
    if isinstance(value, bytes):
        return value.decode("utf-8", errors="replace")
    return str(value)


def get_roi_names(file_path, graph_type="cc200", num_rois=200):
    """Return a ``{roi_id: roi_name}`` mapping for the atlas.

    The names are looked up in the HDF5 file at ``file_path``, first under the
    top-level ``"roi_names"`` entry and otherwise under
    ``"atlas"/<graph_type>/"roi_names"``. Entry keys are converted with
    ``int`` and entry values are materialised as plain strings before the
    file is closed.

    Parameters:
    - file_path: path of the HDF5 file to read
    - graph_type: atlas key used for the ``"atlas"`` lookup
    - num_rois: number of generic ``ROI_<i>`` names generated when nothing
      could be read from the file

    Returns:
    - dict mapping integer ROI ids to names. When the file cannot be read or
      contains no names, ``{i: "ROI_<i>"}`` for ``i`` in ``range(num_rois)``.

    Any error raised while reading is reported with ``print`` and never
    propagated.
    """
    roi_names = {}
    try:
        with h5py.File(file_path, "r") as f:
            if "roi_names" in f:
                source = f["roi_names"]
            elif "atlas" in f and graph_type in f["atlas"]:
                source = f["atlas"][graph_type]["roi_names"]
            else:
                source = None

            if source is not None:
                # Convert inside the ``with`` block; the entries are only
                # readable while the file is open.
                roi_names = {
                    int(key): _roi_name_to_str(value)
                    for key, value in source.items()
                }
    except Exception as e:
        # Deliberately broad: the lookup is best-effort and any failure
        # falls through to the generic names below.
        print(f"无法从数据文件中获取ROI名称: {e}")

    # Fall back to generic ROI ids when no names could be read
    if not roi_names:
        roi_names = {i: f"ROI_{i}" for i in range(num_rois)}

    return roi_names

# Translate flat biomarker indices into pairs of ROI names
def convert_biomarkers_to_roi_connections(biomarker_indices, roi_names, matrix_size=200):
    """Return one ``(roi1_name, roi2_name)`` tuple per biomarker index.

    Each index is converted to ``(row, col)`` with ``index_to_matrix_coords``;
    the row and column are then looked up in ``roi_names``. An id missing
    from ``roi_names`` is rendered as ``ROI_<id>``.
    """
    coords = (index_to_matrix_coords(idx, matrix_size) for idx in biomarker_indices)
    return [
        (roi_names.get(row, f"ROI_{row}"), roi_names.get(col, f"ROI_{col}"))
        for row, col in coords
    ]

# Resolve the ROI names from the dataset file
file_path = "/home/yangzongxian/xlz/ASD_GCN/microbe/data2/abide.hdf5"
roi_names = get_roi_names(file_path, graph_type="cc200")
print(f"获取了 {len(roi_names)} 个ROI名称")

# Map every biomarker's flat feature index to a pair of ROI names
biomarker_indices = fmri_biomarkers['feature_index'].values
roi_connections = convert_biomarkers_to_roi_connections(biomarker_indices, roi_names)

# Attach each connection's importance, giving (roi1, roi2, importance) triples
roi_connections_with_importance = [
    (roi1, roi2, importance)
    for (roi1, roi2), importance in zip(roi_connections, fmri_biomarkers['importance'].values)
]

# Define the visualize_roi_network function
def visualize_roi_network(roi_connections, top_k=20, output_file='roi_network.png'):
    """
    Draw the most important ROI connections as an undirected network and
    save the figure to disk.

    Parameters:
    - roi_connections: iterable of (roi1_name, roi2_name, importance) tuples,
      in any order
    - top_k: number of highest-importance entries to draw; values <= 0 draw
      no edges
    - output_file: path of the image file to write
    """
    # Rank by importance so that "top_k" really selects the k most important
    # entries rather than the first k in input order.
    ranked = sorted(roi_connections, key=lambda conn: conn[2], reverse=True)
    selected = ranked[:max(top_k, 0)]

    # Build the graph
    G = nx.Graph()
    for roi1, roi2, importance in selected:
        # The graph is undirected, so (A, B) and (B, A) are the same edge.
        # Entries arrive in descending importance, so the first weight seen
        # for an edge is its largest; do not let a later duplicate replace it.
        if not G.has_edge(roi1, roi2):
            G.add_edge(roi1, roi2, weight=importance)

    fig = plt.figure(figsize=(15, 12))
    try:
        pos = nx.spring_layout(G, seed=42, k=1, iterations=50)

        # Draw the nodes
        nx.draw_networkx_nodes(G, pos, node_color='lightblue',
                               node_size=700, alpha=0.8)

        # Draw the edges
        edges = list(G.edges())
        if edges:
            magnitudes = [abs(G[u][v]['weight']) for u, v in edges]
            largest = max(magnitudes)
            # Importances can be tiny (on the order of 1e-4); used directly as
            # a line width they would be practically invisible. Scale them
            # into a 1-5 pt range relative to the strongest edge instead.
            if largest > 0:
                widths = [1.0 + 4.0 * m / largest for m in magnitudes]
            else:
                widths = [1.0] * len(edges)
            nx.draw_networkx_edges(G, pos, edgelist=edges, width=widths,
                                   alpha=0.5, edge_color='blue')

        # Draw the labels
        nx.draw_networkx_labels(G, pos, font_size=8, font_weight='bold')

        plt.title('Top ROI Connections in ASD Biomarkers', fontsize=16)
        plt.axis('off')
        plt.tight_layout()
        plt.savefig(output_file, dpi=300, bbox_inches='tight')
    finally:
        # Release the figure even when drawing or saving fails
        plt.close(fig)

    print(f"ROI网络图已保存为 {output_file}")

# Render the network figure from the (roi1, roi2, importance) triples
visualize_roi_network(roi_connections_with_importance, top_k=20, output_file='roi_network.png')

# Assemble the results table, one row per biomarker connection,
# ordered from the highest importance to the lowest
results = pd.DataFrame(roi_connections, columns=['ROI1', 'ROI2']).assign(
    importance=fmri_biomarkers['importance'].values,
    frequency=fmri_biomarkers['frequency'].values,
).sort_values('importance', ascending=False)

# Print the 10 most important ROI connections
print("前10个最重要的ROI连接:")
top_rows = results.head(10)
report_columns = ('ROI1', 'ROI2', 'importance', 'frequency')
for i, (roi1, roi2, importance, freq) in enumerate(zip(*(top_rows[col] for col in report_columns))):
    print(f"{i+1}. {roi1} - {roi2}: 重要性={importance:.6f}, 频率={freq:.2f}")

# Save the full table to a CSV file
results.to_csv('roi_connections.csv', index=False)
print("ROI连接结果已保存为 roi_connections.csv")

加载了 103 个fMRI biomarkers
无法从数据文件中获取ROI名称: [Errno 2] Unable to open file (unable to open file: name = '/home/yangzongxian/xlz/ASD_GCN/microbe/data2/abide.hdf5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)
获取了 200 个ROI名称
ROI网络图已保存为 roi_network.png
前10个最重要的ROI连接:
1. ROI_156 - ROI_101: 重要性=0.000326, 频率=1.00
2. ROI_101 - ROI_156: 重要性=0.000325, 频率=1.00
3. ROI_39 - ROI_147: 重要性=0.000277, 频率=1.00
4. ROI_147 - ROI_39: 重要性=0.000274, 频率=1.00
5. ROI_111 - ROI_93: 重要性=0.000273, 频率=1.00
6. ROI_35 - ROI_199: 重要性=0.000271, 频率=1.00
7. ROI_93 - ROI_111: 重要性=0.000271, 频率=1.00
8. ROI_199 - ROI_35: 重要性=0.000270, 频率=1.00
9. ROI_192 - ROI_177: 重要性=0.000262, 频率=1.00
10. ROI_173 - ROI_175: 重要性=0.000260, 频率=1.00
ROI连接结果已保存为 roi_connections.csv
