In [1]:
# -*- coding:utf-8 -*-
import numpy as np
import networkx as nx
from math import sqrt
import time
from components.utils import myadj2edg, merge_graphs,construct_graph

# Identifiers of the feature sets handled by this notebook; later cells
# turn each one into './features/<name>.txt' and './graphs_pearson/<name>.adjlist'.
names = [
    'bio72',
    'dpwk',
    'line',
    'lle',
    'n2v',
]

#### 功能：根据单个有序 features 文件构建基于 Pearson 相关系数的 OTU 关联网络  
输入：单个features文件名，如dpwk.txt  
输出：单个graph文件名，如dpwk.adjlist

In [2]:
def get_pearson_network( in_fname, out_fname, threshhold ):
    """Build a thresholded Pearson-correlation network from a feature matrix.

    Every row of the text file ``in_fname`` (read with ``np.loadtxt``) is the
    feature vector of one node; node ids are the 0-based row indices. An
    undirected edge ``(i, j)`` is added whenever the Pearson correlation
    coefficient ``r`` of rows ``i`` and ``j`` satisfies ``abs(r) > threshhold``.
    The graph is then written to ``out_fname`` with ``nx.write_adjlist``.

    Parameters
    ----------
    in_fname : str
        Path of the feature file, one node per row.
    out_fname : str
        Path of the adjacency-list file to write.
    threshhold : float
        Cut-off on the absolute correlation; the comparison is strict.

    Returns
    -------
    None
        The result is the file written to ``out_fname``; progress is printed.

    Notes
    -----
    * A row with zero variance has no defined correlation; it is kept as an
      isolated node instead of producing NaN values and runtime warnings.
    * A file holding a single row is accepted (it yields a one-node graph).
    * Only the upper triangle (j > i) is evaluated, one row at a time, so the
      extra memory needed is proportional to the number of rows rather than
      to its square.
    """
    print('in:{},out:{},threshhold:{} processing...'.format(in_fname, out_fname, threshhold))
    # ndmin=2 keeps a one-row file two-dimensional, so shape[0] is always
    # the number of nodes.
    X = np.loadtxt(in_fname, dtype=float, ndmin=2)
    m = X.shape[0]

    # Deviation of every value from its row mean, and sqrt(sum((x - x_avg)^2))
    # per row: numerator factors and denominator factors of Pearson's r.
    centered = X - np.mean(X, axis=1, keepdims=True)
    norms = np.sqrt((centered * centered).sum(axis=1))
    print('first_moment over')

    # Build the network from the upper triangle of the correlation matrix.
    G = nx.Graph()
    G.add_nodes_from(np.arange(m))
    current_edges = 0
    for i in range(m):
        # Correlation of row i with every later row in one vectorised step.
        numer = centered[i + 1:].dot(centered[i])
        denom = norms[i + 1:] * norms[i]
        # Where the denominator is zero (constant row) leave r at 0 => no edge.
        corr = np.divide(numer, denom, out=np.zeros_like(numer), where=denom > 0)
        # flatnonzero returns ascending offsets, so edges are inserted in the
        # same (i, j) order as a plain nested loop would produce.
        partners = np.flatnonzero(np.abs(corr) > threshhold) + (i + 1)
        G.add_edges_from((i, int(j)) for j in partners)
        current_edges += partners.size
        print('\rnode i:{}, current_edges: {}'.format(i, current_edges),end='')

    nx.write_adjlist(G, out_fname)
    print('\nnetwork saved')

#### 功能：宏观控制OTU关联网络构建  
参数：输入、输出文件名、阈值列表

In [None]:
# Correlation cut-offs, one per entry of `names` (matched by position).
threshholds = [0.5, 0.5, 0.7, 0.5, 0.8]
in_fnames = ['./features/{}.txt'.format(name) for name in names]
out_fnames = ['./graphs_pearson/{}.adjlist'.format(name) for name in names]

# Position (in `names`) of the feature set processed by this cell.
i = 0
# time.asctime() with no argument formats the current local time.
print('begin time:{}'.format(time.asctime()))
get_pearson_network(in_fnames[i], out_fnames[i], threshholds[i])
print('end time:{}'.format(time.asctime()))

begin time:Wed Apr 28 21:51:12 2021
in:./features/bio72.txt,out:./graphs_pearson/bio72.adjlist,threshhold:0.5 processing...
first_moment over
node i:2976, current_edges: 92274

#### Order-2 graphs (dpwk, n2v, lle, line) are not connected.   
As a result, some of the machine learning algorithms used for comparison do not work on them at all.  
To address this, k-nearest-neighbour graphs are built and merged in here.  
#### 功能：二阶图k近邻图构建，合并相似性图  
输入：名字dpwk, topk值 
输出：adjlist格式、edgelist合并图

In [3]:
# Step 1: generate the k-nearest-neighbour graph of every feature set.
k_input = 5
for name in names:
    print('%s k-neighbours graph constructing...' % name)
    construct_graph(name, topk=k_input)
    print('%s k-neighbours graph constructed...' % name)

dpwk k-neighbours graph constructing...


100%|██████████| 25023/25023 [00:06<00:00, 3712.97it/s]
25023it [00:00, 30076.50it/s]


dpwk k-neighbours graph constructed...
line k-neighbours graph constructing...


100%|██████████| 25023/25023 [00:09<00:00, 2730.00it/s]
25023it [00:00, 32949.17it/s]


line k-neighbours graph constructed...
lle k-neighbours graph constructing...


100%|██████████| 25023/25023 [00:08<00:00, 2817.14it/s]
25023it [00:00, 39618.30it/s]


lle k-neighbours graph constructed...
n2v k-neighbours graph constructing...


100%|██████████| 25023/25023 [00:07<00:00, 3255.17it/s]
25023it [00:01, 24277.17it/s]


n2v k-neighbours graph constructed...


In [3]:
# Step 2: merge the Pearson graph with the k-nearest-neighbour graph.
# `k_input` comes from the step-1 cell, which must have been run first.
for name in names:
    print('%s k-neighbours graph merging...' % name)
    merge_graphs(name, topk=k_input)
    print('%s k-neighbours graph merged...' % name)

dpwk k-neighbours graph merging...


  0%|          | 0/2299958 [00:00<?, ?it/s]

G1:./graphs_pearson/dpwk.adjlist & G2:./graphs_kneighbour/dpwk50.adjlist graphs merging...
before:G1 edges:2299958,G2 edges:1189250


100%|██████████| 2299958/2299958 [00:17<00:00, 134215.09it/s]
100%|██████████| 1189250/1189250 [00:08<00:00, 136502.47it/s]


after:G_merge edges:2852600
dpwk k-neighbours graph merged...
line k-neighbours graph merging...


  1%|          | 16615/2209212 [00:00<00:13, 166132.33it/s]

G1:./graphs_pearson/line.adjlist & G2:./graphs_kneighbour/line50.adjlist graphs merging...
before:G1 edges:2209212,G2 edges:963528


100%|██████████| 2209212/2209212 [00:15<00:00, 138598.88it/s]
100%|██████████| 963528/963528 [00:06<00:00, 147537.31it/s]


after:G_merge edges:2594563
line k-neighbours graph merged...
lle k-neighbours graph merging...


  0%|          | 0/2344121 [00:00<?, ?it/s]

G1:./graphs_pearson/lle.adjlist & G2:./graphs_kneighbour/lle50.adjlist graphs merging...
before:G1 edges:2344121,G2 edges:955529


100%|██████████| 2344121/2344121 [00:15<00:00, 155034.49it/s]
100%|██████████| 955529/955529 [00:06<00:00, 147077.06it/s]


after:G_merge edges:2487156
lle k-neighbours graph merged...
n2v k-neighbours graph merging...


  1%|          | 14632/2053600 [00:00<00:13, 146296.77it/s]

G1:./graphs_pearson/n2v.adjlist & G2:./graphs_kneighbour/n2v50.adjlist graphs merging...
before:G1 edges:2053600,G2 edges:1130155


100%|██████████| 2053600/2053600 [00:14<00:00, 139185.23it/s]
100%|██████████| 1130155/1130155 [00:08<00:00, 140807.97it/s]


after:G_merge edges:2596508
n2v k-neighbours graph merged...


#### 功能：adjlist文件转换为edgelist文件  
输入：dpwk.adjlist的名字dpwk等  
输出：dpwk.edgelist等, 如 0 1 \n0 6  

In [None]:
# myadj2edg()