In [1]:
# -*- coding:utf-8 -*-
import numpy as np
import networkx as nx
from math import sqrt
import time
from components.utils import myadj2edg, merge_graphs,construct_graph

#### 功能：单个根据有序features构建pearson系数OTU关联网络  
输入：单个features文件名，如dpwk.txt  
输出：单个graph文件名，如dpwk.adjlist

In [2]:
def get_pearson_network( in_fname, out_fname, threshhold ):
    """Build a Pearson-correlation network from a feature file and save it.

    Each row of the text file ``in_fname`` is one node's feature vector. An
    undirected edge (i, j) is added for every pair of rows whose Pearson
    correlation coefficient r satisfies ``r > threshhold`` or
    ``r < -threshhold``. All row indices are added as nodes, so rows without
    any qualifying partner remain as isolated nodes.

    Args:
        in_fname: path of the feature file, read with ``np.loadtxt``.
        out_fname: path the graph is written to in adjacency-list format.
        threshhold: correlation cut-off applied symmetrically around zero.

    Returns:
        None. The result is the file written to ``out_fname``; progress is
        reported on stdout.
    """
    print('in:{},out:{},threshhold:{} processing...'.format(in_fname, out_fname, threshhold))
    # ndmin=2 keeps a single-row (or single-column) file two-dimensional, so
    # the row-wise arithmetic below does not break on it.
    X = np.loadtxt( in_fname, dtype=float, ndmin=2)
    m = X.shape[0]

    # Deviation of every value from its row mean (x_j - x_avg) and, per row,
    # sqrt(sum((x - x_avg)^2)), the denominator term of the coefficient.
    X_first_moment = X - X.mean(axis=1, keepdims=True)
    X_ssfm2 = np.sqrt((X_first_moment ** 2).sum(axis=1))
    print('first_moment over')

    # Compute the correlation coefficients from the centred rows and build the
    # network; only the upper triangle (j > i) is evaluated.
    G = nx.Graph()
    G.add_nodes_from(np.arange(m))
    neg_threshhold = -1*threshhold
    current_edges = 0
    # A constant row has zero norm, so its coefficient is 0/0 = nan. nan fails
    # both comparisons below and therefore never produces an edge; only the
    # floating-point warning is silenced here.
    with np.errstate(divide='ignore', invalid='ignore'):
        for i in range(m):
            # Coefficients of row i against rows i+1 .. m-1 in one dot product.
            numerators = X_first_moment[i+1:] @ X_first_moment[i]
            coefficients = numerators / (X_ssfm2[i+1:] * X_ssfm2[i])
            hits = np.flatnonzero(
                (coefficients > threshhold) | (coefficients < neg_threshhold)
            )
            # hits are offsets relative to row i+1; shift back to row indices.
            G.add_edges_from((i, i + 1 + int(offset)) for offset in hits)
            current_edges += hits.size
            print('\rnode i:{}, current_edges: {}'.format(i, current_edges),end='')

    nx.write_adjlist(G, out_fname)
    print('\nnetwork saved')

#### 功能：宏观控制OTU关联网络构建  
参数：输入、输出文件名、阈值列表

In [2]:
# Feature sets to process; threshholds[k] is the correlation cut-off paired
# with names[k].
names = ['dpwk', 'line', 'lle', 'n2v']
threshholds = [0.5, 0.7, 0.5, 0.8]
# Input feature files and output adjacency lists, derived from the names.
in_fnames = ['./features/{}.txt'.format(stem) for stem in names]
out_fnames = ['./graphs_new/{}.adjlist'.format(stem) for stem in names]

# Index of the feature set handled by the (currently disabled) call below.
i = 0
print('begin time:{}'.format(time.asctime(time.localtime())))
# get_pearson_network(in_fnames[i], out_fnames[i], threshholds[i] )
print('end time:{}'.format(time.asctime(time.localtime())))

begin time:Sun Apr 25 22:41:30 2021
end time:Sun Apr 25 22:41:30 2021


#### Order-2 graphs (dpwk, n2v, lle, line) are not connected.  
As a result, some of the machine learning algorithms used for comparison do not work on them at all.  
To address this, k-nearest-neighbour graphs are used here.  
#### 功能：二阶图k近邻图构建，合并相似性图  
输入：名字dpwk等  
输出：adjlist格式、edgelist合并图

In [4]:
# Step 1: build the k-nearest-neighbour graph for every feature set.
for name in names:
    print(f'{name} k-neighbours graph constructing...')
    construct_graph(name)
    print(f'{name} k-neighbours graph constructed...')

dpwk k-neighbours graph constructing...


100%|██████████| 25023/25023 [00:08<00:00, 2877.91it/s]
25023it [00:03, 7343.78it/s]


dpwk k-neighbours graph constructed...
line k-neighbours graph constructing...


100%|██████████| 25023/25023 [00:09<00:00, 2568.74it/s]
25023it [00:03, 7315.45it/s]


line k-neighbours graph constructed...
lle k-neighbours graph constructing...


100%|██████████| 25023/25023 [00:06<00:00, 3727.05it/s]
25023it [00:03, 7009.73it/s]


lle k-neighbours graph constructed...
n2v k-neighbours graph constructing...


100%|██████████| 25023/25023 [00:05<00:00, 4244.84it/s]
25023it [00:03, 8063.87it/s]


n2v k-neighbours graph constructed...


In [3]:
# Step 2: merge the Pearson graph with the k-nearest-neighbour graph of each
# feature set.
for name in names:
    print(f'{name} k-neighbours graph merging...')
    merge_graphs(name)
    print(f'{name} k-neighbours graph merged...')

dpwk k-neighbours graph merging...


  1%|          | 16137/2299958 [00:00<00:14, 161356.30it/s]

G1:./graphs_pearson/dpwk.adjlist & G2:./graphs_kneighbour/dpwk30.adjlist graphs merging...
before:G1 edges:2299958,G2 edges:710565


100%|██████████| 2299958/2299958 [00:15<00:00, 150065.36it/s]
100%|██████████| 710565/710565 [00:05<00:00, 134381.75it/s]


after:G_merge edges:2578948
dpwk k-neighbours graph merged...
line k-neighbours graph merging...


  1%|          | 15480/2209212 [00:00<00:14, 154782.80it/s]

G1:./graphs_pearson/line.adjlist & G2:./graphs_kneighbour/line30.adjlist graphs merging...
before:G1 edges:2209212,G2 edges:582314


100%|██████████| 2209212/2209212 [00:14<00:00, 147943.80it/s]
100%|██████████| 582314/582314 [00:04<00:00, 139407.83it/s]


after:G_merge edges:2405052
line k-neighbours graph merged...
lle k-neighbours graph merging...


  1%|          | 16619/2344121 [00:00<00:14, 166176.29it/s]

G1:./graphs_pearson/lle.adjlist & G2:./graphs_kneighbour/lle30.adjlist graphs merging...
before:G1 edges:2344121,G2 edges:575980


100%|██████████| 2344121/2344121 [00:16<00:00, 145447.51it/s]
100%|██████████| 575980/575980 [00:03<00:00, 144941.89it/s]


after:G_merge edges:2396027
lle k-neighbours graph merged...
n2v k-neighbours graph merging...


  1%|          | 15293/2053600 [00:00<00:13, 152912.28it/s]

G1:./graphs_pearson/n2v.adjlist & G2:./graphs_kneighbour/n2v30.adjlist graphs merging...
before:G1 edges:2053600,G2 edges:678259


100%|██████████| 2053600/2053600 [00:13<00:00, 149148.23it/s]
100%|██████████| 678259/678259 [00:04<00:00, 144810.64it/s]


after:G_merge edges:2321598
n2v k-neighbours graph merged...


#### 功能：adjlist文件转换为edgelist文件  
输入：dpwk.adjlist的名字dpwk等  
输出：dpwk.edgelist等, 如 0 1 \n0 6  

In [None]:
# The adjlist -> edgelist conversion is currently disabled; uncomment the call
# below to run it.
# NOTE(review): myadj2edg is imported from components.utils; the files it
# reads and writes are not visible in this notebook — confirm before enabling.
# myadj2edg()