In [19]:
import networkx as nx
from networkx.algorithms import community
from community import community_louvain
import numpy as np
import scipy as sp
import pandas as pd
import markov_clustering as mc
import pickle

In [20]:
## preprocessing
# Read the weighted edge list into a graph whose node labels are strings;
# "#" marks the start of a comment in the input file.
# NOTE(review): this is an absolute, machine-specific path — consider a
# configurable data directory so the notebook runs elsewhere.
protein_links_path = "/Users/luwenquan/Desktop/Curr_Study/MATH3888/math3888-network-project/4932.protein.links.v11.5.txt"
G0 = nx.read_weighted_edgelist(
    protein_links_path,
    comments="#",
    nodetype=str,
)


In [21]:
# Edges whose score is at or below this value are dropped from G0.
threshold_score = 700

# Collect the edges to drop BEFORE mutating the graph: removing edges while
# iterating the live G0.edges view modifies the adjacency structure that the
# view is walking, which is unsafe. Reading the "weight" attribute by name also
# avoids depending on the position of the attribute in the edge-data dict.
low_score_edges = [
    (u, v)
    for u, v, weight in G0.edges(data="weight")
    if weight <= threshold_score
]
G0.remove_edges_from(low_score_edges)

In [22]:
essential_proteins = pd.read_csv("essential_proteins.csv", header = None)

# Build the lookup once as a set: the original rebuilt a list from column 1 and
# scanned it linearly for every node in the graph.
essential_ids = set(essential_proteins[1])

# node[5:] drops the first five characters of each node label before matching
# (presumably the "4932." taxon prefix — confirm against the edge list).
# Matching nodes are gathered first and then removed in one call, so the graph
# is never mutated while it is being iterated.
G0.remove_nodes_from(
    [node for node in G0.nodes() if node[5:] in essential_ids]
)

In [23]:
# Number of nodes left in G0 after the filtering steps above.
G0.number_of_nodes()

5098

In [24]:
# Restrict the analysis to the connected component with the most nodes.
# G is a subgraph view of G0 induced on that component's node set.
connected_node_sets = nx.connected_components(G0)
largest_cc = max(connected_node_sets, key=len)
G = G0.subgraph(largest_cc)

In [25]:
def generate_louvain_result_for_ensemble(graph, resolution, iteration):
    """Run Louvain community detection repeatedly and collect the labelings.

    Args:
        graph: Graph passed unchanged to ``community_louvain.best_partition``.
        resolution: Value forwarded as the ``resolution`` keyword.
        iteration: Number of independent Louvain runs to perform.

    Returns:
        A list with one entry per run. Each entry is the list of community
        labels taken from the partition dict's values, i.e. in the iteration
        order of the dict returned by ``best_partition``.
        NOTE(review): that order is not explicitly sorted by node — confirm it
        matches the node order expected by consumers of these rows.

    Side effects:
        Prints ``max(label) + 1`` for each run as the cluster count.
    """
    runs = []
    for _ in range(iteration):
        node_to_community = community_louvain.best_partition(graph, resolution=resolution)
        labels = list(node_to_community.values())
        runs.append(labels)
        # Cluster count is reported as highest label + 1.
        print("Number of clusters: {}".format(max(labels) + 1))
    return runs

def generate_markov_result_for_ensemble(graph, inflation, iteration):
    """Run Markov clustering (MCL) repeatedly and collect per-node labels.

    Args:
        graph: Graph exposing ``nodes()``; converted to a sparse matrix with
            ``nx.to_scipy_sparse_matrix``.
        inflation: Value forwarded as the ``inflation`` keyword of ``mc.run_mcl``.
        iteration: Number of MCL runs to perform.

    Returns:
        A list with one row per run. Each row holds exactly one cluster index
        per node, ordered by ``sorted(graph.nodes())``. When MCL places a node
        in several clusters, the lowest cluster index is used; a node that
        appears in no cluster is labelled ``-1``. This keeps every row the same
        length as the node list (the previous implementation appended one label
        per containing cluster, which shifted all later columns).

    Side effects:
        Prints the number of clusters found in each run.
    """
    # Cluster members returned by mc.get_clusters are matrix indices; map each
    # node to its matrix index once instead of calling list.index() for every
    # node on every run. Assumes matrix rows follow graph.nodes() order, as the
    # original lookup did.
    node_order = list(graph.nodes())
    matrix_index_of = {node: position for position, node in enumerate(node_order)}
    sorted_nodes = sorted(node_order)

    result = []
    for _ in range(iteration):
        # NOTE(review): to_scipy_sparse_matrix is deprecated in recent NetworkX
        # releases — confirm the installed version still provides it.
        matrix = nx.to_scipy_sparse_matrix(graph)
        mc_result = mc.run_mcl(matrix, inflation=inflation)
        mc_clusters = mc.get_clusters(mc_result)
        print("Number of clusters: {}".format(len(mc_clusters)))

        # First cluster wins for nodes that belong to more than one cluster.
        cluster_of_index = {}
        for cluster_id, members in enumerate(mc_clusters):
            for matrix_index in members:
                cluster_of_index.setdefault(matrix_index, cluster_id)

        result.append(
            [cluster_of_index.get(matrix_index_of[node], -1) for node in sorted_nodes]
        )
    return result

In [26]:
# Sanity check: G was built from a single connected component, so this should be True.
nx.is_connected(G)

True

In [27]:
# Export an unweighted (0/1) adjacency matrix of G together with the node labels.
A = nx.adjacency_matrix(G).todense()
# Any positive edge weight becomes 1, so only the presence of an edge is kept.
A[A > 0] = 1
np.save('adjacency_matrix.npy', A)
# Node labels in G.nodes iteration order.
# NOTE(review): presumably this is the row/column order of the matrix above — confirm.
np.save('node_indices.npy', np.array(G.nodes))


  A = nx.adjacency_matrix(G)


In [28]:
# Build the Louvain ensemble: 100 runs at resolution 0.1, one row of labels per run,
# then persist the stacked array to disk.
louvain_data = generate_louvain_result_for_ensemble(G, resolution=0.1, iteration=100)
louvain_np_array = np.asarray(louvain_data)
np.save("./louvain_data.npy", louvain_np_array)


Number of clusters: 239
Number of clusters: 242
Number of clusters: 246
Number of clusters: 236
Number of clusters: 241
Number of clusters: 240
Number of clusters: 244
Number of clusters: 243
Number of clusters: 243
Number of clusters: 241
Number of clusters: 241
Number of clusters: 239
Number of clusters: 245
Number of clusters: 242
Number of clusters: 242
Number of clusters: 240
Number of clusters: 237
Number of clusters: 240
Number of clusters: 241
Number of clusters: 243
Number of clusters: 241
Number of clusters: 242
Number of clusters: 238
Number of clusters: 238
Number of clusters: 237
Number of clusters: 238
Number of clusters: 240
Number of clusters: 243
Number of clusters: 238
Number of clusters: 244
Number of clusters: 242
Number of clusters: 240
Number of clusters: 239
Number of clusters: 239
Number of clusters: 243
Number of clusters: 240
Number of clusters: 238
Number of clusters: 241
Number of clusters: 236
Number of clusters: 241
Number of clusters: 244
Number of cluste

In [42]:
#with open("./louvain_data.npy", "wb") as f:
#    np.save(f, louvain_np_array)

In [44]:
#with open("./louvain_data.npy", "rb") as f:
#    E = np.load(f)
#    print(E.shape)

(10, 5064)


In [50]:
#sorted_node = np.array(sorted(list(G.nodes())))
#with open("./sorted_node.npy", "wb") as f:
#    np.save(f, sorted_node)


In [53]:
# Dense view of G's weighted adjacency matrix, shown as the cell output.
weighted_adjacency = nx.adjacency_matrix(G)
weighted_adjacency.toarray()

  nx.adjacency_matrix(G).toarray()


array([[  0., 347., 945., ...,   0.,   0.,   0.],
       [347.,   0.,   0., ...,   0.,   0.,   0.],
       [945.,   0.,   0., ...,   0.,   0.,   0.],
       ...,
       [  0.,   0.,   0., ...,   0.,   0.,   0.],
       [  0.,   0.,   0., ...,   0.,   0.,   0.],
       [  0.,   0.,   0., ...,   0.,   0.,   0.]])

In [None]:
# Keep `centrality` as the node -> score mapping: the per-cluster selection in
# the following cell indexes it by node (`centrality[k]`). Rebinding it to a
# sorted list of tuples, as before, makes that lookup fail with a TypeError.
centrality = nx.eigenvector_centrality(G)

# Separate listing of (node, score formatted to two decimals), sorted by node,
# for inspection only. The formatted strings are not used for comparisons.
centrality_sorted = sorted((v, f"{c:0.2f}") for v, c in centrality.items())

In [None]:
# NOTE: pickle.load can execute arbitrary code — only load files from a trusted source.
with open('neighbouring_clusters.pickle', 'rb') as f:
    neighbours = pickle.load(f)

# max_centrality_nodes[i] is the node with maximum centrality from cluster i
max_centrality_nodes = [
    max(cluster, key=lambda member: centrality[member])
    for cluster in neighbours
]

In [None]:
# Display the highest-centrality node selected for each cluster.
max_centrality_nodes