In [1]:
import warnings

warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

In [2]:
### Generate a random benchmark graph with networkx's LFR_benchmark_graph
import networkx as nx
from networkx.generators.community import LFR_benchmark_graph

# --- LFR benchmark parameters ---
n = 1000         # Number of nodes
tau1 = 2         # Power-law exponent for the degree distribution
tau2 = 1.1       # Power-law exponent for the community size distribution (should be > 1)
mu = 0.05        # Mixing parameter
avg_deg = 25     # Average degree
max_deg = 100    # Max degree
min_commu = 80   # Min community size
max_commu = 100  # Max community size

G = LFR_benchmark_graph(
    n,
    tau1,
    tau2,
    mu,
    average_degree=avg_deg,
    max_degree=max_deg,
    min_community=min_commu,
    max_community=max_commu,
    seed=7,  # fixed seed so the same graph is generated on every run
)

### Drop multi-edges and self-loops from G
G = nx.Graph(G)  # Remove multi-edges

# Materialise the self-loops into a list first so G is not mutated while iterating.
selfloop_edges = [edge for edge in nx.selfloop_edges(G)]
G.remove_edges_from(selfloop_edges)  # Remove self-loops

In [3]:
import numpy as np

# Each node's "community" attribute is read as a collection of node ids; freezing
# them and collecting the results in a set removes the duplicates.
intrinsic_communities = {frozenset(G.nodes[v]["community"]) for v in G}

# intrinsic_membership[v] = index of node v's community in the iteration order of
# `intrinsic_communities`.  Start from the sentinel -1 (instead of uninitialised
# np.empty memory) so that a node missing from every community is detectable.
# Like the per-node range() scan it replaces, this assumes nodes are labelled 0..n-1.
intrinsic_membership = np.full(G.number_of_nodes(), -1, dtype=int)
for index, inner_set in enumerate(intrinsic_communities):
    members = np.array(list(inner_set), dtype=int)
    # Label only nodes that are still unassigned: this keeps the
    # "first matching community wins" rule should communities ever overlap.
    unassigned = members[intrinsic_membership[members] == -1]
    intrinsic_membership[unassigned] = index

assert (intrinsic_membership >= 0).all(), "some node was not assigned to any community"

In [4]:
# Size the id vector and the mask from the full membership vector rather than
# from G: G shrinks once the node is removed, so G.number_of_nodes() would give
# a mask of the wrong length if this cell is executed a second time.
num_original_nodes = len(intrinsic_membership)
remain_nodes = np.arange(num_original_nodes)
idx = [True] * num_original_nodes  # boolean mask over original node ids: True = kept

removed_node = 2
# Guarded so that re-running the cell does not raise NetworkXError for a node
# that has already been removed.
if G.has_node(removed_node):
    G.remove_node(removed_node)
idx[removed_node] = False

In [5]:
# Number of distinct ground-truth community labels among the nodes kept by the mask.
K = len(np.unique(intrinsic_membership[idx]))
# Ids of the kept nodes, rebuilt from the mask itself.  The previous
# `remain_nodes = remain_nodes[idx]` shrank its own input, so executing the cell
# twice raised an IndexError (mask length no longer matched).
remain_nodes = np.flatnonzero(idx)
intrinsic_membership_2 = intrinsic_membership[idx]

from clusim.clustering import Clustering

# Elements are keyed by their position in intrinsic_membership_2 (0..len-1),
# not by the original node id; each element maps to a one-item list of labels.
intrinsic_clustering_2 = Clustering(
    elm2clu_dict={i: [label] for i, label in enumerate(intrinsic_membership_2)}
)

In [6]:
from auxpack.eval_embd import eval_embd as EE

D = 20  # embedding dimension passed to every embedding method below

In [7]:
%%time
### 1. HOPE method
from gem.embedding.hope import HOPE

# A higher value of beta places more emphasis on capturing higher-order proximities
hope_model = HOPE(d=D, beta=0.01)
embd = hope_model.learn_embedding(
    graph=G,
    is_weighted=False,
    no_python=True,
)

# Evaluate the embedding with eval_embd and report the scores it returns.
defen = EE(K, intrinsic_membership_2, intrinsic_clustering_2, embd)
print("NMI&ECS:", defen)

NMI&ECS: [0.9874226530853143, 1.0, 0.9825919511875483, 1.0]
CPU times: user 35.2 s, sys: 43.5 s, total: 1min 18s
Wall time: 768 ms


In [8]:
%%time
### 2. Laplacian Eigenmaps method
from gem.embedding.lap import LaplacianEigenmaps

lap_model = LaplacianEigenmaps(d=D)
embd = lap_model.learn_embedding(
    graph=G,
    is_weighted=False,
    no_python=True,
)

# Evaluate the embedding with eval_embd and report the scores it returns.
defen = EE(K, intrinsic_membership_2, intrinsic_clustering_2, embd)
print("NMI&ECS:", defen)

NMI&ECS: [1.0, 0.9999999999999998, 1.0, 1.0]
CPU times: user 23.6 s, sys: 36 s, total: 59.6 s
Wall time: 752 ms


In [9]:
%%time
### 3. LLE method
from auxpack.lle import lle

# D-dimensional embedding of G produced by the project's lle helper.
embd = lle(G, D)

# Evaluate the embedding with eval_embd and report the scores it returns.
defen = EE(K, intrinsic_membership_2, intrinsic_clustering_2, embd)
print("NMI&ECS:", defen)

NMI&ECS: [1.0, 0.9999999999999998, 1.0, 1.0]
CPU times: user 46.5 s, sys: 1min 7s, total: 1min 53s
Wall time: 1.29 s


In [10]:
%%time
### 4. DeepWalk method
from auxpack.DeepWalk import DeepWalk

model = DeepWalk(
    dimensions=D,
    walk_length=16,
    window_size=10,
    walk_number=10,
    workers=32,
)
model.fit(G)
embd = model.get_embedding()

# Evaluate the embedding with eval_embd and report the scores it returns.
defen = EE(K, intrinsic_membership_2, intrinsic_clustering_2, embd)
print("NMI&ECS:", defen)

NMI&ECS: [1.0000000000000002, 1.0, 1.0, 1.0]
CPU times: user 22.7 s, sys: 25.7 s, total: 48.4 s
Wall time: 2.56 s


In [11]:
%%time
### 5. Node2Vec method (use this one from now on)
from node2vec import Node2Vec

# Remaining node ids as strings; used below as lookup keys into the fitted word vectors.
nodes = list(map(str, remain_nodes))

# Precompute probabilities and generate walks - **ON WINDOWS ONLY WORKS WITH workers=1**
# (a temp_folder=... argument can be added for big graphs)
node2vec_model = Node2Vec(
    G,
    dimensions=D,
    walk_length=16,
    num_walks=10,
    workers=32,
    quiet=True,
)

# Embed nodes. Any keywords acceptable by gensim.Word2Vec can be passed;
# `dimensions` and `workers` are automatically passed (from the Node2Vec constructor).
node2vec_fit = node2vec_model.fit(window=10, min_count=1, batch_words=40000)

# One row per remaining node, in the order of `nodes`.
embd = np.array([node2vec_fit.wv[key] for key in nodes])

# Evaluate the embedding with eval_embd and report the scores it returns.
defen = EE(K, intrinsic_membership_2, intrinsic_clustering_2, embd)
print("NMI&ECS:", defen)

NMI&ECS: [1.0, 0.9999999999999998, 1.0, 1.0]
CPU times: user 44.7 s, sys: 40 s, total: 1min 24s
Wall time: 21.2 s


In [12]:
%%time
### 6. MNMF method
from karateclub import MNMF

# Create an instance of the MNMF model
MNMF_model = MNMF(
    dimensions=D,
    clusters=K,
    lambd=0.2,
    alpha=0.05,
    beta=0.05,
    iterations=100,
    lower_control=1e-15,
    eta=5.0,
    seed=42,
)

# Fit the model on a copy of G whose nodes are relabelled to consecutive integers
# (G itself has a gap in its labels after the node removal above).
H = nx.convert_node_labels_to_integers(G)
MNMF_model.fit(H)

# Obtain the graph embeddings
embd = MNMF_model.get_embedding()

# Evaluate the embedding with eval_embd and report the scores it returns.
defen = EE(K, intrinsic_membership_2, intrinsic_clustering_2, embd)
print("NMI&ECS:", defen)

NMI&ECS: [0.9999999999999998, 1.0, 1.0, 1.0]
CPU times: user 55.6 s, sys: 2min 25s, total: 3min 21s
Wall time: 2.95 s


In [14]:
%%time
### 7. LINE method
from ge import LINE

model = LINE(G, embedding_size=D, order='first')
model.train(batch_size=8192, epochs=50, verbose=0)  # train model
LINE_embd = model.get_embeddings()  # get embedding vectors (mapping keyed by node)

# Look every remaining node up explicitly (as the Node2Vec cell does) instead of
# relying on the insertion order of the returned dict, so that row i is guaranteed
# to belong to remain_nodes[i] and hence to intrinsic_membership_2[i].
# NOTE(review): assumes the dict is keyed by the graph's own integer node labels.
embd = np.array([LINE_embd[int(node)] for node in remain_nodes])

# Evaluate the embedding with eval_embd and report the scores it returns.
defen = EE(K, intrinsic_membership_2, intrinsic_clustering_2, embd)
print("NMI&ECS:", defen)

NMI&ECS: [1.0, 1.0, 1.0, 1.0]
CPU times: user 29.7 s, sys: 24.9 s, total: 54.6 s
Wall time: 5.51 s
