In [1]:
import warnings

# Silence the noisy warning categories emitted by the embedding libraries.
# The filters are registered in the same order as before (User, Deprecation, Future).
for _ignored_category in (UserWarning, DeprecationWarning, FutureWarning):
    warnings.filterwarnings("ignore", category=_ignored_category)

In [2]:
### Generate a random graph with the LFR_benchmark_graph function from networkx
import networkx as nx
from networkx.generators.community import LFR_benchmark_graph

# --- LFR benchmark parameters ---
n = 1000         # Number of nodes
tau1 = 2         # Power-law exponent for the degree distribution
tau2 = 1.1       # Power-law exponent for the community size distribution (should be > 1)
mu = 0.05        # Mixing parameter
avg_deg = 25     # Average degree
max_deg = 100    # Max degree
min_commu = 80   # Min community size
max_commu = 100  # Max community size

G = LFR_benchmark_graph(
    n,
    tau1,
    tau2,
    mu,
    average_degree=avg_deg,
    max_degree=max_deg,
    min_community=min_commu,
    max_community=max_commu,
    seed=7,  # fixed seed so the same graph is generated on every run
)

### Remove multi-edges and self-loops from G
# Rebuilding as a plain nx.Graph collapses any parallel edges.
G = nx.Graph(G)

# Materialise the self-loops first, then delete them from G.
selfloop_edges = [edge for edge in nx.selfloop_edges(G)]
G.remove_edges_from(selfloop_edges)

In [3]:
import numpy as np
# Ground-truth ("intrinsic") community label for every node of G.
# Each node's "community" attribute holds the members of its community; turning
# these into frozensets de-duplicates them into one entry per community.
intrinsic_communities = {frozenset(G.nodes[v]["community"]) for v in G}

# Map each node to the index of the first community that contains it
# (same "first match wins" rule as a per-node scan, but in a single pass).
_node_to_label = {}
for index, inner_set in enumerate(intrinsic_communities):
    for member in inner_set:
        _node_to_label.setdefault(member, index)

# Nodes are assumed to be labelled 0 .. n-1. A node that is in no community gets
# the sentinel -1 instead of an uninitialised np.empty() value.
intrinsic_membership = np.array(
    [_node_to_label.get(node, -1) for node in range(G.number_of_nodes())],
    dtype=int,
)
assert (intrinsic_membership >= 0).all(), "some nodes were not assigned to any community"

In [4]:
# Boolean keep-mask with one entry per node of G; every node is kept initially.
idx = [True for _ in G]
# Labels of the nodes still present, initially 0 .. n-1.
remain_nodes = np.array(range(len(idx)))

In [5]:
from clusim.clustering import Clustering

# Node deleted from the benchmark graph for this experiment.
removed_node = 2

# remove_nodes_from() silently ignores nodes that are already gone, so this
# cell can be re-run without raising (remove_node() would fail the second time).
G.remove_nodes_from([removed_node])
idx[removed_node] = False

# Number of distinct ground-truth communities among the remaining nodes.
K = len(np.unique(intrinsic_membership[idx]))

# Derive the surviving node labels from the mask itself. Because remain_nodes
# starts as 0 .. n-1 this equals remain_nodes[idx] on the first run, and unlike
# that expression it stays valid when the cell is executed again.
remain_nodes = np.flatnonzero(idx)
intrinsic_membership_2 = intrinsic_membership[idx]

# Ground-truth clustering of the remaining nodes. Elements are keyed by row
# position (0 .. len-1), not by the original node label.
intrinsic_clustering_2 = Clustering(
    elm2clu_dict={
        i: [intrinsic_membership_2[i]] for i in range(len(intrinsic_membership_2))
    }
)

In [6]:
# Evaluation helper. In the cells below it is called as
# EE(K, intrinsic_membership_2, intrinsic_clustering_2, embd) and its result is
# printed under the label "NMI&ECS:".
from auxpack.eval_embd import eval_embd as EE

# Embedding dimension shared by every embedding method below.
D=30

下面各方法所得到的嵌入结果 `embd` 的类型均为 NumPy 数组(array)。

In [7]:
%%time
### 1 HOPE method
from gem.embedding.hope import HOPE

# A higher value of beta places more emphasis on capturing higher-order proximities
hope_model = HOPE(d=D, beta=0.01)
embd = hope_model.learn_embedding(
    graph=G,
    is_weighted=False,
    no_python=True,
)

# Evaluate the embedding with EE and report the scores.
defen = EE(K, intrinsic_membership_2, intrinsic_clustering_2, embd)
print("NMI&ECS:", defen)

NMI&ECS: [1.0000000000000002, 1.0, 1.0, 1.0]
CPU times: user 27.7 s, sys: 38.2 s, total: 1min 5s
Wall time: 649 ms


In [8]:
%%time
### 2 Laplacian Eigenmaps method
from gem.embedding.lap import LaplacianEigenmaps

lap_model = LaplacianEigenmaps(d=D)
embd = lap_model.learn_embedding(
    graph=G,
    is_weighted=False,
    no_python=True,
)

# Evaluate the embedding with EE and report the scores.
defen = EE(K, intrinsic_membership_2, intrinsic_clustering_2, embd)
print("NMI&ECS:", defen)

NMI&ECS: [0.9757244935066341, 1.0, 0.9408845532147517, 1.0]
CPU times: user 26.7 s, sys: 41.4 s, total: 1min 8s
Wall time: 781 ms


In [9]:
%%time
### 3 LLE method
from auxpack.lle import lle

# D-dimensional embedding of G produced by the project-local lle helper.
embd = lle(G, D)

# Evaluate the embedding with EE and report the scores.
defen = EE(K, intrinsic_membership_2, intrinsic_clustering_2, embd)
print("NMI&ECS:", defen)

NMI&ECS: [0.959373869699968, 0.9999999999999998, 0.8620327030301864, 1.0]
CPU times: user 49.8 s, sys: 1min 12s, total: 2min 2s
Wall time: 1.36 s


In [10]:
%%time
### 4 DeepWalk method
from auxpack.DeepWalk import DeepWalk

model = DeepWalk(
    dimensions=D,
    walk_length=16,
    window_size=10,
    walk_number=10,
    workers=32,
)
model.fit(G)
embd = model.get_embedding()

# Evaluate the embedding with EE and report the scores.
defen = EE(K, intrinsic_membership_2, intrinsic_clustering_2, embd)
print("NMI&ECS:", defen)

NMI&ECS: [1.0000000000000002, 1.0, 1.0, 1.0]
CPU times: user 27 s, sys: 28.3 s, total: 55.3 s
Wall time: 2.2 s


In [11]:
%%time
### 5 Node2Vec method (version extracted from KarateClub)
from auxpack.Node2Vec import Node2Vec

model = Node2Vec(
    dimensions=D,
    walk_length=16,
    window_size=10,
    walk_number=10,
    workers=32,
)
model.fit(G)
embd = model.get_embedding()

# Evaluate the embedding with EE and report the scores.
defen = EE(K, intrinsic_membership_2, intrinsic_clustering_2, embd)
print("NMI&ECS:", defen)

NMI&ECS: [1.0000000000000002, 1.0, 1.0, 1.0]
CPU times: user 45.3 s, sys: 26.5 s, total: 1min 11s
Wall time: 21.2 s


In [12]:
%%time
### 5 Node2Vec method (node2vec package) -- use this version from now on
from node2vec import Node2Vec

# Precompute probabilities and generate walks - **ON WINDOWS ONLY WORKS WITH workers=1**
# (for big graphs a temp_folder argument can additionally be passed, e.g. temp_folder='test')
node2vec_model = Node2Vec(
    G,
    dimensions=D,
    walk_length=16,
    num_walks=10,
    workers=32,
    quiet=True,
)

# Embed nodes.
# Any keywords acceptable by gensim.Word2Vec can be passed, `dimensions` and `workers`
# are automatically passed (from the Node2Vec constructor)
node2vec_fit = node2vec_model.fit(window=10, min_count=1, batch_words=20000)

# The word vectors are looked up by the string form of each remaining node
# label, in remain_nodes order.
nodes = list(map(str, remain_nodes))
embd = np.array([node2vec_fit.wv[node] for node in nodes])

# Evaluate the embedding with EE and report the scores.
defen = EE(K, intrinsic_membership_2, intrinsic_clustering_2, embd)
print("NMI&ECS:", defen)

NMI&ECS: [1.0, 1.0, 1.0, 1.0]
CPU times: user 53.6 s, sys: 41.5 s, total: 1min 35s
Wall time: 21.6 s


In [13]:
%%time
### 6 MNMF method
from karateclub import MNMF

# Create an instance of the MNMF model
MNMF_model = MNMF(
    dimensions=D,
    clusters=K,
    lambd=0.2,
    alpha=0.05,
    beta=0.05,
    iterations=100,
    lower_control=1e-15,
    eta=5.0,
    seed=42,
)

# Fit the model on a copy of G whose nodes are relabelled to consecutive integers
H = nx.convert_node_labels_to_integers(G)
MNMF_model.fit(H)

# Obtain the graph embeddings
embd = MNMF_model.get_embedding()

# Evaluate the embedding with EE and report the scores.
defen = EE(K, intrinsic_membership_2, intrinsic_clustering_2, embd)
print("NMI&ECS:", defen)

NMI&ECS: [1.0, 0.9976926653542104, 1.0, 0.9970287249980736]
CPU times: user 1min 5s, sys: 2min 51s, total: 3min 57s
Wall time: 3.45 s


In [14]:
%%time
### Run a second time will be much faster!
### 7 LINE method
from ge import LINE

model = LINE(G, embedding_size=D, order='first')
model.train(batch_size=8192, epochs=50, verbose=0)  # train model
LINE_embd = model.get_embeddings()  # get embedding vectors

# Build the embedding as an array with one row per node, explicitly in G.nodes()
# order, instead of relying on the insertion order of the returned mapping.
# This also gives embd the same type (array) as the other methods' results.
embd = np.array([LINE_embd[node] for node in G.nodes()])

# Evaluate the embedding with EE and report the scores.
defen = EE(K, intrinsic_membership_2, intrinsic_clustering_2, embd)
print("NMI&ECS:", defen)

2023-07-03 22:54:11.660105: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-07-03 22:54:12.874319: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 38271 MB memory:  -> device: 0, name: NVIDIA A100-SXM4-40GB, pci bus id: 0000:02:00.0, compute capability: 8.0
2023-07-03 22:54:12.876234: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 38271 MB memory:  -> device: 1, name: NVIDIA A100-SXM4-40GB, pci bus id: 0000:41:00.0, compute capability: 8.0


NMI&ECS: [1.0, 1.0, 1.0, 1.0]
CPU times: user 36.2 s, sys: 33.3 s, total: 1min 9s
Wall time: 15 s


In [15]:
%%time
### Run a second time will be much faster!
### 7 LINE method (repeated run, for timing comparison with the previous cell)
from ge import LINE

model = LINE(G, embedding_size=D, order='first')
model.train(batch_size=8192, epochs=50, verbose=0)  # train model
LINE_embd = model.get_embeddings()  # get embedding vectors

# Build the embedding as an array with one row per node, explicitly in G.nodes()
# order, instead of relying on the insertion order of the returned mapping.
# This also gives embd the same type (array) as the other methods' results.
embd = np.array([LINE_embd[node] for node in G.nodes()])

# Evaluate the embedding with EE and report the scores.
defen = EE(K, intrinsic_membership_2, intrinsic_clustering_2, embd)
print("NMI&ECS:", defen)

NMI&ECS: [1.0, 1.0, 1.0, 1.0]
CPU times: user 34.2 s, sys: 31.6 s, total: 1min 5s
Wall time: 5.49 s
