In [1]:
import warnings

warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

In [2]:
### Generate a random graph with the LFR_benchmark_graph function from networkx

# Import networkx and build the LFR graph from the parameters below
import networkx as nx

from networkx.generators.community import LFR_benchmark_graph

# Positional generator parameters (see the LFR_benchmark_graph documentation for their meaning).
n = 250
tau1 = 2.5
tau2 = 1.5
mu = 0.1

# Keyword options are grouped so the call itself stays on one line; the fixed seed
# is passed through unchanged.
lfr_options = dict(average_degree=6, min_community=30, seed=7)
G = LFR_benchmark_graph(n, tau1, tau2, mu, **lfr_options)

### Remove multi-edges and self-loops from G
G = nx.Graph(G)  # Remove multi-edges

# Snapshot the self-loops into a list first, then delete them from the graph.
selfloop_edges = list(nx.selfloop_edges(G))
G.remove_edges_from(selfloop_edges)  # Remove self-loops

In [3]:
import numpy as np
# Distinct ground-truth communities: each node's "community" attribute is frozen
# so that identical communities collapse into a single set element.
intrinsic_communities = {frozenset(G.nodes[v]["community"]) for v in G}

node_count = G.number_of_nodes()
# np.empty does not initialise its entries; each one is expected to be written below.
intrinsic_membership = np.empty(node_count, dtype=int)

# Pair every community with its position in the set's iteration order once,
# instead of re-enumerating the set for every node.
labelled_communities = list(enumerate(intrinsic_communities))
for node in range(node_count):
    # Position of the first community containing this node (None when no community does).
    label = next(
        (idx for idx, members in labelled_communities if node in members),
        None,
    )
    if label is not None:
        intrinsic_membership[node] = label

In [4]:
# Disconnect vertices 0-29 by deleting every edge incident to them.
for vertex in range(30):
    # Copy the incident edges into a list before removal so the edge view
    # is not modified while it is being read.
    G.remove_edges_from(list(G.edges(vertex)))

# K = number of distinct ground-truth community labels.
K = np.unique(intrinsic_membership).size


from clusim.clustering import Clustering

# Wrap the ground-truth labels in a clusim Clustering: every element index maps
# to a one-item list holding that element's community label.
intrinsic_clustering = Clustering(
    elm2clu_dict={elm: [clu] for elm, clu in enumerate(intrinsic_membership)}
)

In [5]:
from auxpack.eval_embd import eval_embd as EE
from auxpack.eval_embd import euclid_membership as EM
from auxpack.eval_embd import cosine_membership as CM
from auxpack.evaluate_clustering import NMI
from auxpack.evaluate_clustering import ECSim

# Embedding dimensionality shared by every model in the cells below
# (passed as `dimensions=` / `embedding_size=`).
D = 5

下面各方法所得到的嵌入结果 `embd` 的类型均为 array。

In [6]:
%%time
### 4 DeepWalk method
from auxpack.DeepWalk import DeepWalk

model = DeepWalk(dimensions=D, walk_length=16, window_size=10, walk_number=10, workers=32)
model.fit(G)
embd = model.get_embedding()

# EM / CM are auxpack's euclid_membership / cosine_membership helpers.
# Call each one exactly once and reuse the labels: the original cell called them
# three times each, which is redundant and -- if the helpers are randomised
# (TODO confirm in auxpack.eval_embd) -- lets the printed labels and the scores
# come from different clusterings.
euclid_labels = EM(K, embd)
cosine_labels = CM(K, embd)

# First 30 entries only; vertices 0-29 are the ones whose edges were removed earlier.
# NOTE(review): assumes row i of embd belongs to node i -- confirm for this model.
truth_head = intrinsic_membership[:30]
euclid_head = euclid_labels[:30]
cosine_head = cosine_labels[:30]

print(truth_head)
print(euclid_head)
print(cosine_head)
print("NMI&ECS:", NMI(truth_head, euclid_head),
      NMI(truth_head, cosine_head),
      ECSim(truth_head, euclid_head),
      ECSim(truth_head, cosine_head)
     )

# Whole-graph evaluation; `defen` ("score") is printed as returned by eval_embd.
defen = EE(K, intrinsic_membership, intrinsic_clustering, embd)
print("NMI&ECS:", defen)

[5 3 0 2 6 6 0 0 4 2 7 3 4 2 4 0 0 2 5 5 4 5 5 6 6 5 5 7 6 5]
[6 6 6 6 6 6 6 6 6 4 3 6 4 3 6 6 4 7 6 6 4 4 6 6 3 3 7 6 3 7]
[6 2 6 1 6 6 4 2 6 0 5 6 1 5 6 6 1 2 6 6 1 1 6 6 5 5 7 2 5 2]
NMI&ECS: 0.2084846927544074 0.30658874733002356 0.21135964912280708 0.23277777777777778
NMI&ECS: [0.7341114603879356, 0.6941569302201412, 0.6568108543417367, 0.6478826075884899]
CPU times: user 11.6 s, sys: 235 ms, total: 11.8 s
Wall time: 2.3 s


In [7]:
%%time
### 5 Node2Vec method (the version extracted from KarateClub)
from auxpack.Node2Vec import Node2Vec

model = Node2Vec(dimensions=D, walk_length=16, window_size=10, walk_number=10, workers=32)
model.fit(G)
embd = model.get_embedding()

# EM / CM are auxpack's euclid_membership / cosine_membership helpers.
# Call each one exactly once and reuse the labels: the original cell called them
# three times each, which is redundant and -- if the helpers are randomised
# (TODO confirm in auxpack.eval_embd) -- lets the printed labels and the scores
# come from different clusterings.
euclid_labels = EM(K, embd)
cosine_labels = CM(K, embd)

# First 30 entries only; vertices 0-29 are the ones whose edges were removed earlier.
# NOTE(review): assumes row i of embd belongs to node i -- confirm for this model.
truth_head = intrinsic_membership[:30]
euclid_head = euclid_labels[:30]
cosine_head = cosine_labels[:30]

print(truth_head)
print(euclid_head)
print(cosine_head)
print("NMI&ECS:", NMI(truth_head, euclid_head),
      NMI(truth_head, cosine_head),
      ECSim(truth_head, euclid_head),
      ECSim(truth_head, cosine_head)
     )

# Whole-graph evaluation; `defen` ("score") is printed as returned by eval_embd.
defen = EE(K, intrinsic_membership, intrinsic_clustering, embd)
print("NMI&ECS:", defen)

[5 3 0 2 6 6 0 0 4 2 7 3 4 2 4 0 0 2 5 5 4 5 5 6 6 5 5 7 6 5]
[5 5 5 5 5 5 5 5 5 5 3 5 5 5 5 5 5 5 3 5 5 5 5 5 5 1 1 5 5 4]
[7 7 7 7 7 7 7 7 7 7 0 7 7 7 7 7 7 1 0 7 7 7 7 7 7 4 4 7 7 2]
NMI&ECS: 0.20747986058341406 0.24607836355821167 0.17899999999999988 0.1833333333333332
NMI&ECS: [0.7244723893495836, 0.7265927106961033, 0.6662320028011204, 0.6779597560975609]
CPU times: user 14.3 s, sys: 0 ns, total: 14.3 s
Wall time: 4.04 s


In [8]:
%%time
### 5 Node2Vec method -- use this implementation from now on
# NOTE: this import rebinds the name Node2Vec; after this cell runs it no longer
# refers to the auxpack.Node2Vec class imported in the previous cell.
from node2vec import Node2Vec

remain_nodes = np.arange(G.number_of_nodes())
# The word vectors are looked up by the string form of each node index below.
nodes = [str(i) for i in remain_nodes]
# Precompute probabilities and generate walks - **ON WINDOWS ONLY WORKS WITH workers=1**
node2vec_model = Node2Vec(G, dimensions=D, walk_length=16, num_walks=10, workers=32, quiet=True)  # , temp_folder='test'  # Use temp_folder for big graphs
# Embed nodes
# Any keywords acceptable by gensim.Word2Vec can be passed, `dimensions` and `workers` are automatically passed
# (from the Node2Vec constructor)
node2vec_fit = node2vec_model.fit(window=10, min_count=1, batch_words=20000)
# Stack the per-node vectors in node-index order so row i corresponds to node i.
embd = np.array([node2vec_fit.wv[node] for node in nodes])

# EM / CM are auxpack's euclid_membership / cosine_membership helpers.
# Call each one exactly once and reuse the labels: the original cell called them
# three times each, which is redundant and -- if the helpers are randomised
# (TODO confirm in auxpack.eval_embd) -- lets the printed labels and the scores
# come from different clusterings.
euclid_labels = EM(K, embd)
cosine_labels = CM(K, embd)

# First 30 entries only; vertices 0-29 are the ones whose edges were removed earlier.
truth_head = intrinsic_membership[:30]
euclid_head = euclid_labels[:30]
cosine_head = cosine_labels[:30]

print(truth_head)
print(euclid_head)
print(cosine_head)
print("NMI&ECS:", NMI(truth_head, euclid_head),
      NMI(truth_head, cosine_head),
      ECSim(truth_head, euclid_head),
      ECSim(truth_head, cosine_head)
     )

# Whole-graph evaluation; `defen` ("score") is printed as returned by eval_embd.
defen = EE(K, intrinsic_membership, intrinsic_clustering, embd)
print("NMI&ECS:", defen)

[5 3 0 2 6 6 0 0 4 2 7 3 4 2 4 0 0 2 5 5 4 5 5 6 6 5 5 7 6 5]
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
[7 1 3 3 3 5 6 1 6 0 2 2 1 3 3 4 2 3 3 6 2 7 3 3 5 6 3 6 2 2]
NMI&ECS: 0.0 0.36237487452202577 0.17111111111111105 0.2378282828282828
NMI&ECS: [0.7735126862775716, 0.7550366055993956, 0.7041265417331207, 0.7304422528094429]
CPU times: user 15.9 s, sys: 11.3 s, total: 27.2 s
Wall time: 5.14 s


In [9]:
%%time
### 6 MNMF method
from karateclub import MNMF

# Create an instance of the MNMF model
MNMF_model = MNMF(dimensions=D, clusters=K, lambd=0.2,
                  alpha=0.05, beta=0.05, iterations=100,
                  lower_control=1e-15, eta=5.0, seed=42)

# Fit the model to a copy of the graph whose node labels are converted to integers
H = nx.convert_node_labels_to_integers(G)
MNMF_model.fit(H)
# Obtain the graph embeddings
embd = MNMF_model.get_embedding()

# EM / CM are auxpack's euclid_membership / cosine_membership helpers.
# Call each one exactly once and reuse the labels: the original cell called them
# three times each, which is redundant and -- if the helpers are randomised
# (TODO confirm in auxpack.eval_embd) -- lets the printed labels and the scores
# come from different clusterings.
euclid_labels = EM(K, embd)
cosine_labels = CM(K, embd)

# First 30 entries only; vertices 0-29 are the ones whose edges were removed earlier.
# NOTE(review): assumes row i of embd belongs to node i -- confirm for this model.
truth_head = intrinsic_membership[:30]
euclid_head = euclid_labels[:30]
cosine_head = cosine_labels[:30]

print(truth_head)
print(euclid_head)
print(cosine_head)
print("NMI&ECS:", NMI(truth_head, euclid_head),
      NMI(truth_head, cosine_head),
      ECSim(truth_head, euclid_head),
      ECSim(truth_head, cosine_head)
     )

# Whole-graph evaluation; `defen` ("score") is printed as returned by eval_embd.
defen = EE(K, intrinsic_membership, intrinsic_clustering, embd)
print("NMI&ECS:", defen)

[5 3 0 2 6 6 0 0 4 2 7 3 4 2 4 0 0 2 5 5 4 5 5 6 6 5 5 7 6 5]
[4 1 4 3 5 1 5 5 3 3 4 5 1 1 2 3 4 1 1 5 3 4 1 4 5 1 2 3 3 4]
[3 4 3 5 0 4 0 0 5 5 3 0 4 4 2 5 3 4 4 0 5 3 4 3 0 4 2 5 5 3]
NMI&ECS: 0.28261058404893663 0.2826105840489366 0.2517857142857142 0.2517857142857142
NMI&ECS: [0.639108835906917, 0.6383395844262291, 0.5791266004526874, 0.5560914667560001]
CPU times: user 7.06 s, sys: 0 ns, total: 7.06 s
Wall time: 157 ms


In [10]:
%%time
### Running this cell a second time will be much faster!
### 7 LINE method
from ge import LINE

model = LINE(G, embedding_size=D, order='first')
model.train(batch_size=8192, epochs=50, verbose=0)  # train model
LINE_embd = model.get_embeddings()  # get embedding vectors
# Stack the mapping's values into one ndarray so that `embd` has the same type
# here as in the other method cells (it used to be a plain list).
# NOTE(review): relies on the value order of LINE_embd matching the node order
# used by intrinsic_membership -- confirm against ge's get_embeddings().
embd = np.asarray(list(LINE_embd.values()))

# EM / CM are auxpack's euclid_membership / cosine_membership helpers.
# Call each one exactly once and reuse the labels: the original cell called them
# three times each, which is redundant and -- if the helpers are randomised
# (TODO confirm in auxpack.eval_embd) -- lets the printed labels and the scores
# come from different clusterings.
euclid_labels = EM(K, embd)
cosine_labels = CM(K, embd)

# First 30 entries only; vertices 0-29 are the ones whose edges were removed earlier.
truth_head = intrinsic_membership[:30]
euclid_head = euclid_labels[:30]
cosine_head = cosine_labels[:30]

print(truth_head)
print(euclid_head)
print(cosine_head)
print("NMI&ECS:", NMI(truth_head, euclid_head),
      NMI(truth_head, cosine_head),
      ECSim(truth_head, euclid_head),
      ECSim(truth_head, cosine_head)
     )

# Whole-graph evaluation; `defen` ("score") is printed as returned by eval_embd.
defen = EE(K, intrinsic_membership, intrinsic_clustering, embd)
print("NMI&ECS:", defen)

2023-07-04 11:14:56.222960: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-07-04 11:14:57.471587: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 38271 MB memory:  -> device: 0, name: NVIDIA A100-SXM4-40GB, pci bus id: 0000:02:00.0, compute capability: 8.0
2023-07-04 11:14:57.474996: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 38271 MB memory:  -> device: 1, name: NVIDIA A100-SXM4-40GB, pci bus id: 0000:41:00.0, compute capability: 8.0


[5 3 0 2 6 6 0 0 4 2 7 3 4 2 4 0 0 2 5 5 4 5 5 6 6 5 5 7 6 5]
[1 2 3 0 6 0 3 2 3 6 3 7 4 4 5 1 4 7 1 5 5 1 3 5 1 6 2 6 1 5]
[4 4 2 5 2 5 7 1 3 2 3 1 6 6 4 7 6 7 4 4 5 7 7 3 0 2 4 2 7 3]
NMI&ECS: 0.3806713095545265 0.41051914566419767 0.3015873015873015 0.3081746031746031
NMI&ECS: [0.09577992096239309, 0.09875924948230187, 0.16126230943862516, 0.16513731671554246]
CPU times: user 11.2 s, sys: 2.16 s, total: 13.4 s
Wall time: 9.91 s


In [11]:
%%time
### Running a second time will be much faster!
### 7 LINE method
# This cell trains a fresh LINE model with the same settings as the previous LINE cell.
from ge import LINE

model = LINE(G, embedding_size=D, order='first')
model.train(batch_size=8192, epochs=50, verbose=0)  # train model
LINE_embd = model.get_embeddings()  # get embedding vectors
# Stack the mapping's values into one ndarray so that `embd` has the same type
# here as in the other method cells (it used to be a plain list).
# NOTE(review): relies on the value order of LINE_embd matching the node order
# used by intrinsic_membership -- confirm against ge's get_embeddings().
embd = np.asarray(list(LINE_embd.values()))

# EM / CM are auxpack's euclid_membership / cosine_membership helpers.
# Call each one exactly once and reuse the labels: the original cell called them
# three times each, which is redundant and -- if the helpers are randomised
# (TODO confirm in auxpack.eval_embd) -- lets the printed labels and the scores
# come from different clusterings.
euclid_labels = EM(K, embd)
cosine_labels = CM(K, embd)

# First 30 entries only; vertices 0-29 are the ones whose edges were removed earlier.
truth_head = intrinsic_membership[:30]
euclid_head = euclid_labels[:30]
cosine_head = cosine_labels[:30]

print(truth_head)
print(euclid_head)
print(cosine_head)
print("NMI&ECS:", NMI(truth_head, euclid_head),
      NMI(truth_head, cosine_head),
      ECSim(truth_head, euclid_head),
      ECSim(truth_head, cosine_head)
     )

# Whole-graph evaluation; `defen` ("score") is printed as returned by eval_embd.
defen = EE(K, intrinsic_membership, intrinsic_clustering, embd)
print("NMI&ECS:", defen)

[5 3 0 2 6 6 0 0 4 2 7 3 4 2 4 0 0 2 5 5 4 5 5 6 6 5 5 7 6 5]
[7 2 2 7 7 7 5 0 3 6 7 7 6 5 7 5 5 7 0 1 3 2 5 5 1 4 0 6 2 4]
[2 3 7 3 2 3 0 1 5 6 2 2 7 4 2 6 0 3 1 2 5 6 0 4 0 7 6 6 6 7]
NMI&ECS: 0.3367311184933419 0.4223690524784884 0.2549206349206349 0.24388888888888882
NMI&ECS: [0.07615229688386714, 0.06748604419056208, 0.15411724598930476, 0.1529419133419133]
CPU times: user 10.7 s, sys: 428 ms, total: 11.1 s
Wall time: 571 ms
