In [1]:
import warnings

warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

In [2]:
### Generate a random benchmark graph with networkx's LFR_benchmark_graph
import networkx as nx
from networkx.generators.community import LFR_benchmark_graph

# --- LFR benchmark parameters -------------------------------------------
n = 1000         # First argument of LFR_benchmark_graph
tau1 = 2         # Power-law exponent for the degree distribution
tau2 = 1.1       # Power-law exponent for the community size distribution
                 # (should be > 1)
mu = 0.05        # Mixing parameter
avg_deg = 25     # Average degree
max_deg = 100    # Max degree
min_commu = 80   # Min community size
max_commu = 100  # Max community size

# Fixed seed so the generated graph is reproducible across runs.
G = LFR_benchmark_graph(
    n=n,
    tau1=tau1,
    tau2=tau2,
    mu=mu,
    average_degree=avg_deg,
    max_degree=max_deg,
    min_community=min_commu,
    max_community=max_commu,
    seed=7,
)

### Strip multi-edges and self-loops from G
# Rebuilding as a plain nx.Graph collapses any parallel edges.
G = nx.Graph(G)

# Materialise the self-loop list first so the graph is not mutated while
# the edge iterator is still being consumed.
selfloop_edges = list(nx.selfloop_edges(G))
G.remove_edges_from(selfloop_edges)

In [3]:
import numpy as np

# Distinct ground-truth communities: each node stores its community under
# the "community" node attribute; frozenset makes them hashable so that
# duplicates collapse in the enclosing set.
intrinsic_communities = {frozenset(G.nodes[v]["community"]) for v in G}

# intrinsic_membership[node] = position (in the set's iteration order) of the
# first community that contains `node`. Node ids are assumed to be
# 0..number_of_nodes-1, as they are used directly as array indices.
node_count = G.number_of_nodes()
intrinsic_membership = np.empty(node_count, dtype=int)
for node in range(node_count):
    community_id = next(
        (
            position
            for position, members in enumerate(intrinsic_communities)
            if node in members
        ),
        None,
    )
    # A node found in no community keeps whatever np.empty left in its slot.
    if community_id is not None:
        intrinsic_membership[node] = community_id

In [4]:
# Node dropped from the benchmark graph before embedding.
# NOTE(review): the reason for removing this particular node is not visible
# in this notebook — confirm and document it.
REMOVED_NODE = 2

# Guarded so the cell can be re-run: Graph.remove_node raises NetworkXError
# when the node is no longer in the graph.
if G.has_node(REMOVED_NODE):
    G.remove_node(REMOVED_NODE)

# Boolean mask over the ORIGINAL node ids (one entry per label in
# intrinsic_membership), False only for the removed node. Sizing it from the
# label array rather than the mutated graph keeps it correct on re-runs.
idx = [True] * len(intrinsic_membership)
idx[REMOVED_NODE] = False

In [5]:
import numpy as np
from clusim.clustering import Clustering

# Ground-truth labels restricted to the entries selected by the boolean mask `idx`.
intrinsic_membership_2 = intrinsic_membership[idx]

# Number of distinct community labels among the retained entries.
K = len(np.unique(intrinsic_membership_2))

# clusim Clustering built from an element -> [cluster id] mapping. Elements are
# keyed by their POSITION in intrinsic_membership_2, not by original node id.
intrinsic_clustering_2 = Clustering(
    elm2clu_dict={
        position: [label]
        for position, label in enumerate(intrinsic_membership_2)
    }
)

In [6]:
from auxpack.eval_embd import eval_embd as EE

In [7]:
%%time
### 1 HOPE method
from gem.embedding.hope import HOPE

D = 20  # passed to HOPE as `d`

# A higher value of beta places more emphasis on capturing higher-order proximities.
hope_model = HOPE(d=D, beta=0.01)
embd = hope_model.learn_embedding(
    graph=G,
    is_weighted=False,
    no_python=True,
)

# Score the embedding against the ground-truth labels and clustering.
# The output is printed under the label "NMI&ECS"; see auxpack.eval_embd for
# the meaning of each returned entry.
defen = EE(K, intrinsic_membership_2, intrinsic_clustering_2, embd)
print("NMI&ECS:", defen)

NMI&ECS: [0.9874226530853143, 1.0, 0.9825919511875483, 1.0]
CPU times: user 26.3 s, sys: 35.6 s, total: 1min 1s
Wall time: 615 ms


In [8]:
%%time
### 2 Laplacian Eigenmaps method
from gem.embedding.lap import LaplacianEigenmaps

D = 20  # passed to LaplacianEigenmaps as `d`

lap_model = LaplacianEigenmaps(d=D)
embd = lap_model.learn_embedding(
    graph=G,
    is_weighted=False,
    no_python=True,
)

# Score the embedding against the ground-truth labels and clustering.
defen = EE(K, intrinsic_membership_2, intrinsic_clustering_2, embd)
print("NMI&ECS:", defen)

NMI&ECS: [1.0, 0.9999999999999998, 1.0, 1.0]
CPU times: user 28.2 s, sys: 38.2 s, total: 1min 6s
Wall time: 760 ms


In [9]:
%%time
### 3 LLE method
from auxpack.lle import lle

D = 20  # second argument of lle(); presumably the embedding dimension — confirm in auxpack.lle

# Embed the graph with the project's LLE helper.
embd = lle(G, D)

# Score the embedding against the ground-truth labels and clustering.
defen = EE(K, intrinsic_membership_2, intrinsic_clustering_2, embd)
print("NMI&ECS:", defen)

NMI&ECS: [1.0, 0.9999999999999998, 1.0, 1.0]
CPU times: user 47.6 s, sys: 1min 8s, total: 1min 55s
Wall time: 1.28 s


In [10]:
%%time
### 4 DeepWalk method
from auxpack.DeepWalk import DeepWalk

D = 20  # passed to DeepWalk as `dimensions`

model = DeepWalk(
    dimensions=D,
    walk_length=16,
    window_size=10,
    walk_number=10,
    workers=32,
)
model.fit(G)
embd = model.get_embedding()

# Score the embedding against the ground-truth labels and clustering.
defen = EE(K, intrinsic_membership_2, intrinsic_clustering_2, embd)
print("NMI&ECS:", defen)

NMI&ECS: [1.0, 1.0, 1.0, 1.0]
CPU times: user 28 s, sys: 31.6 s, total: 59.6 s
Wall time: 2.57 s


In [30]:
%%time
### 5 Node2Vec method (auxpack implementation)
from auxpack.Node2Vec import Node2Vec

D = 20  # passed to Node2Vec as `dimensions`

model = Node2Vec(
    dimensions=D,
    walk_length=16,
    window_size=10,
    walk_number=10,
    workers=32,
)
model.fit(G)
embd = model.get_embedding()

# Score the embedding against the ground-truth labels and clustering.
defen = EE(K, intrinsic_membership_2, intrinsic_clustering_2, embd)
print("NMI&ECS:", defen)

NMI&ECS: [1.0, 1.0, 1.0, 1.0]
CPU times: user 43.2 s, sys: 20.3 s, total: 1min 3s
Wall time: 21.1 s


In [31]:
%%time
### 5 Node2Vec method (`node2vec` package) -- use this one from now on

from node2vec import Node2Vec
import numpy as np

D = 20  # passed to Node2Vec as `dimensions`

# The fitted word-vector table is looked up with string keys, so keep the
# node ids as strings in G's node order.
nodes = list(map(str, G.nodes()))

# Precompute probabilities and generate walks - **ON WINDOWS ONLY WORKS WITH workers=1**
# (temp_folder='test' can be passed as well; use temp_folder for big graphs)
node2vec_model = Node2Vec(
    G,
    dimensions=D,
    walk_length=16,
    num_walks=10,
    workers=32,
    quiet=True,
)

# Embed nodes.
# Any keywords acceptable by gensim.Word2Vec can be passed; `dimensions` and
# `workers` are automatically passed (from the Node2Vec constructor).
node2vec_fit = node2vec_model.fit(window=10, min_count=1, batch_words=16192)

# One embedding row per node, in the same order as `nodes`.
embd = np.array([node2vec_fit.wv[key] for key in nodes])

# Score the embedding against the ground-truth labels and clustering.
defen = EE(K, intrinsic_membership_2, intrinsic_clustering_2, embd)
print("NMI&ECS:", defen)

NMI&ECS: [1.0, 0.9999999999999998, 1.0, 1.0]
CPU times: user 44.9 s, sys: 27.8 s, total: 1min 12s
Wall time: 17.2 s


In [33]:
%%time
### 6 LINE method
from ge import LINE

# NOTE(review): D is 3 here while the other embedding cells in this notebook
# use 20 (MNMF uses 7) — confirm this is intentional.
D = 3

model = LINE(G, embedding_size=D, order='first')
model.train(batch_size=8192, epochs=50, verbose=0)  # train model
LINE_embd = model.get_embeddings()  # get embedding vectors (dict keyed by node)

# Select rows explicitly in G's node order instead of relying on the dict's
# insertion order, so row i always lines up with label i in
# intrinsic_membership_2.
embd = [LINE_embd[node] for node in G.nodes()]

# Score the embedding against the ground-truth labels and clustering.
defen = EE(K, intrinsic_membership_2, intrinsic_clustering_2, embd)
print("NMI&ECS:", defen)

NMI&ECS: [0.9011655028172546, 0.9272677413112281, 0.8291624059102365, 0.8779333656437953]
CPU times: user 11.4 s, sys: 404 ms, total: 11.8 s
Wall time: 5.74 s


In [39]:
%%time

### 7 MNMF method
from karateclub import MNMF

D = 7  # passed to MNMF as `dimensions`

# Create an instance of the MNMF model; `clusters` is the number of
# ground-truth communities K.
MNMF_model = MNMF(
    dimensions=D,
    clusters=K,
    lambd=0.2,
    alpha=0.05,
    beta=0.05,
    iterations=100,
    lower_control=1e-15,
    eta=5.0,
    seed=42,
)

# Fit on a copy of G whose nodes are relabelled to consecutive integers
# (G's own ids have a gap once a node has been removed).
H = nx.convert_node_labels_to_integers(G)
MNMF_model.fit(H)

# Obtain the graph embeddings.
embd = MNMF_model.get_embedding()

# Score the embedding against the ground-truth labels and clustering.
defen = EE(K, intrinsic_membership_2, intrinsic_clustering_2, embd)
print("NMI&ECS:", defen)

NMI&ECS: [0.9919996615178364, 0.9937409167073007, 0.9882550850626074, 0.9931415218771541]
CPU times: user 4.7 s, sys: 284 ms, total: 4.98 s
Wall time: 1.05 s
