## Compare Experiments
```
karateclub's Non-Overlapping Community Detection  examples: https://github.com/benedekrozemberczki/karateclub/tree/master/examples/community_detection
1. GEMSEC (ASONAM 2019)  
2. EdMot (KDD 2019)   
3. SCD (WWW 2014)   
4. LabelPropagation (Physics Review E 2007)  
5. Spectral Clustering  
6. Louvain 
```

In [1]:
import numpy as np
from  sklearn import datasets
from sklearn.cluster import SpectralClustering
from community import best_partition
import networkx as nx
import time
import matplotlib.pyplot as plt
from karateclub.community_detection.non_overlapping import GEMSEC, SCD, LabelPropagation, EdMot
from components.utils import load_edgelist_graph,load_ordered_adjlist_graph
from components.evaluation import modularity, symmetric_matrix_modularity
import nocd
import matplotlib.pyplot as plt
%matplotlib inline

### 1.1 graph

In [2]:
name = 'dpwk'
# 'pearson'   'merged'   'kneighbour'
graph_type = 'kneighbour'
k_str = '5'

print('data load time:{}'.format(time.asctime(time.localtime(time.time()))))   
adjlist_graph_path = './graphs_' + graph_type + '/' + name + k_str+'.adjlist'
G = load_ordered_adjlist_graph(adjlist_graph_path)
x_path = './features/' + name + '.txt'
x = np.loadtxt( x_path, dtype=float)
y_pred = []
print('data load time:{}'.format(time.asctime(time.localtime(time.time()))))   

data load time:Wed Apr 28 22:38:45 2021


100%|██████████| 114690/114690 [00:00<00:00, 116674.32it/s]


G nodes 25023,edges 139713 with self-loops read already
data load time:Wed Apr 28 22:38:50 2021


#### karateclub

In [3]:
model = LabelPropagation()

def do_karate_train():
    model.fit(G)
    y_pred = model.get_memberships()
    return y_pred

#### Spectral Cluster

In [4]:
data_type = 'matrix'
n_clusters = 8

def do_spectral_train():
    y_pred = []
    if data_type == 'matrix':
        A = nx.to_numpy_array(G)
        y_pred = SpectralClustering( n_clusters , affinity='precomputed').fit_predict( A )
    elif data_type == 'features':
        y_pred = SpectralClustering( n_clusters ).fit_predict( x )  
    return y_pred

### train( karaclub or spectral or best_partition )

In [5]:
y_pred =  do_karate_train()
# y_pred = do_spectral_train()
# y_pred = best_partition(G)
print('train time:{}'.format(time.asctime(time.localtime(time.time()))))  

train time:Wed Apr 28 22:39:36 2021


### 3.1 evaluate

In [6]:
comm2count = {}
for i in range(25023):
    if y_pred[i] in comm2count:
        comm2count[y_pred[i]] += 1
    else:
        comm2count[y_pred[i]] = 1
print('number of communities: {}'.format(len(comm2count)))
for ky in comm2count.keys():
    print('community {:5d} count {:5d}'.format(ky, comm2count[ky]))

number of communities: 1146
community  3124 count   973
community 20315 count   717
community 13719 count   920
community  4494 count   135
community 24513 count  1353
community 24697 count   926
community  4193 count    14
community   853 count    82
community  7257 count     3
community 12600 count   546
community  2399 count   284
community   143 count    91
community 21486 count   392
community 18015 count   249
community 11638 count   862
community  6071 count   585
community  8608 count   678
community   296 count    30
community 15992 count   931
community 22031 count    73
community  7253 count    38
community  1329 count    16
community 16218 count   346
community   404 count    58
community 11972 count   864
community 12192 count     3
community  4497 count    13
community  7079 count   123
community 24633 count   223
community 14577 count     2
community  5634 count   158
community  3131 count    30
community 20757 count   464
community    86 count     8
community 21976 coun

In [7]:
print(name)
print(f'\nFinal symmetric_matrix_modularity = {symmetric_matrix_modularity(G, y_pred):.8f}')
print('evaluae modularity over time:{}'.format(time.asctime(time.localtime(time.time()))))   

dpwk

Final symmetric_matrix_modularity = 0.59656068
evaluae modularity over time:Wed Apr 28 22:42:53 2021


Community Detection
```
plt.figure(figsize=[10, 10])
o = np.argsort(y_pred)
graph_edgelist_path =  './graphs_' + graph_type + '/' + name + '.edgelist'
A = load_edgelist_graph(graph_edgelist_path)
nocd.utils.plot_sparse_clustered_adjacency(A, n_clusters, y_pred, o, markersize=0.05)
```

### draw

In [8]:
```
'''total modularity'''
graphs_size = 5
x = np.arange(graphs_size)
y_LabelProp   = np.array([ 0.5332, 0.7481, 0.7435, 0.7943, 0.8429 ])
y_SCD         = np.array([ 0.3128, 0.5534, 0.5713, 0.6969, 0.6928 ])
y_GEMSEC      = np.array([ 0.0531, 0.4448, 0.4275, 0.3922, 0.5468 ])
y_Spectral    = np.array([ 0.4100, 0.6568, 0.6546, 0.3614, 0.7289 ])
y_Louvain     = np.array([ 0.5872, 0.6798, 0.7176, 0.8092, 0.7812 ])
y_OTUCD       = np.array([ 0.5371, 0.7250, 0.7267, 0.7447, 0.7943 ]) 

total_width, n_methods = 0.8, 6    # zhihu 0.8, 3
width = total_width / n_methods
x = x - (total_width - width) / 2

plt.bar(x,             y_LabelProp,  width=width, label='LabelProp')
plt.bar(x + width,     y_SCD,        width=width, label='SCD')
plt.bar(x + 2 * width, y_GEMSEC,     width=width, label='GEMSEC')
plt.bar(x + 3 * width, y_Spectral,   width=width, label='Spectral')
plt.bar(x + 4 * width, y_Louvain,    width=width, label='Louvain')
plt.bar(x + 5 * width, y_OTUCD,      width=width, label='OTUCD')
plt.legend( loc=2, bbox_to_anchor=(1.05,1.0) )
plt.show()
```

SyntaxError: invalid syntax (<ipython-input-8-b5357bccf528>, line 1)