forked from yilaguan/lightcluster
/
testing(single).py
73 lines (56 loc) · 3.14 KB
/
testing(single).py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
"""
Load a dataset.
Available datasets are: 'football.txt', 'polbooks.txt', 'protein_new.txt', 'amazon.txt', 'scientists_new.txt',
'karate.txt', 'facebook.txt', 'cliques.txt', 'nested.txt', 'stars.txt', 'cycles.txt'
"""
import os

from load_data import download_graph

# Name of the graph file to load from the 'data' directory.
dataset = 'football.txt'
# Build the path with os.path.join instead of a hard-coded '\\' separator so
# the script also works outside Windows; on Windows the resulting path is
# still 'data\\football.txt'.
n_vertex, edge_list = download_graph(os.path.join('data', dataset))
"""
Choose an algorithm.
Available algorithms are: 'Spectral', 'SCAN', 'GreedyNewman', 'Walktrap', 'LPA', 'CFinder', 'Clauset-Newman', 'Bigclam'
"""
algorithm = 'Walktrap'
"""
Use 'independent_clustering' to compute communities in both forms: labels and clusters (see reference for details).
Specify some parameters (n_clusters, neighbours_threshold, similarity_threshold, n_steps, clique_size).
Unspecified parameters will be chosen automatically or you'll get an error message.
"""
from model_builder import clustering, independent_clustering

# The third result was originally bound to `time`, which would shadow the
# standard-library module of the same name; it is only printed below.
# NOTE(review): presumably the running time of the algorithm - confirm
# against model_builder.
lbls_pred, clrs_pred, run_time = independent_clustering(
    algorithm, n_vertex, edge_list,
    n_clusters=15,
    neighbours_threshold=2,
    similarity_threshold=0.5,
    n_steps=3)

# write communities into the console if you wish
# (single-argument print(...) produces the same output on Python 2 and 3)
print(lbls_pred)
print(clrs_pred)
print(run_time)
from load_data import write_labels, write_clusters

# write communities (labels) into 'data\\answers\\'+'labels_'+algorithm+'_'+dataset
write_labels(algorithm, dataset, lbls_pred)
# write communities (clusters) into 'data\\answers\\'+'clusters_'+algorithm+'_'+dataset
# The original called write_labels a second time here, so the clusters were
# passed to the labels writer and write_clusters was never used.
# NOTE(review): assumes write_clusters takes the same (algorithm, dataset,
# data) arguments as write_labels - confirm against load_data.
write_clusters(algorithm, dataset, clrs_pred)
"""
You can calculate goodness (non ground-truth) metrics, such as: modularity, overlapping modularity, ratio cut, normalized cut.
"""
from cluster_metrics import compute_modularity, compute_overlapping_modularity, compute_ratio_cut, compute_normalized_cut

# Each metric is computed and printed in turn, so a failure in a later metric
# does not hide the values already reported. The single-argument print(...)
# form is valid on both Python 2 and Python 3.
print("Modularity = " + str(compute_modularity(lbls_pred, edge_list)))
print("Overlapping modularity = " + str(compute_overlapping_modularity(clrs_pred, n_vertex, edge_list)))
print("RatioCut = " + str(compute_ratio_cut(lbls_pred, clrs_pred, edge_list)))
print("NormalizedCut = " + str(compute_normalized_cut(lbls_pred, clrs_pred, edge_list)))
"""
If ground-truth communities are known, you can load them.
After that you can calculate performance (ground-truth) metrics, such as: average F1-score, average recall, average precision,
normalized mutual information (NMI), adjusted Rand score (ARS).
"""
import os

# if true labels are known
from load_data import download_labels, download_clusters
from transform_functions import compute_clusters_from_labels

# Drop the file extension with os.path.splitext rather than a fixed [:-4]
# slice, so dataset names whose extension is not exactly four characters long
# still map to the right ground-truth file. os.path.join keeps the path
# portable; on Windows it still yields 'data\\<name>_labels.txt'.
dataset_stem = os.path.splitext(dataset)[0]
lbls_true = download_labels(os.path.join('data', dataset_stem + '_labels.txt'))
clrs_true = compute_clusters_from_labels(lbls_true)
# if only true clusters are known
#clrs_true = download_clusters(os.path.join('data', dataset_stem + '_clusters.txt'))

from cluster_metrics import compute_avg_f1, compute_recall, compute_precision, compute_nmi, compute_ars

# Cluster-based metrics take the cluster form of the communities,
# label-based metrics (NMI, ARS) take the label form.
print("Average F1-score = " + str(compute_avg_f1(clrs_true, clrs_pred)))
print("Average recall = " + str(compute_recall(clrs_true, clrs_pred)))
print("Average precision = " + str(compute_precision(clrs_true, clrs_pred)))
print("NMI = " + str(compute_nmi(lbls_true, lbls_pred)))
print("ARS = " + str(compute_ars(lbls_true, lbls_pred)))