Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
9f3add8
commit 1285eec
Showing
10 changed files
with
1,264 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
import pickle | ||
|
||
# Fraction of the degree ranking that counts as the "top" users.
cutperc = 0.1


def _read_id_set(path):
    """Return the stripped lines of *path*, minus its first line, as a set."""
    with open(path, 'r') as handle:
        # The first line is skipped, as the original did with f.next().
        # next() with a default works on Python 2 and 3 and tolerates an
        # empty file.
        next(handle, None)
        return set(line.strip() for line in handle)


core_user = _read_id_set('../data/core_user')
core_friend = _read_id_set('../data/core_friend')

# y_* hold the integer parsed from column 1 (plotted as "Degree"); x_* hold
# the 0-based row position at which the id was seen (plotted as "Rank By
# Degree" -- presumably the file is sorted by degree; verify against the
# script that writes it).
y_user, y_friend, y_rest = [], [], []
x_user, x_friend, x_rest = [], [], []
count = 0
with open('../data/core_degree', 'r') as f:
    next(f, None)
    for line in f:
        words = line.split(',')
        node = words[0].strip()
        degree = int(words[1].strip())
        if node in core_user:
            y_user.append(degree)
            x_user.append(count)
        elif node in core_friend:
            y_friend.append(degree)
            x_friend.append(count)
        else:
            y_rest.append(degree)
            x_rest.append(count)
        count += 1

# The cutoff line is drawn in log space, but the ranks in x_friend are raw
# row positions, so they must be compared against the raw threshold.  The
# original compared raw ranks against the logged cutoff.
rank_cutoff = count * cutperc
cutoff = np.log(rank_cutoff)
friends_in_top = sum(1 for rank in x_friend if rank < rank_cutoff)
# Guard the empty case instead of dividing by zero.
percent = friends_in_top * 1.0 / len(x_friend) if x_friend else 0.0
print(cutoff, percent)

# NOTE(review): positions start at 0, so np.log() yields -inf for the first
# row; confirm whether ranks were meant to be 1-based.
plt.scatter(np.log(y_friend), np.log(x_friend), marker='d', alpha=0.1, facecolors='none', edgecolors='r', label='K direct friends of new users')
plt.scatter(np.log(y_rest), np.log(x_rest), marker='.', alpha=0.05, facecolors='none', edgecolors='g', label='All other users')
plt.axhline(cutoff, ls='--', c='b', label='Cutoff at top 10% users \nwith highest degrees')
plt.legend(fontsize=15, )
plt.ylabel('Log(Rank By Degree)', fontsize=18)
plt.xlabel('Log(Degree)', fontsize=18)
plt.xticks(fontsize=0)
plt.yticks(fontsize=0)
#plt.scatter(x_user, y_user, marker='*', facecolors='none', edgecolors='b')
plt.savefig('../plots/fig_core_original.png', format='png', bbox_inches='tight')
plt.show()
plt.clf()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
from evaluate import f1_community, jc_community, nmi_community | ||
import pickle | ||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
|
||
# NOTE: pickle.load can execute arbitrary code; only load label files that
# come from a trusted source.
with open('../data/labels.pkl', 'rb') as f:
    labellist = pickle.load(f)

dim = len(labellist)


def _one_hot_rows(labels):
    """Return one 0/1 membership row per distinct value in *labels*."""
    # return_inverse gives, for every item, its index into the sorted
    # distinct labels, so labels no longer have to be exactly 0..k-1.
    # For labels that already are 0..k-1 the result is unchanged.
    distinct, inverse = np.unique(labels, return_inverse=True)
    rows = [[0] * len(inverse) for _ in range(len(distinct))]
    for item, row in enumerate(inverse):
        rows[row][item] = 1
    return rows


def _write_matrix(handle, matrix):
    """Write *matrix* one row per line, each value followed by a space."""
    for row in matrix:
        handle.write(''.join(str(value) + ' ' for value in row))
        handle.write('\n')


newllist = [_one_hot_rows(labels) for labels in labellist]

# Diagonals stay at 1; every ordered pair (i, j) with i != j is scored.
cross_f1 = [[1] * dim for _ in range(dim)]
cross_jc = [[1] * dim for _ in range(dim)]
cross_nmi = [[1] * dim for _ in range(dim)]
for i in range(dim):
    for j in range(dim):
        if i != j:
            cross_f1[i][j] = f1_community(newllist[i], newllist[j])
            cross_jc[i][j] = jc_community(newllist[i], newllist[j])
            cross_nmi[i][j] = nmi_community(newllist[i], newllist[j])

with open('../data/cross.txt', 'w') as f:
    _write_matrix(f, cross_f1)
    f.write('------------\n')
    _write_matrix(f, cross_jc)
    f.write('------------\n')
    _write_matrix(f, cross_nmi)

for matrix, target in ((cross_f1, "../plots/f1_cross.png"),
                       (cross_jc, "../plots/jc_cross.png"),
                       (cross_nmi, "../plots/nmi_cross.png")):
    # Start from an empty figure so earlier heatmaps do not pile up
    # underneath the one being saved.
    plt.clf()
    plt.imshow(matrix, cmap='Blues', interpolation='nearest')
    plt.savefig(target, bbox_inches='tight')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
import math | ||
import numpy as np | ||
import sys | ||
import csv | ||
|
||
def h_utils(w, n):
    """Return the entropy-style term ``-w * log(w / n)``.

    args:
        w: the count being weighed; 0 short-circuits to 0 so that log(0)
            is never evaluated
        n: the total that ``w`` is divided by inside the logarithm
    """
    if w == 0:
        return 0
    fraction = float(w) / n
    return -w * math.log(fraction)
|
||
def cover_entropy(xs, n):
    """Sum the h_utils terms of every row in ``xs``.

    For each row two terms are added: one for ``sum(row)`` and one for
    ``n - sum(row)``.

    args:
        xs: iterable of numeric rows (presumably 0/1 membership vectors --
            verify against callers)
        n: the total passed through to h_utils
    returns:
        the accumulated total as a float (0.0 for an empty ``xs``)
    """
    entropy = 0.0
    for membership in xs:
        size = sum(membership)
        entropy += h_utils(size, n) + h_utils(n - size, n)
    return entropy
|
||
def calc_modified_conditional_matrix(pre_ys, true_ys, n):
    """Build the two pairwise matrices that nmi_community reduces.

    For every (predicted row, true row) pair four counts are taken:
        a: positions where the two entries sum to 0
        d: positions where the two entries sum to 2
        b: sum(true row) - d
        c: sum(predicted row) - d
    (for 0/1 rows these are neither / both / true-only / predicted-only).

    args:
        pre_ys: sequence of predicted rows
        true_ys: sequence of ground-truth rows
        n: the total passed through to h_utils
    returns:
        (results_0, results_1): numpy arrays of shape
        (len(pre_ys), len(true_ys)) and (len(true_ys), len(pre_ys))
    """
    results_0 = np.zeros((len(pre_ys), len(true_ys)))
    results_1 = np.zeros((len(true_ys), len(pre_ys)))
    for ind_p, pre_y in enumerate(pre_ys):
        pre_size = sum(pre_y)
        for ind_t, true_y in enumerate(true_ys):
            totals = [py + ty for py, ty in zip(pre_y, true_y)]
            a = sum(total == 0 for total in totals)
            d = sum(total == 2 for total in totals)
            b = sum(true_y) - d
            c = pre_size - d
            agree = h_utils(a, n) + h_utils(d, n)
            differ = h_utils(b, n) + h_utils(c, n)
            pre_term = h_utils(c + d, n) + h_utils(a + b, n)
            true_term = h_utils(b + d, n) + h_utils(a + c, n)
            if agree >= differ:
                # Agreement dominates: store the combined term minus the
                # single-row term.
                combined = agree + differ
                results_0[ind_p, ind_t] = combined - pre_term
                results_1[ind_t, ind_p] = combined - true_term
            else:
                # Otherwise fall back to the single-row terms alone.
                results_0[ind_p, ind_t] = pre_term
                results_1[ind_t, ind_p] = true_term
    return results_0, results_1
|
||
def nmi_community(pre_ys, true_ys):
    """
    Normalized Mutual Information to evaluate overlapping community finding algorithms

    args:
        pre_ys (k * N): predicted rows; N is taken from the first row
        true_ys (l * N): ground-truth rows
    returns:
        0.5 * (hx + hy - hxy - hyx) / max(hx, hy), where hx and hy come from
        cover_entropy and hxy / hyx are the sums of the row-wise minima of
        the two matrices from calc_modified_conditional_matrix.  There is
        no guard for max(hx, hy) being 0 or for an empty ``pre_ys``.
    """
    n = len(pre_ys[0])
    entropy_pre = cover_entropy(pre_ys, n)
    entropy_true = cover_entropy(true_ys, n)
    matrix_0, matrix_1 = calc_modified_conditional_matrix(pre_ys, true_ys, n)
    # Each row contributes only its smallest entry.
    best_0 = sum(min(row) for row in matrix_0)
    best_1 = sum(min(row) for row in matrix_1)
    return 0.5 * (entropy_pre + entropy_true - best_0 - best_1) / max(entropy_pre, entropy_true)
|
||
def f1_pair(pred_y, true_y):
    """score one predicted community against one ground-truth community
    args:
        pred_y (N * 1): binary array, 1 means the corresponding instance belongs to predicted community
        true_y (N * 1): binary array, 1 means the corresponding instance belongs to golden community
    returns:
        (f1score, precision, recall); (0, 0, 0) when the two communities
        share no instance
    """
    overlap = sum(py + ty == 2 for py, ty in zip(pred_y, true_y))
    if overlap == 0:
        return 0, 0, 0
    hits = float(overlap)
    precision = hits / sum(pred_y)
    recall = hits / sum(true_y)
    return 2 * precision * recall / (precision + recall), precision, recall
|
||
def f1_community(pre_ys, true_ys):
    """size-weighted average of the best f1 score of each predicted community
    args:
        pre_ys (k * N): predicted communities
        true_ys (l * N): ground-truth communities
    returns:
        sum over predicted communities of (best f1 against any true
        community) * (community size), divided by the total size.  There is
        no guard for a total size of 0 or an empty ``true_ys``.
    """
    weighted_score = 0.0
    member_total = 0
    for pre_y in pre_ys:
        size = sum(pre_y)
        best = max(f1_pair(pre_y, true_y)[0] for true_y in true_ys)
        member_total += size
        weighted_score += best * size
    return float(weighted_score) / member_total
|
||
|
||
def jc_pair(pred_y, true_y):
    """jc score (shared instances over instances in either) for one pair of communities
    args:
        pred_y (N * 1): binary array, 1 means the corresponding instance belongs to predicted community
        true_y (N * 1): binary array, 1 means the corresponding instance belongs to golden community
    returns:
        0 when nothing is shared, otherwise shared / in-either as a float
    """
    totals = [py + ty for py, ty in zip(pred_y, true_y)]
    shared = sum(total == 2 for total in totals)
    if shared == 0:
        return 0
    in_either = sum(total > 0 for total in totals)
    return float(shared) / in_either
|
||
def jc_community(pre_ys, true_ys):
    """two-sided average of best jc scores between two sets of communities
    args:
        pre_ys (k * N): predicted communities
        true_ys (l * N): ground-truth communities
    returns:
        half the mean best-match score of the predicted communities plus
        half the mean best-match score of the ground-truth communities
    """
    score = 0.0

    # Each predicted community contributes 1 / (2k) of its best match.
    pre_weight = float(1) / (len(pre_ys) * 2)
    for pre_y in pre_ys:
        best = max(jc_pair(pre_y, true_y) for true_y in true_ys)
        score += best * pre_weight

    # Each ground-truth community contributes 1 / (2l) of its best match.
    true_weight = float(1) / (len(true_ys) * 2)
    for true_y in true_ys:
        best = max(jc_pair(pre_y, true_y) for pre_y in pre_ys)
        score += best * true_weight

    return score
|
||
def _read_int_rows(path):
    """Parse the CSV file at *path* into a list of rows of ints."""
    # Python 2's csv module expects a binary-mode file; Python 3's expects
    # text mode opened with newline=''.  Only the matching branch runs.
    if sys.version_info[0] < 3:
        handle = open(path, 'rb')
    else:
        handle = open(path, 'r', newline='')
    with handle:
        return [[int(element) for element in line] for line in csv.reader(handle)]


# Usage: <script> TRUTH_CSV PRED_CSV scores the prediction against the truth;
# any other argument count runs the built-in demo covers.
if __name__ == "__main__":
    if len(sys.argv) == 3:
        truth = _read_int_rows(sys.argv[1])
        pred = _read_int_rows(sys.argv[2])
        cases = [(pred, truth)]
    else:
        y = [[1, 1, 0, 1, 0], [0, 1, 0, 1, 1], [1, 0, 0, 0, 1], [0, 0, 0, 1, 0]]
        x1 = [[0, 1, 0, 1, 1], [1, 1, 0, 1, 0], [1, 0, 0, 0, 1], [0, 0, 0, 1, 0]] #same
        x2 = [[1, 1, 1, 1, 0], [0, 1, 0, 1, 1], [1, 0, 0, 0, 1], [0, 0, 0, 1, 0]] #1 error
        x3 = [[0, 0, 0, 1, 1], [0, 0, 0, 1, 1], [0, 0, 0, 1, 1], [0, 0, 0, 1, 1]] #lots of error
        cases = [(x1, y), (x2, y), (x3, y)]

    # One heading per metric, followed by one score per case.
    for title, metric in (('f1 score:', f1_community),
                          ('jc score:', jc_community),
                          ('nmi score:', nmi_community)):
        print(title)
        for pre_ys, true_ys in cases:
            print(metric(pre_ys, true_ys))
Oops, something went wrong.