Permalink
Browse files

Add files via upload

  • Loading branch information...
yangji9181 committed Feb 13, 2018
1 parent 9f3add8 commit 1285eec25f1175c4e2fc96f9cf46cc31663e7b2c
Showing with 1,264 additions and 0 deletions.
  1. +56 −0 core.py
  2. +59 −0 cross.py
  3. +160 −0 evaluate.py
  4. +188 −0 fit.py
  5. +66 −0 kmeans.py
  6. +30 −0 link.py
  7. +181 −0 model.py
  8. +119 −0 multiview.py
  9. +347 −0 preprocess.py
  10. +58 −0 regress.py
View
56 core.py
@@ -0,0 +1,56 @@
import numpy as np
import matplotlib.pyplot as plt
import pickle
# Script: plot log(degree) against log(rank) for every user listed in
# ../data/core_degree, highlighting the direct friends of new users, and report
# which share of those friends falls inside the top `cutperc` of the ranking.

# Fraction of the ranking that is marked as the cutoff (0.1 -> top 10%).
cutperc = 0.1

core_friend = set()
core_user = set()
with open('../data/core_user', 'r') as f:
    next(f)  # skip the first line (presumably a header -- confirm)
    for line in f:
        core_user.add(line.strip())
with open('../data/core_friend', 'r') as f:
    next(f)  # skip the first line (presumably a header -- confirm)
    for line in f:
        core_friend.add(line.strip())

# y_* collect the integer parsed from the second CSV column (plotted as the
# degree); x_* collect the 0-based row index of that entry, which the plot
# labels as "rank by degree" (assumes the file is sorted by degree -- confirm).
y_user = []
y_friend = []
y_rest = []
x_user = []
x_friend = []
x_rest = []
with open('../data/core_degree', 'r') as f:
    count = 0
    next(f)  # skip the first line (presumably a header -- confirm)
    for line in f:
        words = line.split(',')
        node = words[0].strip()
        degree = int(words[1].strip())
        # core_user takes precedence over core_friend when an id is in both.
        if node in core_user:
            y_user.append(degree)
            x_user.append(count)
        elif node in core_friend:
            y_friend.append(degree)
            x_friend.append(count)
        else:
            y_rest.append(degree)
            x_rest.append(count)
        count += 1

# The cutoff line is drawn in log space because the plot axes are log values.
rank_cutoff = count * cutperc
cutoff = np.log(rank_cutoff)

# x_friend holds raw ranks, so they must be compared with the raw rank cutoff;
# comparing them with the log-space `cutoff` would undercount drastically.
if x_friend:
    percent = sum(1 for rank in x_friend if rank < rank_cutoff) * 1.0 / len(x_friend)
else:
    # No friends found: report 0 instead of dividing by zero.
    percent = 0.0
print(cutoff, percent)

# Note: ranks are 0-based, so the very first row maps to log(0) = -inf.
plt.scatter(np.log(y_friend), np.log(x_friend), marker='d', alpha=0.1, facecolors='none', edgecolors='r', label='K direct friends of new users')
plt.scatter(np.log(y_rest), np.log(x_rest), marker='.', alpha=0.05, facecolors='none', edgecolors='g', label='All other users')
plt.axhline(cutoff, ls='--', c='b', label='Cutoff at top 10% users \nwith highest degrees')
plt.legend(fontsize=15, )
plt.ylabel('Log(Rank By Degree)', fontsize=18)
plt.xlabel('Log(Degree)', fontsize=18)
plt.xticks(fontsize=0)
plt.yticks(fontsize=0)
# The new users themselves (x_user / y_user) are collected but not drawn:
#plt.scatter(x_user, y_user, marker='*', facecolors='none', edgecolors='b')
plt.savefig('../plots/fig_core_original.png', format='png', bbox_inches='tight')
plt.show()
plt.clf()
View
@@ -0,0 +1,59 @@
from evaluate import f1_community, jc_community, nmi_community
import pickle
import numpy as np
import matplotlib.pyplot as plt
# Script: load several label assignments over the same items, convert each one
# to a list of binary community-membership vectors, score every ordered pair of
# assignments with F1 / Jaccard / NMI, then dump the score matrices to
# ../data/cross.txt and render each one as a heat-map.

# NOTE: pickle.load can execute arbitrary code; only load trusted label files.
with open('../data/labels.pkl', 'rb') as f:
    labellist = pickle.load(f)
dim = len(labellist)

# One-hot encode each labelling: newl[r][c] == 1 iff item c carries the r-th
# distinct label. Rows are looked up through the sorted unique labels rather
# than by raw label value, so non-contiguous or negative labels no longer
# raise IndexError or silently land in the wrong row.
newllist = []
for labels in labellist:
    unique = np.unique(labels)
    row_of = {label: row for row, label in enumerate(unique)}
    newl = [[0] * len(labels) for _ in unique]
    for col, label in enumerate(labels):
        newl[row_of[label]][col] = 1
    newllist.append(newl)

# Pairwise score matrices; the diagonal (a labelling against itself) is fixed
# at 1 without calling the metric functions.
cross_f1 = []
cross_jc = []
cross_nmi = []
for i in range(dim):
    cross_f1.append([0] * dim)
    cross_jc.append([0] * dim)
    cross_nmi.append([0] * dim)
    for j in range(dim):
        if i != j:
            cross_f1[i][j] = f1_community(newllist[i], newllist[j])
            cross_jc[i][j] = jc_community(newllist[i], newllist[j])
            cross_nmi[i][j] = nmi_community(newllist[i], newllist[j])
        else:
            cross_f1[i][j] = 1
            cross_jc[i][j] = 1
            cross_nmi[i][j] = 1

# Text dump: the three matrices in f1, jc, nmi order, one row per line with a
# trailing space after every value, separated by a dashed line.
with open('../data/cross.txt', 'w') as f:
    for position, matrix in enumerate((cross_f1, cross_jc, cross_nmi)):
        if position:
            f.write('------------\n')
        for row in matrix:
            f.write(''.join(str(value) + ' ' for value in row))
            f.write('\n')

for matrix, target in (
    (cross_f1, "../plots/f1_cross.png"),
    (cross_jc, "../plots/jc_cross.png"),
    (cross_nmi, "../plots/nmi_cross.png"),
):
    plt.imshow(matrix, cmap='Blues', interpolation='nearest')
    plt.savefig(target, bbox_inches='tight')
    # Start from a clean figure so heat-maps do not pile up on the same axes.
    plt.clf()
View
@@ -0,0 +1,160 @@
import math
import numpy as np
import sys
import csv
def h_utils(w, n):
    """Return the entropy term ``-w * log(w / n)`` for a count.

    Args:
        w: count of items in one part.
        n: total that ``w`` is divided by.

    Returns:
        0 when ``w`` is zero (so log(0) is never evaluated), otherwise
        ``-w * log(w / n)`` using the natural logarithm.
    """
    if w == 0:
        return 0
    fraction = float(w) / n
    return -w * math.log(fraction)
def cover_entropy(xs, n):
    """Sum the member / non-member entropy terms over all communities.

    Args:
        xs: iterable of membership vectors; ``sum(x)`` is used as the
            community size (assumes 0/1 entries).
        n: total number of items.

    Returns:
        Float total of ``h_utils(size, n) + h_utils(n - size, n)`` over ``xs``.
    """
    total = 0.0
    for membership in xs:
        size = sum(membership)
        total += h_utils(size, n) + h_utils(n - size, n)
    return total
def calc_modified_conditional_matrix(pre_ys, true_ys, n):
    """Build the two pairwise conditional-entropy tables used by the NMI score.

    Args:
        pre_ys: sequence of predicted membership vectors (assumes 0/1 entries).
        true_ys: sequence of ground-truth membership vectors (same assumption).
        n: total number of items, passed through to ``h_utils``.

    Returns:
        Tuple ``(results_0, results_1)`` of numpy arrays with shapes
        ``(len(pre_ys), len(true_ys))`` and ``(len(true_ys), len(pre_ys))``.
    """
    results_0 = np.zeros((len(pre_ys), len(true_ys)))
    results_1 = np.zeros((len(true_ys), len(pre_ys)))
    for ind_p, pre_y in enumerate(pre_ys):
        pre_size = sum(pre_y)
        for ind_t, true_y in enumerate(true_ys):
            pair_sums = [py + ty for (py, ty) in zip(pre_y, true_y)]
            # Contingency counts: a = in neither, d = in both,
            # b = only in true_y, c = only in pre_y.
            a = sum(1 for total in pair_sums if total == 0)
            d = sum(1 for total in pair_sums if total == 2)
            b = sum(true_y) - d
            c = pre_size - d

            t1 = h_utils(a, n) + h_utils(d, n)  # agreeing cells
            t2 = h_utils(b, n) + h_utils(c, n)  # disagreeing cells
            t3 = h_utils(c + d, n) + h_utils(a + b, n)  # split induced by pre_y
            t4 = h_utils(b + d, n) + h_utils(a + c, n)  # split induced by true_y

            if t1 < t2:
                # Disagreement dominates: fall back to the marginal terms.
                results_0[ind_p][ind_t] = t3
                results_1[ind_t][ind_p] = t4
                continue
            # NOTE(review): the usual H(X|Y) = H(X,Y) - H(Y) form would
            # subtract t4 for results_0 and t3 for results_1; the original
            # pairing is kept unchanged here -- confirm intent.
            results_0[ind_p][ind_t] = t1 + t2 - t3
            results_1[ind_t][ind_p] = t1 + t2 - t4
    return results_0, results_1
def nmi_community(pre_ys, true_ys):
    """Score two covers with Normalized Mutual Information for overlapping communities.

    Args:
        pre_ys: sequence of predicted membership vectors; the length of the
            first one is taken as the number of items.
        true_ys: sequence of ground-truth membership vectors.

    Returns:
        ``0.5 * (hx + hy - hxy - hyx) / max(hx, hy)`` where ``hx`` / ``hy`` are
        the cover entropies and ``hxy`` / ``hyx`` sum the row-wise minima of
        the two conditional tables.

    Raises:
        IndexError: if ``pre_ys`` is empty.
        ZeroDivisionError: if both cover entropies are zero.
    """
    n = len(pre_ys[0])
    hx = cover_entropy(pre_ys, n)
    hy = cover_entropy(true_ys, n)
    table_pre, table_true = calc_modified_conditional_matrix(pre_ys, true_ys, n)
    # For every community keep only its best (smallest) conditional term.
    hxy = sum(min(row) for row in table_pre)
    hyx = sum(min(row) for row in table_true)
    shared = hx + hy - hxy - hyx
    return 0.5 * shared / max(hx, hy)
def f1_pair(pred_y, true_y):
    """Compute F1, precision and recall for one predicted / golden community pair.

    Args:
        pred_y (N * 1): binary array, 1 marks an instance in the predicted community.
        true_y (N * 1): binary array, 1 marks an instance in the golden community.

    Returns:
        Tuple ``(f1score, precision, recall)``; the integer triple
        ``(0, 0, 0)`` when the two communities share no instance.
    """
    overlap = sum(1 for (py, ty) in zip(pred_y, true_y) if py + ty == 2)
    if overlap == 0:
        return 0, 0, 0
    hits = float(overlap)
    precision = hits / sum(pred_y)
    recall = hits / sum(true_y)
    f1score = 2 * precision * recall / (precision + recall)
    return f1score, precision, recall
def f1_community(pre_ys, true_ys):
    """Compute the size-weighted F1 score of predicted communities against ground truth.

    Each predicted community is matched with the ground-truth community that
    gives it the highest F1; those best scores are averaged, weighted by the
    predicted community's size (``sum(pre_y)``).

    Args:
        pre_ys (k * N): sequence of binary membership vectors (predicted).
        true_ys (l * N): sequence of binary membership vectors (ground truth).

    Returns:
        Float weighted average of the best F1 per predicted community, or 0.0
        when either side is empty or no predicted community has any member
        (previously these cases raised ValueError / ZeroDivisionError).
    """
    # Nothing to match against or nothing to match: define the score as 0.
    if len(pre_ys) == 0 or len(true_ys) == 0:
        return 0.0

    tot_size = 0
    tot_fscore = 0.0
    for pre_y in pre_ys:
        cur_size = sum(pre_y)
        tot_size += cur_size
        best = max(f1_pair(pre_y, true_y)[0] for true_y in true_ys)
        tot_fscore += best * cur_size

    # All predicted communities are empty, so the weights sum to zero.
    if tot_size == 0:
        return 0.0
    return float(tot_fscore) / tot_size
def jc_pair(pred_y, true_y):
    """Compute the Jaccard score for one predicted / golden community pair.

    Args:
        pred_y (N * 1): binary array, 1 marks an instance in the predicted community.
        true_y (N * 1): binary array, 1 marks an instance in the golden community.

    Returns:
        Intersection size divided by union size as a float, or the integer 0
        when the two communities share no instance.
    """
    pairs = list(zip(pred_y, true_y))
    shared = sum(1 for (py, ty) in pairs if py + ty == 2)
    if shared == 0:
        return 0
    union = sum(1 for (py, ty) in pairs if py + ty > 0)
    return float(shared) / union
def jc_community(pre_ys, true_ys):
    """Compute the symmetric average best-match Jaccard score of two community sets.

    Every predicted community contributes its best Jaccard score against the
    ground truth, weighted by ``1 / (2 * len(pre_ys))``; every ground-truth
    community contributes its best score against the predictions, weighted by
    ``1 / (2 * len(true_ys))``.

    Args:
        pre_ys (k * N): sequence of binary membership vectors (predicted).
        true_ys (l * N): sequence of binary membership vectors (ground truth).

    Returns:
        Float sum of the two weighted halves, or 0.0 when either side is empty
        (previously this raised ZeroDivisionError / ValueError).
    """
    # With an empty side there is no best match to take: define the score as 0.
    if len(pre_ys) == 0 or len(true_ys) == 0:
        return 0.0

    tot_jcscore = 0.0
    pre_weight = float(1) / (len(pre_ys) * 2)
    for pre_y in pre_ys:
        tot_jcscore += max(jc_pair(pre_y, true_y) for true_y in true_ys) * pre_weight
    true_weight = float(1) / (len(true_ys) * 2)
    for true_y in true_ys:
        tot_jcscore += max(jc_pair(pre_y, true_y) for pre_y in pre_ys) * true_weight
    return tot_jcscore
if __name__ == "__main__":
    # Metrics are reported in this fixed order, each under its own heading.
    metrics = (
        ('f1 score:', f1_community),
        ('jc score:', jc_community),
        ('nmi score:', nmi_community),
    )
    if len(sys.argv) == 3:
        # Usage: <script> TRUTH_CSV PRED_CSV -- every CSV row is one community
        # given as integer membership flags.
        # NOTE(review): 'rb' is the mode the Python 2 csv module expects;
        # Python 3's csv.reader needs a text-mode file -- confirm target version.
        with open(sys.argv[1], 'rb') as truthfile:
            truth = [[int(element) for element in line] for line in csv.reader(truthfile)]
        with open(sys.argv[2], 'rb') as predfile:
            pred = [[int(element) for element in line] for line in csv.reader(predfile)]
        for heading, metric in metrics:
            print(heading)
            print(metric(pred, truth))
    else:
        # No files given: run a small built-in demo against a fixed ground truth.
        y = [[1, 1, 0, 1, 0], [0, 1, 0, 1, 1], [1, 0, 0, 0, 1], [0, 0, 0, 1, 0]]
        x1 = [[0, 1, 0, 1, 1], [1, 1, 0, 1, 0], [1, 0, 0, 0, 1], [0, 0, 0, 1, 0]] #same
        x2 = [[1, 1, 1, 1, 0], [0, 1, 0, 1, 1], [1, 0, 0, 0, 1], [0, 0, 0, 1, 0]] #1 error
        x3 = [[0, 0, 0, 1, 1], [0, 0, 0, 1, 1], [0, 0, 0, 1, 1], [0, 0, 0, 1, 1]] #lots of error
        for heading, metric in metrics:
            print(heading)
            for candidate in (x1, x2, x3):
                print(metric(candidate, y))
Oops, something went wrong.

0 comments on commit 1285eec

Please sign in to comment.