-
Notifications
You must be signed in to change notification settings - Fork 0
/
page.py
31 lines (26 loc) · 861 Bytes
/
page.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import sys
from pyspark import SparkContext
def edge_endpoints(entry):
    """Return every node id mentioned by one adjacency entry.

    ``entry`` is a ``(node, edgelist)`` pair; the result is the edge list
    followed by the source node itself.  Plain indexing/unpacking in the body
    is used instead of a tuple-unpacking parameter, which is a SyntaxError on
    Python 3.
    """
    node, edgelist = entry
    return list(edgelist) + [node]


def rank_contributions(pair):
    """Split a node's score evenly among its out-neighbours.

    ``pair`` is ``(neighborlist, score)``.  Returns one ``(neighbor, share)``
    tuple per neighbour, or an empty list when there are no neighbours (which
    also avoids dividing by zero).
    """
    neighborlist, score = pair
    if not neighborlist:
        return []
    share = 1.0 * score / len(neighborlist)
    return [(neighbor, share) for neighbor in neighborlist]


def score_delta(pair):
    """Return the absolute difference of an ``(old_score, new_score)`` pair."""
    old_score, new_score = pair
    return abs(old_score - new_score)


if __name__ == "__main__":
    # Reuse an already-running context (e.g. the pyspark shell's ``sc``) or
    # create one, so the script also works under spark-submit.
    sc = SparkContext.getOrCreate()

    # Graph, initial scores and per-iteration scores all share this partition
    # count so the joins below stay co-partitioned.
    num_partitions = 10

    lines = sc.textFile("graph")
    # SECURITY NOTE(review): eval() executes every input line as Python code.
    # Only run this on a trusted "graph" file; ast.literal_eval would be the
    # safe alternative if the lines are plain literals.
    # Presumably each line is a "(source, target)" tuple literal -- groupByKey
    # needs key/value pairs; confirm against the data file.
    graph_rdd = lines.map(eval) \
        .groupByKey() \
        .mapValues(list) \
        .partitionBy(num_partitions) \
        .cache()

    # Every node id that appears as a source or as a target.
    nodes = graph_rdd.flatMap(edge_endpoints) \
        .distinct() \
        .cache()
    size = nodes.count()

    # Uniform starting distribution.
    scores = nodes.map(lambda node: (node, 1.0 / size)) \
        .partitionBy(num_partitions) \
        .cache()

    # Zero entry per node: unioned into each iteration so nodes that receive
    # no contribution stay in ``scores`` (with 0.0) instead of disappearing,
    # which keeps the convergence check defined over all nodes.
    zero_scores = nodes.map(lambda node: (node, 0.0))

    i = 0
    eps = 0.001
    max_iterations = 100
    err = eps + 1.0  # guarantees the first iteration runs
    while i < max_iterations and err > eps:
        i += 1
        old_scores = scores
        joined = graph_rdd.join(scores)
        contributions = joined.values().flatMap(rank_contributions)
        scores = contributions.union(zero_scores) \
            .reduceByKey(lambda a, b: a + b, num_partitions) \
            .cache()
        # Total absolute change between successive score vectors.
        # NOTE(review): L1 change is assumed as the convergence measure --
        # confirm the intended norm.
        err = old_scores.join(scores).values().map(score_delta).sum()
        # The new scores are materialised and cached by the action above, so
        # the previous iteration's cached copy can be released.
        old_scores.unpersist()
    # NOTE(review): the final ranks are left in ``scores``; no output step is
    # visible in this file, so none is added here.