In [49]:
from pregel import Vertex, Pregel
import index_database as db

In [50]:
class PageRankVertex(Vertex):
    """Vertex whose ``value`` is iteratively updated with a PageRank-style rule.

    On every superstep the vertex splits its current value evenly between
    its outgoing vertices and then recomputes its own value from the
    messages it has received:

        value = alpha / vertices_count + (1 - alpha) * sum(incoming ranks)
    """

    # The vertex deactivates itself once ``superstep`` exceeds this number.
    steps_count = 100
    # Weight of the uniform ``1 / vertices_count`` term in the update rule.
    alpha = 0.15

    def __init__(self,id,value,out_vertices, vertices_count):
        """Create a vertex.

        Args:
            id: Identifier forwarded to the ``Vertex`` base class.
            value: Initial value of the vertex.
            out_vertices: Vertices this vertex sends messages to.
            vertices_count: Total number of vertices in the graph; used as
                the denominator of the uniform term in ``update``.
        """
        super().__init__(id,value,out_vertices)

        self.vertices_count = vertices_count

    def update(self):
        """Run one superstep: emit rank shares, then recompute ``value``.

        ``superstep``, ``incoming_messages``, ``outgoing_messages`` and
        ``active`` are presumably maintained by the ``pregel`` framework
        between calls -- confirm against ``pregel.Vertex``.
        """
        if self.superstep > PageRankVertex.steps_count:
            self.active = False
            return

        # Shares are built from the value computed on the previous call, so
        # they must be prepared before ``self.value`` is overwritten below.
        if self.out_vertices:
            outgoing_rank = self.value / len(self.out_vertices)
            self.outgoing_messages = [
                (outgoing_vertex, outgoing_rank)
                for outgoing_vertex in self.out_vertices
            ]
        else:
            # A vertex without outgoing links has nobody to send to; emit
            # nothing instead of dividing by zero. Its rank is therefore
            # not redistributed to the rest of the graph.
            self.outgoing_messages = []

        # Only element [1] of each incoming message is used as the rank;
        # element [0] is presumably the sender -- confirm against the
        # framework's message redistribution.
        incoming_rank = sum(message[1] for message in self.incoming_messages)
        self.value = (
            PageRankVertex.alpha / self.vertices_count
            + (1 - PageRankVertex.alpha) * incoming_rank
        )


In [51]:
def create_vertices():
    """Build one ``PageRankVertex`` per indexed page and wire up the links.

    Every page returned by ``db.Urls.get_all_indexed_pages()`` becomes a
    vertex whose id is the page itself and whose initial value is
    ``1 / <number of pages>``. The out-vertices are attached in a second
    pass, because a link can only be resolved once the vertex of its
    target page exists.

    Returns:
        The values view of the page -> vertex dictionary.
    """
    pages = list(db.Urls.get_all_indexed_pages())
    page_count = len(pages)

    links_by_page = {}
    vertex_by_page = {}
    for page in pages:
        links_by_page[page] = list(page.get_all_indexed_outgoing_links())
        vertex_by_page[page] = PageRankVertex(page, 1 / page_count, [], page_count)

    # Second pass: translate every linked page into its vertex object.
    for page, vertex in vertex_by_page.items():
        vertex.out_vertices = [
            vertex_by_page[linked_page] for linked_page in links_by_page[page]
        ]

    return vertex_by_page.values()


In [52]:
# Build the graph and run the Pregel computation on it.
# NOTE(review): the second argument (8) is presumably the worker count --
# confirm against pregel.Pregel.
vertices = create_vertices()
pregel = Pregel(vertices, 8)
pregel.run()

# Order the vertices from the highest to the lowest computed value.
data = list(pregel.vertices)
data.sort(key=lambda item: item.value, reverse=True)

# Print each page URL followed by its value on the next line.
for vertex in data:
    print(vertex.id.url)
    print(vertex.value)

https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/SingleCluster.html
0.05012001708008404
https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/ClusterSetup.html
0.05012001708008404
https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/CommandsManual.html
0.05012001708008404
https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/FileSystemShell.html
0.05012001708008404
https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/Compatibility.html
0.05012001708008404
https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/DownstreamDev.html
0.05012001708008404
https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/AdminCompatibilityGuide.html
0.05012001708008404
https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/InterfaceClassification.html
0.05012001708008404
https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/filesystem/index.h