In [9]:
import index_database as db
from typing import Iterator

In [10]:
# Weight of the uniform `alpha / len(all_pages)` term that reduce_items adds to
# every page's rank; the summed incoming contributions are scaled by (1 - alpha).
alpha : float = 0.15
# Number of times the driver loop calls do_iteration().
iterations_count : int = 100

In [11]:
# Module-level state shared by map_items / reduce_items / do_iteration.
# It starts empty here and is populated by the database-loading cell.

# Every indexed page; its length is the divisor for the uniform rank shares.
all_pages : list[db.Urls] = []
# Current rank of each page; overwritten in place by every do_iteration() call.
calculated_ranks : dict[db.Urls, float] = { }
# For each page, the list of pages it links to (an empty list means no outgoing links).
outgoing_links : dict[db.Urls, list[db.Urls]] = { }

In [12]:
def map_items(pages : Iterator[db.Urls]) -> Iterator[tuple[db.Urls, float]]:
    """Map step: emit the rank contributions produced by each page.

    Reads the module-level ``outgoing_links``, ``calculated_ranks`` and
    ``all_pages``.

    Args:
        pages: pages to process; each must be a key of both
            ``outgoing_links`` and ``calculated_ranks`` (a missing key raises
            ``KeyError``).

    Yields:
        ``(page, value)`` pairs. For every source page, in order:
        a ``(source, 0)`` pair, then either one pair per outgoing link
        carrying an equal share of the source's rank or, when the source has
        no outgoing links, one pair per entry of ``all_pages`` carrying
        ``rank / len(all_pages)``.
    """
    for source in pages:
        targets = outgoing_links[source]
        source_rank = calculated_ranks[source]

        # Zero-valued pair so the source itself always appears among the
        # emitted keys, even if nothing links to it.
        yield source, 0

        target_count = len(targets)
        if target_count == 0:
            # No outgoing links: spread the rank evenly over every known page.
            # The division stays inside the generator so an empty `all_pages`
            # simply produces nothing.
            yield from ((page, source_rank / len(all_pages)) for page in all_pages)
        else:
            share = source_rank / target_count
            yield from ((target, share) for target in targets)

def shuffle_items(items : Iterator[tuple["db.Urls", float]]) -> Iterator[tuple["db.Urls", list[float]]]:
    """Shuffle step: group ``(page, value)`` pairs by page.

    The whole input is read and sorted by each page's ``id`` before the first
    group is produced. Consecutive pairs whose pages compare equal (``==``)
    are merged into one group.

    Args:
        items: ``(page, value)`` pairs; every page must expose an ``id``
            attribute usable as a sort key.

    Yields:
        ``(page, values)`` tuples in ascending ``id`` order. ``values`` keeps
        the relative input order of the pairs (the sort is stable). Nothing is
        yielded when ``items`` is empty.
    """
    ordered = sorted(items, key=lambda pair: pair[0].id)

    remaining = iter(ordered)
    first = next(remaining, None)
    if first is None:
        # Empty input: emit no groups at all, rather than a (None, []) group
        # that downstream code would treat as a real page.
        return

    current_page, first_value = first
    values = [first_value]

    for page, value in remaining:
        if current_page == page:
            values.append(value)
        else:
            # The run of equal pages ended: flush it and open a new group.
            yield current_page, values
            values = [value]
        current_page = page

    # Flush the final group (always present once the input was non-empty).
    yield current_page, values

def reduce_items(items : Iterator[tuple[db.Urls, list[float]]]) -> Iterator[tuple[db.Urls, float]]:
    """Reduce step: turn each page's collected contributions into its new rank.

    Reads the module-level ``alpha`` and ``all_pages``.

    Args:
        items: ``(page, contributions)`` pairs.

    Yields:
        ``(page, rank)`` where
        ``rank = alpha / len(all_pages) + (1 - alpha) * sum(contributions)``.
    """
    for url, contributions in items:
        uniform_term = alpha / len(all_pages)
        weighted_sum = (1 - alpha) * sum(contributions)
        yield url, uniform_term + weighted_sum

In [13]:
def do_iteration():
    """Run one map -> shuffle -> reduce pass and store the new ranks.

    Feeds ``all_pages`` through ``map_items``, ``shuffle_items`` and
    ``reduce_items`` and writes every resulting ``(page, rank)`` pair into the
    module-level ``calculated_ranks``. Returns ``None``.
    """
    grouped = shuffle_items(map_items(all_pages))
    # The chain is lazy: nothing runs until `update` pulls the first pair.
    # shuffle_items sorts its entire input before yielding, so every read of
    # the old ranks in map_items happens before the first write below.
    calculated_ranks.update(reduce_items(grouped))

In [14]:
# Load every indexed page and its outgoing links from the index database,
# and give each page the same starting rank of 1 / (number of pages).
db.connect()
try:
    all_pages = list(db.Urls.get_all_indexed_pages())
    for page in all_pages:
        calculated_ranks[page] = 1 / len(all_pages)
        # Materialise the links now so later cells never touch the database.
        outgoing_links[page] = list(page.get_all_indexed_outgoing_links())
finally:
    # Always release the connection, even if one of the queries above raises.
    db.close()

In [15]:
# Refine calculated_ranks by running a fixed number of passes
# (iterations_count); there is no convergence check.
for i in range(iterations_count):
    do_iteration()

In [16]:
# Pages ordered from highest to lowest rank; pages with equal rank keep the
# insertion order they have in calculated_ranks.
keys = sorted(calculated_ranks, key=calculated_ranks.get, reverse=True)

# One "Page / Rank" entry per page, each followed by a blank line.
for key in keys:
    print("Page:\t", key.url)
    print("Rank:\t", calculated_ranks[key])
    print()

Page:	 https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/SingleCluster.html
Rank:	 0.050132625994694965

Page:	 https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/ClusterSetup.html
Rank:	 0.050132625994694965

Page:	 https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/CommandsManual.html
Rank:	 0.050132625994694965

Page:	 https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/FileSystemShell.html
Rank:	 0.050132625994694965

Page:	 https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/Compatibility.html
Rank:	 0.050132625994694965

Page:	 https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/DownstreamDev.html
Rank:	 0.050132625994694965

Page:	 https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/AdminCompatibilityGuide.html
Rank:	 0.050132625994694965

Page:	 https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/InterfaceClassificat