In [38]:
with open('../data/v2v.txt', 'r') as file:
    edges = [tuple(map(int, line.strip().split())) for line in file]

In [39]:
graph = {}
num_nodes = 0
num_edges = 0

for s, d in edges:
    if s not in graph:
        num_nodes += 1
        graph[s] = []
    if d not in graph:
        num_nodes += 1
        graph[d] = []

    num_edges += 1
    graph[s].append(d)

In [40]:
def pagerank(adjacency_list, damping_factor=0.85, max_iterations=100, tolerance=1e-6):
    # Initialize variables
    num_nodes = len(adjacency_list)

    initial_pr = 1 / num_nodes
    page_rank = {node: initial_pr for node in adjacency_list}
    
    # Main iteration loop
    for _ in range(max_iterations):
        new_page_rank = {}
        for node in adjacency_list:
            new_rank = (1 - damping_factor) / num_nodes
            new_rank += damping_factor * sum(
                page_rank[neighbor] / len(adjacency_list[neighbor]) 
                for neighbor in adjacency_list if node in adjacency_list[neighbor]
            )

            new_page_rank[node] = new_rank
        
        # Check for convergence
        if all(abs(new_page_rank[node] - page_rank[node]) < tolerance for node in adjacency_list):
            break
        
        page_rank = new_page_rank
    
    return page_rank

In [41]:
rank = pagerank(graph)

In [42]:
neighbor = {}
for s in [688,387,277,876,999,1777,6319]:
    neighbor[s] = graph[s]
    for node in graph:
        if s in graph[node]:
            neighbor[s].append(node)
neighbor

{688: [298,
  559,
  1004,
  1131,
  1530,
  1531,
  1532,
  1533,
  1534,
  1535,
  1914,
  226,
  2879,
  1648,
  80,
  250,
  2440,
  94,
  229,
  854,
  3583,
  3585,
  1112,
  3665,
  374,
  621,
  6097,
  5610,
  3243,
  3844,
  3845,
  3937,
  681,
  692,
  3077,
  716,
  997,
  4267,
  1197,
  1362,
  1386,
  1429,
  1432,
  4551,
  1546,
  4583,
  1587,
  4629,
  1926,
  1965,
  4865,
  5824,
  4877,
  5008,
  5201,
  2325,
  2371,
  2717,
  5299,
  5372,
  5629,
  5749,
  6342,
  6365,
  6569,
  6720,
  6919,
  6928,
  7415,
  7818,
  7610,
  7783,
  8519,
  8465,
  8555,
  8801],
 387: [181,
  1471,
  1939,
  2190,
  3667,
  3709,
  3710,
  3711,
  3712,
  3713,
  80,
  2440,
  135,
  258,
  1417,
  3583,
  1112,
  2341,
  384,
  5958,
  590,
  3845,
  4004,
  2748,
  4117,
  926,
  4179,
  2459,
  2749,
  1156,
  4348,
  4394,
  1362,
  1686,
  1758,
  4766,
  4937,
  5194,
  5279,
  5554,
  5564,
  5596,
  3291,
  5621,
  5906,
  6283,
  6296,
  6342,
  6420,
  6435,
  651

In [43]:
for s in [688,387,277,876,999,1777,6319]:
    div_rank = {}
    for node in neighbor[s]:
        
        div_rank[node] = abs(rank[s]-rank[node])
    top_nodes = sorted(div_rank, key=div_rank.get)[:10]
    # top_nodes = sorted(div_rank.items(),key=lambda x:x[1])[:10]
    print(f'{s}: {top_nodes}')
    

688: [226, 229, 854, 1131, 5008, 1531, 1914, 1535, 621, 6097]
387: [1471, 181, 1939, 3667, 3710, 3709, 3713, 3845, 2748, 3711]
277: [224, 854, 1606, 350, 346, 2198, 1744, 2675, 270, 1882]
876: [854, 304, 569, 877, 1015, 885, 2545, 878, 3845, 883]
999: [385, 391, 917, 3785, 2209, 3246, 206, 17, 3806, 2303]
1777: [3220, 2066, 2209, 5008, 3035, 269, 2545, 1251, 2923, 3017]
6319: [7089, 5366, 5424, 6033, 6510, 5312, 7381, 7102, 1549, 1266]


In [12]:
# Calculate similarity with in-neighbors
for s in [688,387,277,876,999,1777,6319]:
    div_rank = {}
    for node in input_graph[s]:
        div_rank[node] = abs(rank[s]-rank[node])
    top_nodes = sorted(div_rank, key=div_rank.get)[:10] 
    print(f'{s}: {top_nodes}')
    

688: [226, 229, 854, 5008, 1914, 621, 6097, 2371, 5299, 94]
387: [3845, 2748, 135, 1156, 1112, 258, 1417, 2459, 4004, 4117]
277: [854, 350, 2198, 1744, 270, 1882, 6111, 1124, 1662, 2545]
876: [854, 304, 1015, 2545, 3845, 135, 4072, 3620, 4583, 1648]
999: [385, 391, 3785, 2209, 206, 1662, 4904, 997, 2748, 1926]
1777: [2209, 5008, 2545, 1251, 3017, 3721, 719, 764, 4153, 4675]
6319: [8281, 4551, 4950]


In [13]:
# Calculate similarity with out-neighbors
for s in [688,387,277,876,999,1777,6319]:
    div_rank = {}
    for node in graph:
        div_rank[node] = abs(rank[s]-rank[node])
    top_nodes = sorted(div_rank, key=div_rank.get)[:10] 
    print(f'{s}: {top_nodes}')
    

688: [688, 226, 842, 876, 389, 388, 227, 222, 223, 386]
387: [387, 693, 48, 392, 224, 275, 1086, 1297, 672, 3079]
277: [277, 225, 271, 274, 279, 272, 887, 391, 278, 229]
876: [876, 842, 226, 688, 223, 31, 389, 391, 388, 227]
999: [999, 276, 821, 275, 385, 230, 221, 541, 47, 48]
1777: [1777, 3968, 3370, 2573, 501, 2663, 2768, 2327, 1001, 952]
6319: [6319, 5494, 4373, 7089, 7183, 7184, 7185, 6758, 3684, 4546]


In [20]:
for s in [688,387,277,876,999,1777,6319]:
    div_rank = {}
    
    s_mean_in =0
    for in_node in input_graph[s]:
          s_mean_in+= rank[in_node]
    s_mean_in/=len(input_graph[s])
        
    s_mean_out=0
    for out_node in graph[s]:
          s_mean_out+= rank[out_node]
    s_mean_out/=len(graph[s])
    
    
    for node in neighbor[s]:
        
        mean_in=0
        if len(input_graph[node])>0:
            for in_node in input_graph[node]:
                 mean_in+= rank[in_node]
            mean_in/=len(input_graph[node])
        
        mean_out=0
        if len(graph[node])>0:
          for out_node in graph[node]:
            mean_out+= rank[out_node]
          mean_out/=len(graph[node])
        
        div_rank[node] = ((rank[s]-rank[node])**2 + (s_mean_in-mean_in)**2 + (s_mean_out-mean_out)**2)**0.5
    top_nodes = sorted(div_rank, key=div_rank.get)[:10] 
    print(f'{s}: {top_nodes}')
    

688: [226, 854, 229, 1131, 1531, 5299, 1535, 4865, 1530, 94]
387: [181, 1939, 1471, 3710, 3711, 1156, 3709, 3713, 2459, 3712]
277: [224, 1606, 350, 346, 2675, 1319, 2678, 1124, 2680, 854]
876: [304, 854, 569, 4072, 885, 878, 877, 3195, 3568, 882]
999: [917, 3246, 206, 1689, 17, 3806, 2303, 5079, 4298, 391]
1777: [1251, 2066, 3220, 2248, 509, 719, 269, 2923, 4718, 4367]
6319: [6510, 5424, 4950, 5366, 7102, 7375, 8281, 1159, 4730, 6106]
