In [352]:
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
import random
import pandas as pd
import math 


In [353]:
# Tunable parameters for the walk experiments.
# Budget handed to every start vertex by the driver loops.
NUM_RANDOM_WALKS = 80
# Share of the budget reaching a node that is recorded as walks ending there
# (rounded down); the (1 - EPSILON) share is forwarded to that node's neighbors.
EPSILON = 0.15

In [354]:
# Input edge list, followed by the two output files that receive the context
# pairs produced by the recursive and the iterative walk respectively.
filepath = "../edgelists/BlogCatalog-edgelist.txt"
embeddingsrecursive = "../embeddings/BlogCatalog-edgelist.txt.embeddings-recursive"
embeddingsiterative = "../embeddings/BlogCatalog-edgelist.txt.embeddings-iterative"

In [355]:
def parseEdgeList(graph_file, delimiter=" ", weighted=False, direction="undirected"):
    """Load a graph from an edge-list file with networkx and report its size.

    Parameters
    ----------
    graph_file : path or file handle accepted by ``nx.read_edgelist``
    delimiter : str
        Column separator used in the file.
    weighted : bool
        When true, node labels are parsed as ``int`` and the third column is
        read as a float ``weight`` edge attribute. When false, the networkx
        defaults apply, so node labels are kept as read from the file.
    direction : str
        ``"undirected"`` (default) returns an undirected graph. Any other
        value returns a directed graph whose edges follow the column order of
        the file.

    Returns
    -------
    The loaded networkx graph.
    """
    undirected = direction == "undirected"

    read_options = {"delimiter": delimiter}
    if not undirected:
        # read_edgelist builds an undirected nx.Graph unless told otherwise, so
        # a directed result has to be requested at read time.
        read_options["create_using"] = nx.DiGraph()
    if weighted:
        read_options["nodetype"] = int
        read_options["data"] = (('weight', float),)

    graph = nx.read_edgelist(graph_file, **read_options)
    print(graph.number_of_nodes(), graph.number_of_edges(), " loaded from ", graph_file)

    if undirected:
        return graph.to_undirected()
    return graph

In [356]:
# Load the edge list at `filepath` with parseEdgeList's default arguments.
G = parseEdgeList(filepath)

10312 333983  loaded from  ../edgelists/BlogCatalog-edgelist.txt


In [357]:
# Open both output files for writing ('w' truncates any existing content).
# The handles are written to and closed by the cell that runs the walks.
femb_recursive = open(embeddingsrecursive, 'w')
femb_iterative = open(embeddingsiterative, 'w')

In [358]:
# G = nx.Graph()
# #Small example
# G.add_nodes_from(["A","B","C","D","E","F","G","H","I","J","K","L"])
# # G.add_nodes_from(["A","B","C","D","E"])
# G.add_edge("A", "D")
# G.add_edge("A", "E")
# G.add_edge("A", "I")
# G.add_edge("A", "K")

# G.add_edge("B", "D")
# G.add_edge("B", "C")
# G.add_edge("B", "L")
# G.add_edge("B", "K")

# G.add_edge("C", "D")

# G.add_edge("D", "H")
# G.add_edge("D", "G")
# G.add_edge("D", "E")

# G.add_edge("E", "F")

# G.add_edge("F", "G")

# G.add_edge("I", "E")
# G.add_edge("I", "J")

# G.add_edge("K", "L")




# # Draw graph
# nx.draw(G, with_labels = True)
# plt.show()

In [359]:
def getPerNodeBudget(numNodes, budget):
    """Return the share of ``budget`` each of ``numNodes`` nodes gets, rounded down.

    Raises ZeroDivisionError when ``numNodes`` is zero.
    """
    exact_share = budget / numNodes
    whole_share = math.floor(exact_share)
    return whole_share

In [360]:
def chooseNodes(list_nodes, n):
    """Pick ``n`` entries of ``list_nodes`` at random without replacement.

    Delegates to ``random.sample``, which raises ValueError when ``n``
    exceeds the number of available entries.
    """
    picked = random.sample(list_nodes, n)
    return picked

In [361]:
def updateContextPairs(context_pair, num_rand_walks_ending_here, context_pairs):
    """Add ``num_rand_walks_ending_here`` to the tally kept for ``context_pair``.

    ``context_pairs`` is updated in place; a pair seen for the first time
    starts at the given count. Nothing is returned.
    """
    try:
        running_total = context_pairs[context_pair] + num_rand_walks_ending_here
    except KeyError:
        # First occurrence of this pair.
        running_total = num_rand_walks_ending_here
    context_pairs[context_pair] = running_total

In [362]:
def BFSRandomWalk(graph, start, queue, context_pairs):
    """Spread a walk budget breadth-first from ``start`` and tally where walks end.

    Each ``(vertex, budget)`` entry taken from ``queue`` has its budget split
    as evenly as possible among the neighbors of ``vertex``; any remainder is
    given, one unit each, to randomly chosen neighbors. For every neighbor,
    ``floor(share * EPSILON)`` walks are recorded as ending there and
    ``floor(share * (1 - EPSILON))`` are forwarded by queueing the neighbor.

    Parameters
    ----------
    graph : graph object exposing ``neighbors(vertex)``
    start : the origin vertex; it forms the first half of every recorded pair.
    queue : list of ``(vertex, budget)`` tuples still to be expanded. The
        driver seeds it with ``(start, total_budget)``. It is consumed in
        place and is empty on return.
    context_pairs : dict mapping ``"<start> <vertex>"`` to a walk count;
        updated in place.

    Returns
    -------
    None
    """
    # Re-seeded on every call, so each invocation makes the same sequence of
    # random choices.
    random.seed(1)
    while queue:
        vertex, budget = queue.pop(0)
        # Neighbors come from the graph argument rather than a notebook global.
        vertex_neighbors = list(graph.neighbors(vertex))
        if not vertex_neighbors:
            # Nothing to forward the budget to; skipping also avoids dividing
            # by zero when computing the per-neighbor share.
            continue
        num_neighbors = len(vertex_neighbors)
        m = getPerNodeBudget(num_neighbors, budget)
        remainder = budget - (m * num_neighbors)
        # Neighbors that receive one extra unit so the whole budget is handed out.
        chosen_nodes = set()
        if remainder > 0:
            chosen_nodes = set(chooseNodes(vertex_neighbors, remainder))
        for neighbor in vertex_neighbors:
            budget_for_this_node = m + 1 if neighbor in chosen_nodes else m
            num_rand_walks_ending_here = math.floor(budget_for_this_node * EPSILON)
            context_pair = str(start) + " " + str(neighbor)
            if num_rand_walks_ending_here > 0:
                updateContextPairs(context_pair, num_rand_walks_ending_here, context_pairs)
            remaining_budget = math.floor(budget_for_this_node * (1 - EPSILON))
            if remaining_budget > 1:
                queue.append((neighbor, remaining_budget))
            elif remaining_budget > 0:
                # A single leftover walk is settled by a coin flip.
                # NOTE(review): the walk continues with probability EPSILON and
                # ends here otherwise, which looks inverted relative to how
                # EPSILON is used above - confirm this is intended.
                if random.random() < EPSILON:
                    queue.append((neighbor, remaining_budget))
                else:
                    updateContextPairs(context_pair, 1, context_pairs)

In [363]:
def recursiveRandomWalk(graph, start, budget, context_pairs):
    """Spread ``budget`` depth-first from ``start`` and tally where walks end.

    Recursive counterpart of the queue-based walk: the budget at a vertex is
    split as evenly as possible among its neighbors (remainder units go to
    randomly sampled neighbors); ``floor(share * EPSILON)`` walks are recorded
    as ending at each neighbor and ``floor(share * (1 - EPSILON))`` are
    forwarded by recursing into it.

    Parameters
    ----------
    graph : graph object exposing ``neighbors(vertex)``
    start : the origin vertex; it forms the first half of every recorded pair.
    budget : int, number of walks to distribute from ``start``.
    context_pairs : dict mapping ``"<start> <vertex>"`` to a walk count;
        updated in place.

    Returns
    -------
    None
    """
    # Seed once per walk. Seeding inside the recursion would restart the
    # generator at every level and replay the same draws over and over.
    random.seed(1)
    _recursiveRandomWalkStep(graph, start, start, budget, context_pairs)


def _recursiveRandomWalkStep(graph, root, vertex, budget, context_pairs):
    """Distribute ``budget`` from ``vertex``, crediting ended walks to ``root``."""
    vertex_neighbors = list(graph.neighbors(vertex))
    if not vertex_neighbors:
        # Nothing to forward the budget to; returning also avoids dividing by
        # zero when computing the per-neighbor share.
        return
    num_neighbors = len(vertex_neighbors)
    m = getPerNodeBudget(num_neighbors, budget)
    remainder = budget - (m * num_neighbors)
    # Neighbors that receive one extra unit so the whole budget is handed out.
    chosen_nodes = set()
    if remainder > 0:
        chosen_nodes = set(random.sample(population=vertex_neighbors, k=remainder))
    for neighbor in vertex_neighbors:
        budget_for_this_node = m + 1 if neighbor in chosen_nodes else m
        num_rand_walks_ending_here = math.floor(budget_for_this_node * EPSILON)
        # Pairs are keyed by the walk's origin, not by the vertex currently
        # being expanded, matching the queue-based implementation.
        context_pair = str(root) + " " + str(neighbor)
        if num_rand_walks_ending_here > 0:
            updateContextPairs(context_pair, num_rand_walks_ending_here, context_pairs)
        remaining_budget = math.floor(budget_for_this_node * (1 - EPSILON))
        if remaining_budget > 1:
            _recursiveRandomWalkStep(graph, root, neighbor, remaining_budget, context_pairs)
        elif remaining_budget >= 1:
            # A single leftover walk is settled by a coin flip.
            # NOTE(review): the walk continues with probability EPSILON and
            # ends here otherwise, which looks inverted relative to how
            # EPSILON is used above - confirm this is intended.
            if random.random() < EPSILON:
                _recursiveRandomWalkStep(graph, root, neighbor, remaining_budget, context_pairs)
            else:
                updateContextPairs(context_pair, 1, context_pairs)

In [364]:
# Run both walk implementations from every vertex of G and write the resulting
# tallies, one "<pair> <count>" line per entry, to their output files.
try:
    context_pairs = {}
    print("recursive")
    for startvertex in G:
        recursiveRandomWalk(G, startvertex, NUM_RANDOM_WALKS, context_pairs)
    for (key, value) in context_pairs.items():
        femb_recursive.write(key + " " + str(value) + "\n")
    # Closed right away so the recursive results are on disk before the
    # iterative pass starts.
    femb_recursive.close()

    context_pairs = {}
    print("non recursive")
    for startvertex in G:
        queue = [(startvertex, NUM_RANDOM_WALKS)]
        BFSRandomWalk(G, startvertex, queue, context_pairs)
    for (key, value) in context_pairs.items():
        femb_iterative.write(key + " " + str(value) + "\n")
finally:
    # Release both handles even when a walk or a write raises; closing an
    # already closed file is a no-op.
    femb_recursive.close()
    femb_iterative.close()

recursive
non recursive


In [365]:
# def RandomWalk(graph, start, budget, epsilon):
#     print("Starting vertex is: ", start)
#     vertex_neighbors = [n for n in G.neighbors(start)]
#     m = getPerNodeBudget(len(vertex_neighbors), budget) # 1
#     print("m is equal to: ", m)
    
#     candidatePairs = []
#     candidate1 = ""
#     candidate2 = ""
    
    
#     allbudgets = {}
#     ebudgets = {}
#     for node in graph:
#         ebudgets[node] = -1 # means that the e for the node is not set yet
#         allbudgets[node] = []
        
#     allbudgets[start].append(m)  #assign the budget to the start vertex
#     ebudgets[start] = 1
    
#     queue = [start]
#     visited = []
    
#     while queue:
#         vertex = queue.pop(0)  # get the vertex from queue
#         print("");print("Current vertex is: ", vertex)
#         candidate1 = vertex
            
#         vertex_neighbors = [n for n in G.neighbors(vertex)]
#         print("The neighbours of vertex: ", vertex, "are ->", vertex_neighbors)
        
#         if len(vertex_neighbors) >= len(allbudgets[vertex]): # case when a vertex has more neighbors than the budget
#             newVertexies = np.random.choice(vertex_neighbors, allbudgets[vertex], replace=False)
#             print("Randomly chose these vertexes:", newVertexies, "without replacement")
#         else:
#             newVertexies = np.random.choice(vertex_neighbors, allbudgets[vertex], replace=True)    
        

#         allbudgets, ebudgets = updateBudgets(vertex, newVertexies, allbudgets, ebudgets, epsilon)# update the budgets of the neighbours

        
#         queue = updateQueue(queue, ebudgets, epsilon ,allbudgets, newVertexies)
#         print("Curent queue became:", queue)
        

        
        
#     return visited

In [366]:
# def updateBudgets(currentVertex, newVertexies, allbudgets, ebudgets, e):
#     # note: a node that draws an e value smaller than 0.2 won't be assigned any budget
#     for vertex in newVertexies:
#         ebudgets[vertex] = random.uniform(0, 1) 
#         if ebudgets[vertex] > e:
#             allbudgets[vertex].append(1)

#     ebudgets[currentVertex] = -1 #set its own ebudget to -1 
#     allbudgets[currentVertex].pop(0) #set its own budget to 0
#     print("The allbudgets were updated as follows", allbudgets )
#     print("The ebudgets were updated as follows", ebudgets )
    
#     ebudgets = clearEbudgets(ebudgets, e)
    
#     return allbudgets, ebudgets
    

In [367]:
# def updateQueue(queue, ebudgets, e, allbudgets, newVertexies):
#     for vertex in newVertexies: 
#         if len(allbudgets[vertex])>0 and ebudgets[vertex]>e:
#             queue.append(vertex)
#     return queue

In [27]:
# def clearEbudgets(ebudgets, e):
#     #clears small e values
#     for vertex in ebudgets:
#         if ebudgets[vertex] >= 0 and ebudgets[vertex]<e:
#             ebudgets[vertex] = -1
#     return ebudgets

In [28]:

# for startvertex in G:
#     print("Running BFS from vertex:", startvertex)
#     print(RandomWalk(G, startvertex, budget, epsilon)) 

Running BFS from vertex: A
Starting vertex is:  A
m is equal to:  7

Current vertex is:  A
The neighbours of vertex:  A are -> ['D', 'E']


ValueError: Cannot take a larger sample than population when 'replace=False'

In [None]:
# def bfs2(graph, start):
#     visited = []
#     queue = [start]
#     while queue:
#         vertex = queue.pop(0)
#         if vertex not in visited:
#             visited.append(vertex)
#             vertex_neighbors = [n for n in G.neighbors(vertex)]
#             newVertexies = [x for x in vertex_neighbors if x not in visited]
#             queue.extend(newVertexies)
         
#     return visited

In [119]:
# vertex_neighbors = [n for n in G.neighbors("A")]
# l3 = [x for x in vertex_neighbors if x not in ['G']]
# l3

In [118]:
# list = [20, 30, 40, 50 ,60, 70, 80]
# sampling = np.random.choice(list, 4, replace=False)
# print("sampling with choices() ", sampling)

In [117]:
# # Execute 10 times this command sequence
# numOperations = 10
# for step in range(1, numOperations):
#     # Choose a random start node
#     vertexid = 1
#     # Dictionary that associate nodes with the amount of times it was visited
#     visited_vertices = {}
#     # Store and print path
#     path = [vertexid]
    
#     print("Step: %d" % (step))
#     # Restart the cycle
#     counter = 0
#     # Execute the random walk with size 100 (100 steps)
#     randomWalkSize = 100
#     for counter in range(1, randomWalkSize): 
#         # Extract the neighborhood of the current vertex
#         vertex_neighbors = [n for n in G.neighbors(vertexid)]
#         # Set probability of going to a neighbour is uniform
#         probability = []
#         probability = probability + [1./len(vertex_neighbors)] * len(vertex_neighbors)
#         # Choose a vertex from the vertex neighborhood to start the next random walk
#         vertexid = np.random.choice(vertex_neighbors, p=probability)
#         # Accumulate the amount of times each vertex is visited
#         if vertexid in visited_vertices:
#             visited_vertices[vertexid] += 1
#         else:
#             visited_vertices[vertexid] = 1

#         # Append to path
#         path.append(vertexid)

#     # Sort the vertices by visit count, most visited first (descending order)
#     mostvisited = sorted(visited_vertices, key = visited_vertices.get,reverse = True)
#     print("Path: ", path)
    
#     # Show the top 10 most visited vertices
#     print("Most visited nodes: ", mostvisited[:10])