In [1]:
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
import random
import pandas as pd
import math 
import time
import numpy as np
from collections import Counter
from tqdm import tqdm
import array as arr
import copy

filepathTXT = "../edgelists/BlogCatalog-edgelist.txt"
filepathCSV = "../edgelists/BlogCatalog-edgelist.csv"
embeddingsrecursive = "../embeddings/BlogCatalog-edgelist.txt.embeddings-recursive"
embeddingsiterative = "../embeddings/BlogCatalog-edgelist.txt.embeddings-iterative"

In [2]:
def parseEdgeList2(graph_file, direction="undirected"):
    # Create Graph
    G = nx.Graph()
    # Create head
    colNames=["Start", "End"]
    edgeData = pd.read_csv(filepathCSV, names=colNames)

    #Add nodes
    nodes = []
    #loop throug data records
    for i in range (0, edgeData.shape[0]):
        #append every node
        nodes.append(edgeData.iloc[i,0])
        nodes.append(edgeData.iloc[i,1])
    #creating a set of nodes    
    nodes = set(nodes)
    #sorting the nodes in increasing order
    uniqueNodes = (list(nodes))
    uniqueNodes.sort()
    #adding the nodes to the graph
    G.add_nodes_from(uniqueNodes)

    # Add edges
    #loop from 0 to amount of records
    edgeCount = 0
    for i in range (0, edgeData.shape[0]):
        edgeCount += 1
        #add the edge to the graph
        G.add_edge(edgeData.iloc[i,0], edgeData.iloc[i,1])
    print("Nodes: ", G.number_of_nodes()," Edges: ", G.number_of_edges(), " loaded from ", graph_file)

    if(direction == "undirected"):
        return G.to_undirected()
    else:
        return G

In [3]:
def getAdjNPList(graph):
    adjdict = {}
    for vertex in graph:
        adjdict[vertex] = np.array([n for n in G.neighbors(vertex)]) 
        np.random.shuffle(adjdict[vertex])
    return adjdict


In [None]:
femb_recursive = open(embeddingsrecursive, 'w')
femb_iterative = open(embeddingsiterative, 'w')

## Toy Graph

In [None]:
def toyGraph():
    G = nx.Graph()
    G.add_nodes_from([1,2,3,4,5,6,7,8,9,10,11])
    G.add_edge(2, 1);G.add_edge(3, 1);G.add_edge(3, 2);G.add_edge(4, 1);
    G.add_edge(4, 2);G.add_edge(4, 3);G.add_edge(5, 1);G.add_edge(6, 1);
    G.add_edge(7, 1);G.add_edge(7, 5);G.add_edge(7, 6);G.add_edge(8, 1);
    G.add_edge(8, 2);G.add_edge(8, 3);G.add_edge(8, 4);G.add_edge(9, 1); 
    G.add_edge(9, 3);G.add_edge(10, 3);G.add_edge(11, 1);G.add_edge(11, 5);
    print("Nodes: ", G.number_of_nodes()," Edges: ", G.number_of_edges())
    # Draw graph
    # nx.draw(G, with_labels = True)
    # plt.show()
    return G

### NP implementation

In [4]:
def chooseNodes(list_nodes, sample_size):
    return random.sample(population=list_nodes, k=sample_size) # 2.22 s ± 179 ms per loop  (labesl str)

def getPerNodeBudget(numNodes, budget):
    return math.floor(budget/numNodes)

def storeContextPairs(context_pair, budget, context_pairs):
    if context_pair not in context_pairs:
        context_pairs[context_pair] = budget
    else:
        context_pairs[context_pair] = context_pairs[context_pair] +  budget
        
def updateContextPairs(window, window_count, context_pairs):
    lastNode = window[window_count]
   
    labelOfLastNode, budgetOfLastNode = lastNode
    if window_count > ORIGINAL_WINDOW_SIZE:
        new_window = window[ORIGINAL_WINDOW_SIZE:window_count]
    else:
        new_window = window[:window_count]

    for node in new_window:
        node_label = node[0]
        context_pair1 = str(labelOfLastNode) +","+ str(node_label)
        context_pair2 = str(node_label) + "," + str(labelOfLastNode)
        storeContextPairs(context_pair1, budgetOfLastNode, context_pairs)
        storeContextPairs(context_pair2, budgetOfLastNode, context_pairs)
           
def addNewNodeToWindow(tempwindow, temp_window_count, window_size, vertex, budget):
    if temp_window_count == window_size:
        tempwindow = tempwindow[1:]
        
    newWindowElement = np.array([[vertex,budget]])
    tempwindow[temp_window_count+1] = newWindowElement
    temp_window_count+=1
    return tempwindow, temp_window_count

def BFSRandomWalkWindow(queue, queue_count, context_pairs, window_size, walk_lenght):
   
    while queue_count > -1:
        vertex, budget, current_walk_lenght, window, window_count = queue[queue_count]
        queue_count -= 1
        queue = queue[1:]
        vertex_neighbors = adjdict[vertex]
        num_neighbors = vertex_neighbors.size
        m = getPerNodeBudget(num_neighbors, budget)
        remainder = budget - (m * num_neighbors)

        if remainder > 0:
            np.random.shuffle(vertex_neighbors) 
        else:
            remainder = num_neighbors 
        current_walk_lenght += 1
        
        for neighbor in vertex_neighbors[:remainder]:
            budget_for_this_node = m 
            temp_window = np.copy(window)
            temp_window_count = window_count
            
            if remainder != num_neighbors:
                budget_for_this_node = budget_for_this_node + 1
            temp_window, temp_window_count = addNewNodeToWindow(temp_window, temp_window_count, window_size, neighbor, budget_for_this_node) 
            updateContextPairs(temp_window, temp_window_count, context_pairs) 
            if current_walk_lenght < walk_lenght:
                newQueueElement = np.array([[neighbor, budget_for_this_node, current_walk_lenght, temp_window, temp_window_count]])
                queue[queue_count+1] = newQueueElement
                queue_count+=1

### Parametrs

In [5]:
# Set the actual parameters and graph
WALK_LENGHT = 40
BUDGET = 80
ORIGINAL_WINDOW_SIZE = 10
G = parseEdgeList2(filepathCSV) # Integer labels
adjdict = getAdjNPList(G)

Nodes:  10312  Edges:  333983  loaded from  ../edgelists/BlogCatalog-edgelist.csv


In [None]:
# Set toy parameters and graph
WALK_LENGHT = 3
BUDGET = 1
ORIGINAL_WINDOW_SIZE = 1
G = toyGraph()
adjdict = getAdjNPList(G)

## NP Runner

In [6]:
def Runner():
    start = time.time()
    random.seed(0)
    rand=random.Random(0)
    
    context_pairs = {}    
    WINDOW_SIZE = ORIGINAL_WINDOW_SIZE*2+1
    
    print("Running BFSRandomWalkWindow...")
    count = 0
    for startvertex in adjdict.keys():
        count+=1
        if count == 10000:
            end = time.time()
            result = end - start
            print("10000 iterations in ->", result)
            break

        #print("Running from -> ", startvertex)
        
        window_count = 0
        WINDOW = np.zeros(shape=(WINDOW_SIZE+20,2),dtype=int)
        firstWindowElement = np.array([[startvertex, BUDGET]])
        WINDOW[window_count] = firstWindowElement
 
        queue_count = 0
        queue = np.zeros(shape=(BUDGET+100,5),dtype=object)
        firstQueueElement = np.array([[startvertex, BUDGET, 1, WINDOW, window_count]])
        queue[queue_count] = firstQueueElement

        BFSRandomWalkWindow(queue, queue_count, context_pairs, WINDOW_SIZE, WALK_LENGHT)
    return context_pairs


## Run

In [7]:
contextPairs = Runner()

Running BFSRandomWalkWindow...
10000 iterations in -> 115.18561029434204


In [8]:
# Count the total count sum
countSum = 0
for key, value in contextPairs.items():
    countSum += value
print("Total value sums up to: ", countSum)

Total value sums up to:  29138736


### Writing to file

In [None]:
print("Writing context pairs to file...")    
#Writing to file    
for (key, value) in contextPairs.items():
    
    femb_iterative.write(str(key) + " " + str(value) + "\n" )
femb_iterative.close()