In [1]:
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
import random
import pandas as pd
import math 
import time
import numpy as np
from collections import Counter
from tqdm import tqdm
import array as arr
import copy

# Input edge lists for the BlogCatalog graph (plain-text and CSV variants of the same file name).
filepathTXT = "../edgelists/BlogCatalog-edgelist.txt"
filepathCSV = "../edgelists/BlogCatalog-edgelist.csv"
# Output paths. Both files are opened in write mode further down in the notebook;
# the "-iterative" file is the one the context pairs are written to.
embeddingsrecursive = "../embeddings/BlogCatalog-edgelist.txt.embeddings-recursive"
embeddingsiterative = "../embeddings/BlogCatalog-edgelist.txt.embeddings-iterative"

In [2]:
def parseEdgeList2(graph_file, direction="undirected"):
    """Load a two-column CSV edge list into a networkx graph.

    Parameters
    ----------
    graph_file : str or path-like
        CSV file read with the column names "Start" and "End" supplied
        explicitly; each row holds the two endpoints of one edge.
    direction : str, optional
        With "undirected" (the default) the result of ``G.to_undirected()``
        is returned; any other value returns the graph object as built.
        The graph is always constructed as ``nx.Graph``, so edge direction
        is not preserved in either case.

    Returns
    -------
    networkx.Graph
        Graph containing every endpoint as a node and every row as an edge.
    """
    colNames = ["Start", "End"]
    # Read the file the caller asked for; the path must come from the
    # argument, not from a module-level constant.
    edgeData = pd.read_csv(graph_file, names=colNames)

    # Pull the two columns out once instead of indexing the frame row by row.
    starts = edgeData["Start"].tolist()
    ends = edgeData["End"].tolist()

    G = nx.Graph()
    # Register nodes in ascending label order before adding edges so that
    # node iteration order does not depend on the order of rows in the file.
    G.add_nodes_from(sorted(set(starts) | set(ends)))
    G.add_edges_from(zip(starts, ends))
    print("Nodes: ", G.number_of_nodes()," Edges: ", G.number_of_edges(), " loaded from ", graph_file)

    if direction == "undirected":
        return G.to_undirected()
    return G

In [3]:
def getAdjNPList(graph):
    """Build a ``{vertex: numpy array of neighbours}`` mapping for ``graph``.

    Parameters
    ----------
    graph
        Object that yields its vertices when iterated and exposes
        ``neighbors(vertex)`` (e.g. a networkx graph).

    Returns
    -------
    dict
        One entry per vertex; each value is a numpy array holding that
        vertex's neighbours in an order randomised with ``np.random.shuffle``.
    """
    adjdict = {}
    for vertex in graph:
        # Query the graph that was passed in, not a module-level graph.
        neighbors = np.array(list(graph.neighbors(vertex)))
        np.random.shuffle(neighbors)
        adjdict[vertex] = neighbors
    return adjdict


In [None]:
# Open both embedding output files for writing. Mode 'w' truncates an existing
# file as soon as this cell runs, long before anything is written.
# femb_iterative is written to and closed by the final cell of the notebook.
# NOTE(review): femb_recursive is neither written to nor closed in the code
# visible here -- confirm it is still needed, and close it if so.
femb_recursive = open(embeddingsrecursive, 'w')
femb_iterative = open(embeddingsiterative, 'w')

## Toy Graph

In [4]:
def toyGraph():
    """Build the small hand-written test graph (nodes 1..11, 20 edges).

    Prints the node and edge counts and returns the ``nx.Graph``.
    """
    # Edges are listed in insertion order.
    toy_edges = [
        (2, 1), (3, 1), (3, 2), (4, 1),
        (4, 2), (4, 3), (5, 1), (6, 1),
        (7, 1), (7, 5), (7, 6), (8, 1),
        (8, 2), (8, 3), (8, 4), (9, 1),
        (9, 3), (10, 3), (11, 1), (11, 5),
    ]
    G = nx.Graph()
    G.add_nodes_from(range(1, 12))
    G.add_edges_from(toy_edges)
    print("Nodes: ", G.number_of_nodes()," Edges: ", G.number_of_edges())
    # To inspect the graph visually: nx.draw(G, with_labels=True); plt.show()
    return G

### NP implementation

In [5]:
def chooseNodes(list_nodes, sample_size):
    """Return ``sample_size`` elements drawn from ``list_nodes`` without replacement.

    Uses the global ``random`` generator, so results follow ``random.seed``.
    """
    # Author's timing note: 2.22 s ± 179 ms per loop (string labels).
    picked = random.sample(list_nodes, sample_size)
    return picked

def getPerNodeBudget(numNodes, budget):
    """Return ``floor(budget / numNodes)``, the whole-number share per node.

    Raises ``ZeroDivisionError`` when ``numNodes`` is 0.
    """
    share = budget / numNodes
    return math.floor(share)

def storeContextPairs(context_pair, budget, context_pairs):
    """Add ``budget`` to the running total stored under ``context_pair``.

    ``context_pairs`` is updated in place; a key seen for the first time
    starts at ``budget``.
    """
    if context_pair in context_pairs:
        updated_total = context_pairs[context_pair] + budget
    else:
        updated_total = budget
    context_pairs[context_pair] = updated_total
        
def updateContextPairs(window, window_count, context_pairs):
    """Record context pairs between the newest window entry and its predecessors.

    ``window[window_count]`` is the newest ``(label, budget)`` entry. It is
    paired, in both orders, with the entries before it, starting from the
    nearest one and stopping after ``ORIGINAL_WINDOW_SIZE`` entries. Each pair
    is stored under the key ``"a,b"`` and credited with the newest entry's
    budget via ``storeContextPairs``.
    """
    newest_label, newest_budget = window[window_count]
    newest_text = str(newest_label)
    # Entries before the newest one, nearest first.
    predecessors = window[:window_count][::-1]
    for distance, entry in enumerate(predecessors):
        if distance == ORIGINAL_WINDOW_SIZE:
            break
        other_text = str(entry[0])
        storeContextPairs(newest_text + "," + other_text, newest_budget, context_pairs)
        storeContextPairs(other_text + "," + newest_text, newest_budget, context_pairs)

def addNewNodeToWindow(tempwindow, temp_window_count, window_size, vertex, budget):
    """Append ``(vertex, budget)`` to the window, sliding it when it is full.

    Parameters
    ----------
    tempwindow : numpy.ndarray
        Two-column array of ``(vertex, budget)`` rows; rows
        ``0..temp_window_count`` are the live entries. Modified in place.
    temp_window_count : int
        Index of the newest live row.
    window_size : int
        Maximum number of live rows.
    vertex, budget
        Values for the new row.

    Returns
    -------
    tuple
        ``(tempwindow, index_of_newest_row)``. The array always keeps its
        original shape.
    """
    newWindowElement = np.array([vertex, budget])
    if temp_window_count + 1 == window_size:
        # Window is full: move the live rows one position towards the front
        # (dropping the oldest) and reuse the last live slot. Shifting in
        # place keeps the array length constant, so any number of slides is
        # safe; slicing the first row off would shrink the array each time.
        # The explicit copy avoids reading and writing overlapping memory.
        tempwindow[:temp_window_count] = tempwindow[1:temp_window_count + 1].copy()
        tempwindow[temp_window_count] = newWindowElement
        return tempwindow, temp_window_count

    tempwindow[temp_window_count + 1] = newWindowElement
    return tempwindow, temp_window_count + 1

def BFSRandomWalkWindow(queue, queue_len, queue_pop_idx, queue_add_idx, context_pairs, window_size, walk_lenght):
    """Process queued walk states breadth-first and record context pairs.

    Each queue row holds
    ``(vertex, budget, current_walk_lenght, window_count, window)``.
    For every state taken from the queue, the budget is divided among the
    vertex's neighbours: each neighbour receives ``floor(budget / n)`` and
    ``budget % n`` neighbours, selected by shuffling, receive one extra unit.
    Neighbours whose share would be 0 are not visited. For every visited
    neighbour the window is extended, context pairs are recorded in
    ``context_pairs``, and -- while the walk is shorter than ``walk_lenght`` --
    a new state is queued.

    Reads the module-level ``adjdict`` and calls ``getPerNodeBudget``,
    ``addNewNodeToWindow`` and ``updateContextPairs``. The neighbour arrays
    stored in ``adjdict`` are shuffled in place.

    Parameters
    ----------
    queue : numpy.ndarray
        Object array with five columns, used as a ring buffer with
        ``len(queue)`` slots.
        NOTE(review): the number of rows must be at least the starting
        budget, since every queued state carries a budget of at least 1;
        a smaller queue would be overwritten silently -- confirm for callers
        other than ``Runner``.
    queue_len : int
        Number of states currently stored in ``queue``.
    queue_pop_idx : int
        Row of the oldest stored state.
    queue_add_idx : int
        Kept for interface compatibility. The write position is derived from
        ``queue_pop_idx`` and ``queue_len`` so that the read and write
        positions cannot drift apart.
    context_pairs : dict
        Updated in place.
    window_size : int
        Maximum number of live window rows (passed to ``addNewNodeToWindow``).
    walk_lenght : int
        States are only queued while their walk length is below this value.
    """
    capacity = len(queue)
    if capacity == 0 or queue_len <= 0:
        return

    # First free slot of the ring buffer.
    queue_add_idx = (queue_pop_idx + queue_len) % capacity

    while queue_len > 0:
        vertex, budget, current_walk_lenght, window_count, window = queue[queue_pop_idx]
        # Always advance the read position, also when the queue becomes empty;
        # otherwise the next pop could return a stale row.
        queue_pop_idx = (queue_pop_idx + 1) % capacity
        queue_len -= 1

        vertex_neighbors = adjdict[vertex]
        num_neighbors = vertex_neighbors.size
        if num_neighbors == 0:
            # Nothing to walk to (and the per-node share would divide by zero).
            continue

        m = getPerNodeBudget(num_neighbors, budget)
        remainder = budget - (m * num_neighbors)
        if remainder > 0:
            # Randomise which neighbours receive the extra unit.
            np.random.shuffle(vertex_neighbors)
        # With a positive base share every neighbour is visited; otherwise
        # only the neighbours that receive one of the leftover units.
        if m > 0:
            receivers = vertex_neighbors
        else:
            receivers = vertex_neighbors[:remainder]

        current_walk_lenght += 1
        for position, neighbor in enumerate(receivers):
            budget_for_this_node = m + 1 if position < remainder else m
            # Work on a copy so sibling states do not share a window.
            temp_window, temp_window_count = addNewNodeToWindow(
                np.copy(window), window_count, window_size, neighbor, budget_for_this_node)
            updateContextPairs(temp_window, temp_window_count, context_pairs)
            if current_walk_lenght < walk_lenght:
                # Fill the row field by field: building one numpy array from
                # scalars plus the window array is a ragged construction,
                # which current NumPy releases reject.
                queue[queue_add_idx, 0] = neighbor
                queue[queue_add_idx, 1] = budget_for_this_node
                queue[queue_add_idx, 2] = current_walk_lenght
                queue[queue_add_idx, 3] = temp_window_count
                queue[queue_add_idx, 4] = temp_window
                queue_add_idx = (queue_add_idx + 1) % capacity
                queue_len += 1

### Parameters

In [None]:
# Set toy parameters and graph.
# Small values for experimenting on the hand-built graph from toyGraph().
# The cell below assigns the same names again, so when both cells are run
# in order these toy values are replaced.
WALK_LENGHT = 3  # walk length limit that Runner hands to BFSRandomWalkWindow
BUDGET = 1  # starting budget per start vertex; Runner also uses it as the number of queue rows
ORIGINAL_WINDOW_SIZE = 1  # updateContextPairs pairs a new node with at most this many preceding window entries
WINDOW_SIZE = ORIGINAL_WINDOW_SIZE*2+1  # passed to BFSRandomWalkWindow as window_size
# NOTE(review): QUEUE_BUFFER_SIZE is not referenced by any other code visible
# in this notebook (Runner sizes its queue from BUDGET) -- confirm whether it is still needed.
QUEUE_BUFFER_SIZE = (BUDGET*WALK_LENGHT)-(BUDGET*2) + 1

G = toyGraph()
# Module-level adjacency mapping read by BFSRandomWalkWindow and Runner.
adjdict = getAdjNPList(G)

In [7]:
# Set the actual parameters and graph.
# These assignments reuse the names from the toy-parameter cell and therefore
# replace the toy values when both cells are run.
WALK_LENGHT = 40  # walk length limit that Runner hands to BFSRandomWalkWindow
#BUDGET = 80
BUDGET = 1  # starting budget per start vertex; Runner also uses it as the number of queue rows
ORIGINAL_WINDOW_SIZE = 10  # updateContextPairs pairs a new node with at most this many preceding window entries
WINDOW_SIZE = ORIGINAL_WINDOW_SIZE*2+1  # passed to BFSRandomWalkWindow as window_size
# NOTE(review): QUEUE_BUFFER_SIZE is not referenced by any other code visible
# in this notebook (Runner sizes its queue from BUDGET) -- confirm whether it is still needed.
QUEUE_BUFFER_SIZE = (BUDGET*WALK_LENGHT)-(BUDGET*2) + 1
# NOTE(review): parseEdgeList1 is not defined in the code visible here.
#G = parseEdgeList1(filepathTXT) #String labels 

G = parseEdgeList2(filepathCSV) # Integer labels
# Module-level adjacency mapping read by BFSRandomWalkWindow and Runner.
adjdict = getAdjNPList(G)

Nodes:  10312  Edges:  333983  loaded from  ../edgelists/BlogCatalog-edgelist.csv


## NP Runner

In [8]:
def Runner():
    """Run the budgeted BFS random walk from every vertex in ``adjdict``.

    Reads the module-level ``adjdict``, ``BUDGET``, ``WINDOW_SIZE`` and
    ``WALK_LENGHT``. For each start vertex a fresh window and a fresh queue
    holding the single start state are created and handed to
    ``BFSRandomWalkWindow``.

    Returns
    -------
    dict
        The context-pair mapping filled in by ``BFSRandomWalkWindow``.
    """
    # Seed both generators: the walk shuffles neighbour arrays with
    # np.random, which random.seed() alone does not affect.
    random.seed(0)
    np.random.seed(0)
    context_pairs = {}
    print("Running BFSRandomWalkWindow...")

    for startvertex in adjdict.keys():
        window_count = 0
        # The 20 spare rows beyond WINDOW_SIZE are kept from the original sizing.
        # NOTE(review): presumably head-room for the sliding-window helper -- confirm.
        WINDOW = np.zeros(shape=(WINDOW_SIZE+20,2),dtype=int)
        WINDOW[window_count] = (startvertex, BUDGET)

        queue_pop_idx = 0
        queue_add_idx = 0
        queue = np.zeros(shape=(BUDGET,5),dtype=object)
        # Fill the first row field by field: building one numpy array from
        # scalars plus the window array is a ragged construction, which
        # current NumPy releases reject.
        queue[0, 0] = startvertex
        queue[0, 1] = BUDGET
        queue[0, 2] = 1
        queue[0, 3] = window_count
        queue[0, 4] = WINDOW
        queue_len = 1

        BFSRandomWalkWindow(queue, queue_len, queue_pop_idx, queue_add_idx, context_pairs, WINDOW_SIZE, WALK_LENGHT)
    return context_pairs

## Run

In [9]:
# Run the walk from every vertex in adjdict and keep the resulting
# context-pair -> accumulated-budget mapping for the cells below.
contextPairs = Runner()

Running BFSRandomWalkWindow...


In [10]:
# Add up the accumulated budget over all stored context pairs.
countSum = sum(contextPairs.values())
print("Total value sums up to: ", countSum)

Total value sums up to:  7115280


In [None]:
# Show only the first few entries; rendering the whole dictionary as cell
# output would flood the notebook.
[entry for entry, _ in zip(contextPairs.items(), range(10))]

### Writing to file

In [None]:
print("Writing context pairs to file...")
# Release the handle opened earlier in the notebook (closing an already
# closed file does nothing), then write through a context manager so the
# file is closed even if writing fails and this cell can be re-run.
femb_iterative.close()
with open(embeddingsiterative, 'w') as pairs_file:
    # One line per context pair: "<pair> <accumulated value>"
    pairs_file.writelines(
        str(key) + " " + str(value) + "\n" for (key, value) in contextPairs.items()
    )