In [1]:
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import csv
import pandas as pd
import random
from random import choice
from random import sample
from itertools import combinations
import random
import metis 

In [2]:
import time

def compute_bfs(graph, start_node):
    """
    Run a breadth-first search from ``start_node`` and measure how long it takes.

    The timed region covers the ``nx.bfs_tree`` call and the conversion of its
    result to a list, so the reported duration includes both.

    Parameters:
    graph (networkx.Graph): The input graph.
    start_node (node): The node from which to start the BFS.

    Returns:
    list: The nodes obtained by iterating ``nx.bfs_tree(graph, start_node)``.
    float: Elapsed time, in seconds, of the timed region.
    """
    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # intervals. time.time() follows the system clock, which can be adjusted
    # mid-run and produce skewed or even negative durations.
    start_time = time.perf_counter()
    bfs_traversal = list(nx.bfs_tree(graph, start_node))
    time_taken = time.perf_counter() - start_time

    return bfs_traversal, time_taken

# Usage:
# G_DBLP is your graph and 'node1' is the starting node for BFS
# bfs_traversal, time_taken = compute_bfs(G_DBLP, 'node1')
# print("BFS Traversal: ", bfs_traversal)
# print("Time Taken: ", time_taken)

# Graph init

In [4]:
# Graph init: build the undirected DBLP graph from an edge-list file.
G_dblp = nx.Graph()


# dblp.tsv -- one edge per line: "<source> <target>"
with open('data/dblp/com-dblp/out.com-dblp.tsv', 'r') as file:
    for line in file:
        # split() with no argument splits on any run of whitespace, so both
        # space- and tab-delimited lines are accepted.
        fields = line.split()
        if not fields:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        source, target = fields
        G_dblp.add_edge(int(source), int(target))

num_edges = G_dblp.number_of_edges()
num_nodes = G_dblp.number_of_nodes()


print("number of edges in dblp.tsv:", num_edges)
print("number of nodes in dblp.tsv:", num_nodes)
# Sanity check: report whether the whole graph is a single connected component.
print(nx.is_connected(G_dblp))


number of edges in dblp.tsv: 1049866
number of nodes in dblp.tsv: 317080
True


In [5]:
# Read the Twitch edge list into a DataFrame
file_path = 'data/twitch_gamers/large_twitch_edges.csv'
df = pd.read_csv(file_path)

# Initialize the undirected Twitch graph
G_twitch = nx.Graph()

# Each DataFrame row is passed to networkx as one edge.
# NOTE(review): assumes the CSV has exactly two columns (the edge endpoints) -- confirm.
G_twitch.add_edges_from(df.values)

# Get the number of edges and nodes
num_edges = G_twitch.number_of_edges()
num_nodes = G_twitch.number_of_nodes()

# Labels name the Twitch file; they previously said "dblp.tsv" (copy-paste slip).
print("number of edges in large_twitch_edges.csv:", num_edges)
print("number of nodes in large_twitch_edges.csv:", num_nodes)
# Sanity check: report whether the whole graph is a single connected component.
print(nx.is_connected(G_twitch))


number of edges in dblp.tsv: 6797557
number of nodes in dblp.tsv: 168114
True


In [6]:
# Facebook artist edge list -> undirected graph
file_path = 'data/facebook_clean_data/artist_edges.csv'
df = pd.read_csv(file_path)

G_facebook = nx.Graph()
# Every row of the frame is handed to networkx as one edge.
G_facebook.add_edges_from(df.to_numpy())

# Size of the resulting graph
num_edges, num_nodes = G_facebook.number_of_edges(), G_facebook.number_of_nodes()

print("Number of undirected edges:", num_edges)
print("Number of undirected nodes:", num_nodes)
# Report whether the graph forms a single connected component.
print(nx.is_connected(G_facebook))


Number of undirected edges: 819306
Number of undirected nodes: 50515
True


In [7]:
# Hyves: read the edge list as a directed graph, then keep only the largest
# weakly connected component, converted to an undirected graph.
file_path = 'data/hyves/out.tsv'
G_hyves_full = nx.DiGraph()

with open(file_path, 'r') as fh:
    next(fh)  # the first line is a header, not an edge
    for row in fh:
        # Exactly two integer tokens per line: source and target.
        u, v = (int(tok) for tok in row.split())
        G_hyves_full.add_edge(u, v)

# All weakly connected components, then the one with the most nodes (LWCC)
weakly_connected_components = [*nx.weakly_connected_components(G_hyves_full)]
lwcc = max(weakly_connected_components, key=len)

# Restrict to the LWCC and drop edge direction in one step
G_hyves = G_hyves_full.subgraph(lwcc).to_undirected()

# Size of the undirected LWCC
num_lwcc_undirected_edges = G_hyves.number_of_edges()
num_lwcc_undirected_nodes = G_hyves.number_of_nodes()

print("Number of edges in undirected LWCC:", num_lwcc_undirected_edges)
print("Number of nodes in undirected LWCC:", num_lwcc_undirected_nodes)
# Report whether the resulting graph is connected.
print(nx.is_connected(G_hyves))



Number of edges in undirected LWCC: 2777419
Number of nodes in undirected LWCC: 1402673
True


In [8]:


# Petster (cat friendships): read the edge list as a directed graph, then keep
# only the largest weakly connected component, converted to an undirected graph.
file_path = 'data/petster-friendships-cat/out.tsv'
G_petser_full = nx.DiGraph()

with open(file_path, 'r') as fh:
    next(fh)  # the first line is a header, not an edge
    for row in fh:
        # Exactly two integer tokens per line: source and target.
        u, v = (int(tok) for tok in row.split())
        G_petser_full.add_edge(u, v)

# All weakly connected components, then the one with the most nodes (LWCC)
weakly_connected_components = [*nx.weakly_connected_components(G_petser_full)]
lwcc = max(weakly_connected_components, key=len)

# Restrict to the LWCC and drop edge direction in one step
G_petser = G_petser_full.subgraph(lwcc).to_undirected()

# Size of the undirected LWCC
num_lwcc_undirected_edges = G_petser.number_of_edges()
num_lwcc_undirected_nodes = G_petser.number_of_nodes()

print("Number of edges in undirected LWCC:", num_lwcc_undirected_edges)
print("Number of nodes in undirected LWCC:", num_lwcc_undirected_nodes)
# Report whether the resulting graph is connected.
print(nx.is_connected(G_petser))



Number of edges in undirected LWCC: 5448486
Number of nodes in undirected LWCC: 148826
True


# BFS time

In [11]:
# Time a BFS over the DBLP graph, starting from node 1
bfs_traversal, time_taken = compute_bfs(graph=G_dblp, start_node=1)
print("Time Taken: ", time_taken)

Time Taken:  6.478215932846069


In [12]:
# Time a BFS over the Twitch graph, starting from node 1
bfs_traversal, time_taken = compute_bfs(graph=G_twitch, start_node=1)
print("Time Taken: ", time_taken)

Time Taken:  2.4345648288726807


In [13]:
# Time a BFS over the Facebook graph, starting from node 1
bfs_traversal, time_taken = compute_bfs(graph=G_facebook, start_node=1)
print("Time Taken: ", time_taken)

Time Taken:  6.439209938049316


In [14]:
# Time a BFS over the Hyves graph (undirected LWCC), starting from node 1
bfs_traversal, time_taken = compute_bfs(graph=G_hyves, start_node=1)
print("Time Taken: ", time_taken)

Time Taken:  25.322427988052368


In [15]:
# Time a BFS over the Petster graph (undirected LWCC), starting from node 1
bfs_traversal, time_taken = compute_bfs(graph=G_petser, start_node=1)
print("Time Taken: ", time_taken)

Time Taken:  1.0410609245300293
