In [47]:
import sys
import os
sys.path.append("../infrastructure")
from maximal_independent_set import maximal_n_hop_independent_set
from collections import defaultdict

In [94]:
# Shared helpers: k-th order neighborhoods and best 5-hop independent set search

def kth_order_neighborhood(network, node, k):
    """Return the nodes first reached on the k-th expansion round from ``node``.

    The search starts from ``{node}``. Each round replaces the current
    frontier with the neighbors of its members that have not been visited
    yet, so after ``k`` rounds the frontier holds the nodes whose
    shortest-path distance from ``node`` is exactly ``k``.

    Args:
        network: Adjacency structure indexed by node; ``network[v]`` must be
            an iterable of the neighbors of ``v``.
        node: Node the expansion starts from.
        k: Number of expansion rounds. ``0`` yields ``{node}`` itself.

    Returns:
        A set with the k-th order neighbors of ``node`` (possibly empty).
    """
    frontier = {node}

    if k != 0:
        seen = {node}
        for _round in range(k):
            # Everything adjacent to the current frontier ...
            reached = {nb for member in frontier for nb in network[member]}
            # ... minus whatever an earlier round already discovered.
            frontier = reached - seen
            seen |= frontier

    return frontier

def find_best_5_hop_ind_set(graph, n_hops=5, n_trials=10):
    """Run several independent-set searches and keep the most useful result.

    Each trial calls ``maximal_n_hop_independent_set(graph, n_hops)`` and
    filters its result down to the "effective" nodes: those whose first AND
    second order neighborhoods (as computed by ``kth_order_neighborhood``)
    are both non-empty. The trial with the largest effective set wins; on a
    tie the earliest trial is kept.

    NOTE(review): the trials are only worthwhile if
    ``maximal_n_hop_independent_set`` is randomized - confirm against its
    implementation.

    Args:
        graph: Adjacency mapping, ``graph[v]`` being the neighbors of ``v``.
        n_hops: Hop distance passed to ``maximal_n_hop_independent_set``.
            Defaults to 5, the value that used to be hard-coded.
        n_trials: Number of searches to run. Defaults to 10, the value that
            used to be hard-coded.

    Returns:
        A tuple ``(best_set_size, best_effective_set_size, best_effective_set)``
        where ``best_set_size`` is the size of the unfiltered set from the
        winning trial. With ``n_trials == 0`` the initial placeholders
        ``(-inf, -inf, None)`` are returned.
    """
    best_set_size = float('-inf')
    best_effective_set_size = float('-inf')
    best_effective_set = None

    for _ in range(n_trials):
        ind_set = maximal_n_hop_independent_set(graph, n_hops)

        # Keep only nodes with a non-empty 1-hop ring and a non-empty 2-hop
        # ring. The previous test built a list with one entry per direct
        # neighbor, which is non-empty whenever the node has any neighbor at
        # all, so the second order neighborhood was never actually examined.
        effective_ind_set = [
            node
            for node in ind_set
            if kth_order_neighborhood(graph, node, 1)
            and kth_order_neighborhood(graph, node, 2)
        ]

        # Strict comparison: a later trial must beat, not just match, the best.
        if len(effective_ind_set) > best_effective_set_size:
            best_effective_set_size = len(effective_ind_set)
            best_effective_set = effective_ind_set
            best_set_size = len(ind_set)

    return best_set_size, best_effective_set_size, best_effective_set

In [93]:
# Edge list for the Facebook graph; the path is relative to the notebook's working directory.
file_path = "./data/facebook_combined.txt"

def process_file_to_graph_fb(file_path):
    """Build an undirected adjacency list from a whitespace-separated edge list.

    Every data line must hold exactly two integer node ids. Each edge is
    recorded in both directions, so ``graph[a]`` contains ``b`` and
    ``graph[b]`` contains ``a``. Blank lines and lines starting with ``#``
    are skipped instead of aborting the load.

    Args:
        file_path: Path of the edge-list text file.

    Returns:
        A ``defaultdict(list)`` mapping each node id to its list of neighbors
        (duplicates are kept if an edge is listed more than once).

    Raises:
        ValueError: If a data line does not consist of exactly two integers.
    """
    graph = defaultdict(list)

    with open(file_path, 'r') as file:
        for line in file:
            fields = line.split()
            # A trailing empty line or a comment header carries no edge.
            if not fields or fields[0].startswith('#'):
                continue
            node1, node2 = map(int, fields)
            graph[node1].append(node2)
            graph[node2].append(node1)

    return graph

# Load the Facebook edge list into an adjacency-list graph.
graph = process_file_to_graph_fb(file_path)
# len(graph) is the number of distinct node ids that appear in at least one edge.
print("Original graph size:", len(graph))
# Unpacks (size of the winning independent set, size after filtering, the filtered nodes).
best_set_size, best_effective_set_size, best_effective_set = find_best_5_hop_ind_set(graph)
print(best_set_size, best_effective_set_size)


Original graph size: 4039
3 3


In [88]:
# Github ML graph

def process_file_to_graph_csv(file_path):
    """Build an undirected adjacency list from a two-column CSV edge list.

    The first row is treated as a header and discarded. Every remaining
    non-blank row must hold exactly two comma-separated integer node ids;
    each edge is recorded in both directions.

    Args:
        file_path: Path of the CSV edge file.

    Returns:
        A ``defaultdict(list)`` mapping each node id to its list of neighbors.
        An empty file yields an empty graph.

    Raises:
        ValueError: If a data row does not consist of exactly two integers.
    """
    graph = defaultdict(list)

    with open(file_path, 'r') as file:
        # Skip the header row; the default keeps an empty file from raising
        # StopIteration.
        next(file, None)
        for line in file:
            # Tolerate blank rows such as an extra newline at the end of the file.
            if not line.strip():
                continue
            node1, node2 = map(int, line.split(','))
            graph[node1].append(node2)
            graph[node2].append(node1)

    return graph

# Edge list for the GitHub ML graph (CSV; the loader skips its first row).
file_path = "./data/git_web_ml/musae_git_edges.csv"
graph = process_file_to_graph_csv(file_path)
# len(graph) is the number of distinct node ids that appear in at least one edge.
print("Original graph size:", len(graph))
# Unpacks (size of the winning independent set, size after filtering, the filtered nodes).
best_set_size, best_effective_set_size, best_effective_set = find_best_5_hop_ind_set(graph)
print(best_set_size, best_effective_set_size)

Original graph size: 37700
211 211


In [101]:
# Deezer Europe graph  

# Edge list for the Deezer Europe graph (CSV; the loader skips its first row).
file_path = "./data/deezer_europe/deezer_europe_edges.csv"
graph = process_file_to_graph_csv(file_path)
# len(graph) is the number of distinct node ids that appear in at least one edge.
print("Original graph size:", len(graph))
# Unpacks (size of the winning independent set, size after filtering, the filtered nodes).
best_set_size, best_effective_set_size, best_effective_set = find_best_5_hop_ind_set(graph)
print(best_set_size, best_effective_set_size)

Original graph size: 28281
474 474


In [104]:
# Deezer Europe clean graph  

# Same load-and-report sequence for each of the three Deezer "clean" edge
# files. After the loop the notebook globals (file_path, graph, best_*) hold
# the values for the last file, exactly as with the unrolled version.
for file_path in (
    "./data/deezer_clean_data/HR_edges.csv",
    "./data/deezer_clean_data/HU_edges.csv",
    "./data/deezer_clean_data/RO_edges.csv",
):
    graph = process_file_to_graph_csv(file_path)
    print("Original graph size:", len(graph))
    best_set_size, best_effective_set_size, best_effective_set = find_best_5_hop_ind_set(graph)
    print(best_set_size, best_effective_set_size)

Original graph size: 54573
327 327
Original graph size: 47538
475 475
Original graph size: 41773
1183 1183


In [106]:
# Facebook pages clean graph  

# Artist pages: load the CSV edge list, report the node count, then the
# winning independent-set size and its filtered ("effective") size.
file_path = "./data/facebook_clean_data/artist_edges.csv"
graph = process_file_to_graph_csv(file_path)
print("Original graph size:", len(graph))
best_set_size, best_effective_set_size, best_effective_set = find_best_5_hop_ind_set(graph)
print(best_set_size, best_effective_set_size)

# Athlete pages: same sequence; file_path, graph and the best_* results are overwritten.
file_path = "./data/facebook_clean_data/athletes_edges.csv"
graph = process_file_to_graph_csv(file_path)
print("Original graph size:", len(graph))
best_set_size, best_effective_set_size, best_effective_set = find_best_5_hop_ind_set(graph)
print(best_set_size, best_effective_set_size)