In [1]:
import pickle
import networkx as nx
import pprint
from new_utils import (Greedy, simple_random_selection, 
                       analyze_network_by_labels, find_best_set_of_leaders, 
                       get_connected_subgraph, select_nodes_by_label, comm_eff, 
                       randomAlgo, create_unique_label_combinations)
import preprocessing as pp
import numpy as np
import random
from collections import Counter
import matplotlib.pyplot as plt

In [2]:
# Load the pickled network, then work on a copy so `network` itself stays unmodified.
# The context manager guarantees the file handle is closed after unpickling.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
with open('./networks/test_net_2005_to_2010.pkl', 'rb') as pkl_file:
    network = pickle.load(pkl_file)
G = network.copy()

In [3]:
# Report the node and edge counts of G as a small text summary.
print(
    "Network Attributes",
    "------------------",
    f"Num of Nodes: {G.number_of_nodes()}",
    f"Num of Edges: {G.number_of_edges()}",
    sep="\n",
)

Network Attributes
------------------
Num of Nodes: 659
Num of Edges: 43879


In [4]:
# Count how many nodes carry each value of the 'label' node attribute.
label_counts = dict(Counter(nx.get_node_attributes(G, 'label').values()))
label_counts

{'AI': 108, 'DM': 60, 'DB': 322, 'T': 169}

In [5]:
# create the project networks
list_1 = [('DM', 'T'), ('DB', 'T'), ('AI', 'T')]
list_2 = [('DM', 'T'), ('DB', 'T'), ('AI', 'T'), ('DM', 'DB'), ('DM', 'AI'), ('DB', 'AI')]
list_3 = [('DM', 'AI'), ('DB', 'T'), ('AI', 'DB')]

star_proj = pp.createProjectNetwork(list_1)
full_proj = pp.createProjectNetwork(list_2)
chain_proj = pp.createProjectNetwork(list_3)

project_net = chain_proj
shape = 'star'

In [6]:
# G = pp.remove_edges_based_on_project_network(network.copy(), project_net)

In [7]:
# Re-print the graph size. The edge-removal step in the preceding cell is commented out,
# so G is unchanged here and the counts match the earlier summary.
print("Network Attributes")
print("------------------")
print(f"Num of Nodes: {G.number_of_nodes()}")
print(f"Num of Edges: {G.number_of_edges()}")

Network Attributes
------------------
Nun of Nodes: 659
Nun of Edges: 43879


In [8]:
# results, top_nodes = analyze_network_by_labels(G)

# print("---Influence Greedy---")
# best_set, max_eff = find_best_set_of_leaders(G, top_nodes)
# print(f"Best Set of Leaders: {best_set}, Efficiency: {max_eff}")
# print(f"Nodes not in top nodes: {best_set - set(top_nodes)}")
# print("\n")

# # Testing out some of the top nodes combinations
# print("--Influence Only---")
# combinations = create_unique_label_combinations(G, top_nodes)
# total = 0.0
# iters = 0
# print(f"Total combinations: {len(combinations)}")
# for i, combo in enumerate(combinations[:100], 1):  # Print first 5 combinations
#     nodes = [node['id'] for node in combo]
#     # print(f"{nodes}: --> Efficiency : {comm_eff(net_10, nodes)}")
#     total += comm_eff(G, nodes)
#     iters += 1
# print(f"Average for combination {iters} combinations of leaders is: {total/iters}")
# print("\n")

# print("---Random Monte Carlo---")
# print(f"Communication efficiency of Random Algorithm: {randomAlgo(G)}")

#### 10 NODES PER TEAM (40-NODE NETWORK)

In [9]:
# Load the saved sub-network for 10 nodes per team (4 teams -> 40 nodes).
size_per_team = 10
# NOTE(review): `results` is overwritten below and `selected_nodes` is only needed by the
# commented-out rebuild path; the call is kept in case select_nodes_by_label has side effects.
results, selected_nodes = select_nodes_by_label(G, size_per_label=size_per_team)
net_path = f'./networks/sub_nets_05_10_{shape}/{4 * size_per_team}_team_network.pkl'
# Context manager closes the file handle; only unpickle files from a trusted source.
with open(net_path, 'rb') as pkl_file:
    net = pickle.load(pkl_file)
# To rebuild and re-save the sub-network instead of loading it:
# net = G.subgraph(selected_nodes)
# with open(net_path, 'wb') as pkl_file:
#     pickle.dump(net, pkl_file)
print(f"Created a {4 * size_per_team} network")

results, top_nodes = analyze_network_by_labels(net)

print("---Influence Greedy---")
best_set, max_eff = find_best_set_of_leaders(net, top_nodes)
print(f"Best Set of Leaders: {best_set}, Efficiency: {max_eff}")
print(f"Nodes not in top nodes: {best_set - set(top_nodes)}")
print("\n")

print("---Influence only---")
combinations = create_unique_label_combinations(net, top_nodes)
print(f"Total combinations: {len(combinations)}")
# Average comm_eff over at most the first 100 combinations.
# Plain left-to-right accumulation is kept so the printed average is unchanged.
total = 0.0
iters = 0
for combo in combinations[:100]:
    nodes = [node['id'] for node in combo]
    total += comm_eff(net, nodes)
    iters += 1
if iters:
    print(f"Average for combination {iters} combinations of leaders is: {total/iters}")
else:
    # Avoid ZeroDivisionError when no combination could be formed.
    print("No leader combinations available to average")
print("\n")

print("---Random Monte Carlo---")
print(f"Communication efficiency of Random Algorithm: {randomAlgo(net)}")

Created a 40 network

Total number of top nodes across all labels: 17
---Influence Greedy---
Best Set of Leaders: {'David W. Cheung', 'Bernhard Schölkopf', 'Xifeng Yan', 'J. Ian Munro'}, Efficiency: 4.0838
Nodes not in top nodes: set()


---Influence only---
Total combinations: 50
Average for combination 50 combinations of leaders is: 4.079517999999998


---Random Monte Carlo---
Communication efficiency of Random Algorithm: 3.5996


#### 15 NODES PER TEAM (60-NODE NETWORK)

In [10]:
# Load the saved sub-network for 15 nodes per team (4 teams -> 60 nodes).
size_per_team = 15
# NOTE(review): `results` is overwritten below and `selected_nodes` is only needed by the
# commented-out rebuild path; the call is kept in case select_nodes_by_label has side effects.
results, selected_nodes = select_nodes_by_label(G, size_per_label=size_per_team)
net_path = f'./networks/sub_nets_05_10_{shape}/{4 * size_per_team}_team_network.pkl'
# Context manager closes the file handle; only unpickle files from a trusted source.
with open(net_path, 'rb') as pkl_file:
    net = pickle.load(pkl_file)
# To rebuild and re-save the sub-network instead of loading it:
# net = G.subgraph(selected_nodes)
# with open(net_path, 'wb') as pkl_file:
#     pickle.dump(net, pkl_file)
print(f"Created a {4 * size_per_team} network")

results, top_nodes = analyze_network_by_labels(net)

print("---Influence Greedy---")
best_set, max_eff = find_best_set_of_leaders(net, top_nodes)
print(f"Best Set of Leaders: {best_set}, Efficiency: {max_eff}")
print(f"Nodes not in top nodes: {best_set - set(top_nodes)}")
print("\n")

print("---Influence only---")
combinations = create_unique_label_combinations(net, top_nodes)
print(f"Total combinations: {len(combinations)}")
# Average comm_eff over at most the first 100 combinations.
# Plain left-to-right accumulation is kept so the printed average is unchanged.
total = 0.0
iters = 0
for combo in combinations[:100]:
    nodes = [node['id'] for node in combo]
    total += comm_eff(net, nodes)
    iters += 1
if iters:
    print(f"Average for combination {iters} combinations of leaders is: {total/iters}")
else:
    # Avoid ZeroDivisionError when no combination could be formed.
    print("No leader combinations available to average")
print("\n")

print("---Random Monte Carlo---")
print(f"Communication efficiency of Random Algorithm: {randomAlgo(net)}")

Created a 60 network

Total number of top nodes across all labels: 25
---Influence Greedy---
Best Set of Leaders: {'Bernhard Schölkopf', 'Mikkel Thorup', 'Xifeng Yan', 'Aristides Gionis'}, Efficiency: 4.092
Nodes not in top nodes: set()


---Influence only---
Total combinations: 450
Average for combination 100 combinations of leaders is: 4.080880999999999


---Random Monte Carlo---
Communication efficiency of Random Algorithm: 3.4809


#### 20 NODES PER TEAM (80-NODE NETWORK)

In [11]:
# Load the saved sub-network for 20 nodes per team (4 teams -> 80 nodes).
size_per_team = 20
# NOTE(review): `results` is overwritten below and `selected_nodes` is only needed by the
# commented-out rebuild path; the call is kept in case select_nodes_by_label has side effects.
results, selected_nodes = select_nodes_by_label(G, size_per_label=size_per_team)
net_path = f'./networks/sub_nets_05_10_{shape}/{4 * size_per_team}_team_network.pkl'
# Context manager closes the file handle; only unpickle files from a trusted source.
with open(net_path, 'rb') as pkl_file:
    net = pickle.load(pkl_file)
# To rebuild and re-save the sub-network instead of loading it:
# net = G.subgraph(selected_nodes)
# with open(net_path, 'wb') as pkl_file:
#     pickle.dump(net, pkl_file)
print(f"Created a {4 * size_per_team} network")

results, top_nodes = analyze_network_by_labels(net)

print("---Influence Greedy---")
best_set, max_eff = find_best_set_of_leaders(net, top_nodes)
print(f"Best Set of Leaders: {best_set}, Efficiency: {max_eff}")
print(f"Nodes not in top nodes: {best_set - set(top_nodes)}")
print("\n")

print("---Influence only---")
combinations = create_unique_label_combinations(net, top_nodes)
print(f"Total combinations: {len(combinations)}")
# Average comm_eff over at most the first 100 combinations.
# Plain left-to-right accumulation is kept so the printed average is unchanged.
total = 0.0
iters = 0
for combo in combinations[:100]:
    nodes = [node['id'] for node in combo]
    total += comm_eff(net, nodes)
    iters += 1
if iters:
    print(f"Average for combination {iters} combinations of leaders is: {total/iters}")
else:
    # Avoid ZeroDivisionError when no combination could be formed.
    print("No leader combinations available to average")
print("\n")

print("---Random Monte Carlo---")
print(f"Communication efficiency of Random Algorithm: {randomAlgo(net)}")

Created a 80 network

Total number of top nodes across all labels: 20
---Influence Greedy---
Best Set of Leaders: {'Mikkel Thorup', 'Aristides Gionis', 'Anthony K. H. Tung', 'Michael I. Jordan'}, Efficiency: 4.1121
Nodes not in top nodes: set()


---Influence only---
Total combinations: 441
Average for combination 100 combinations of leaders is: 4.089793999999995


---Random Monte Carlo---
Communication efficiency of Random Algorithm: 3.444


#### 25 NODES PER TEAM (100-NODE NETWORK)

In [12]:
# Load the saved sub-network for 25 nodes per team (4 teams -> 100 nodes).
size_per_team = 25
# NOTE(review): `results` is overwritten below and `selected_nodes` is only needed by the
# commented-out rebuild path; the call is kept in case select_nodes_by_label has side effects.
results, selected_nodes = select_nodes_by_label(G, size_per_label=size_per_team)
net_path = f'./networks/sub_nets_05_10_{shape}/{4 * size_per_team}_team_network.pkl'
# Context manager closes the file handle; only unpickle files from a trusted source.
with open(net_path, 'rb') as pkl_file:
    net = pickle.load(pkl_file)
# To rebuild and re-save the sub-network instead of loading it:
# net = G.subgraph(selected_nodes)
# with open(net_path, 'wb') as pkl_file:
#     pickle.dump(net, pkl_file)
print(f"Created a {4 * size_per_team} network")

results, top_nodes = analyze_network_by_labels(net)

print("---Influence Greedy---")
best_set, max_eff = find_best_set_of_leaders(net, top_nodes)
print(f"Best Set of Leaders: {best_set}, Efficiency: {max_eff}")
print(f"Nodes not in top nodes: {best_set - set(top_nodes)}")
print("\n")

print("---Influence only---")
combinations = create_unique_label_combinations(net, top_nodes)
print(f"Total combinations: {len(combinations)}")
# Average comm_eff over at most the first 100 combinations.
# Plain left-to-right accumulation is kept so the printed average is unchanged.
total = 0.0
iters = 0
for combo in combinations[:100]:
    nodes = [node['id'] for node in combo]
    total += comm_eff(net, nodes)
    iters += 1
if iters:
    print(f"Average for combination {iters} combinations of leaders is: {total/iters}")
else:
    # Avoid ZeroDivisionError when no combination could be formed.
    print("No leader combinations available to average")
print("\n")

print("---Random Monte Carlo---")
print(f"Communication efficiency of Random Algorithm: {randomAlgo(net)}")

Created a 100 network

Total number of top nodes across all labels: 16
---Influence Greedy---
Best Set of Leaders: {'Karsten M. Borgwardt', 'Mikkel Thorup', 'Anthony K. H. Tung', 'Bo Long'}, Efficiency: 3.9895
Nodes not in top nodes: set()


---Influence only---
Total combinations: 96
Average for combination 96 combinations of leaders is: 3.9811916666666654


---Random Monte Carlo---
Communication efficiency of Random Algorithm: 3.2185
