In [1]:
# Development convenience: with autoreload mode 2, edits to imported modules
# (e.g. the local seirsplus package) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import networkx as nx

from node2vec import Node2Vec

from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist
from scipy.optimize import  linear_sum_assignment
import numpy as np
import matplotlib.pyplot as plt

In [3]:
from seirsplus.assignment import embed_nodes, get_equal_sized_clusters

In [4]:
def get_household(households, node_id):
    """Look up the household that a node belongs to.

    Args:
        households: iterable of dict-like records, each exposing its member
            node ids under the ``'indices'`` key.
        node_id: the node id to search for.

    Returns:
        The ``'indices'`` value of the first household containing
        ``node_id``, or ``None`` when no household contains it.
    """
    matching_members = (
        entry['indices'] for entry in households if node_id in entry['indices']
    )
    # First match wins; fall back to None when the generator is exhausted.
    return next(matching_members, None)


def get_assignment(groups, node_id):
    """Look up the cluster group that a node was assigned to.

    Args:
        groups: mapping whose values are collections of node ids
            (the keys are not inspected).
        node_id: the node id to search for.

    Returns:
        The first value of ``groups`` that contains ``node_id``, or ``None``
        when no group contains it.
    """
    containing_groups = (
        members for members in groups.values() if node_id in members
    )
    return next(containing_groups, None)


In [5]:
from seirsplus.networks import generate_demographic_contact_network, household_country_data

N = 1000  # population size handed to the network generator
np.random.seed(0)  # seed NumPy's global RNG before any data is generated

# Build the demographic inputs first, then generate the contact network.
us_demographic_data = household_country_data('US')
demographic_graphs, individual_ageGroups, households = generate_demographic_contact_network(
    N=N,
    demographic_data=us_demographic_data,
    distancing_scales=[0.7],
    isolation_groups=[],
)

# The rest of the notebook works on the 'baseline' graph only.
G = demographic_graphs['baseline']

Generated overall age distribution:
0-9: 0.1140	(-0.0070 from target)
10-19: 0.1310	(0.0000 from target)
20-29: 0.1410	(0.0040 from target)
30-39: 0.1440	(0.0110 from target)
40-49: 0.1170	(-0.0070 from target)
50-59: 0.1290	(-0.0020 from target)
60-69: 0.1020	(-0.0130 from target)
70-79: 0.0720	(0.0020 from target)
80+: 0.0500	(0.0120 from target)

Generated household size distribution:
1: 0.3259	(0.0422 from target)
2: 0.2988	(-0.0463 from target)
3: 0.1160	(-0.0346 from target)
4: 0.1481	(0.0205 from target)
5: 0.0691	(0.0114 from target)
6: 0.0346	(0.0119 from target)
7: 0.0074	(-0.0051 from target)
Num households: 405
mean household size: 2.448393594

Generating graph for 0-9...
Generating graph for 10-19...
Generating graph for 20-59...


  return adjacency_matrix(G, nodelist, dtype, weight)


Generating graph for 60+...


In [6]:
# Show every edge together with its attribute dict; note that some edges
# carry no 'weight' attribute at this point.
G.edges(data=True)

EdgeDataView([(0, 2, {'weight': 1}), (0, 3, {'weight': 1}), (0, 4, {'weight': 1}), (0, 5, {'weight': 1}), (0, 7, {'weight': 1}), (0, 8, {'weight': 1}), (0, 9, {'weight': 1}), (0, 16, {'weight': 1}), (0, 19, {'weight': 1}), (0, 21, {'weight': 1}), (0, 28, {'weight': 1}), (0, 45, {'weight': 1}), (0, 50, {'weight': 1}), (0, 55, {'weight': 1}), (0, 246, {}), (1, 2, {'weight': 1}), (1, 3, {'weight': 1}), (1, 4, {'weight': 1}), (1, 5, {'weight': 1}), (1, 7, {'weight': 1}), (1, 9, {'weight': 1}), (1, 11, {'weight': 1}), (1, 16, {'weight': 1}), (1, 19, {'weight': 1}), (1, 21, {'weight': 1}), (1, 23, {'weight': 1}), (1, 25, {'weight': 1}), (1, 27, {'weight': 1}), (1, 28, {'weight': 1}), (1, 30, {'weight': 1}), (1, 45, {'weight': 1}), (1, 50, {'weight': 1}), (1, 53, {'weight': 1}), (1, 55, {'weight': 1}), (1, 249, {}), (2, 3, {'weight': 1}), (2, 4, {'weight': 1}), (2, 5, {'weight': 1}), (2, 6, {'weight': 1}), (2, 7, {'weight': 1}), (2, 8, {'weight': 1}), (2, 9, {'weight': 1}), (2, 10, {'weight':

In [7]:
# Embed the nodes of G, then partition them into clusters built with
# cluster_size=5.
embedding, node2vec_model = embed_nodes(G)
clusters = get_equal_sized_clusters(
    X=embedding,
    model=node2vec_model,
    graph=G,
    cluster_size=5,
)  # dict of node_id to cluster id
cluster_ids = list(set(clusters.values()))  # unique list of cluster ids

# Invert the node -> cluster mapping in a single pass over `clusters` rather
# than rescanning every node once per cluster id. Keys keep the order of
# `cluster_ids` and members keep the iteration order of `clusters`.
groups = {cluster_id: [] for cluster_id in cluster_ids}
for node_id, cluster_id in clusters.items():
    groups[cluster_id].append(node_id)
# groups: dict of cluster ids as the keys and the node ids as the values

Computing transition probabilities:   0%|          | 0/1000 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 3/3 [00:00<00:00,  5.52it/s]
Generating walks (CPU: 2): 100%|██████████| 3/3 [00:00<00:00,  5.46it/s]
Generating walks (CPU: 3): 100%|██████████| 2/2 [00:00<00:00,  5.77it/s]
Generating walks (CPU: 4): 100%|██████████| 2/2 [00:00<00:00,  6.62it/s]


In [8]:
# For the first 100 node ids, print the node's household members next to the
# cluster group it was assigned to, so the two can be compared by eye.
for node_id in range(100):
    household_members = get_household(households, node_id)
    assigned_group = get_assignment(groups, node_id)
    print(household_members, assigned_group)

[0, 246] [0, 8, 16, 46, 54]
[1, 249] [1, 3, 5, 9, 23]
[2, 252, 251] [2, 44, 67, 68, 198]
[3, 253] [1, 3, 5, 9, 23]
[5, 4] [4, 21, 73, 74, 326]
[5, 4] [1, 3, 5, 9, 23]
[6, 115] [6, 41, 89, 99, 100]
[116, 8, 7, 263] [7, 25, 56, 80, 90]
[116, 8, 7, 263] [0, 8, 16, 46, 54]
[9, 117, 266, 265, 264] [1, 3, 5, 9, 23]
[11, 10, 120] [10, 11, 28, 35, 36]
[11, 10, 120] [10, 11, 28, 35, 36]
[12, 796, 281, 280, 279, 795] [12, 13, 24, 31, 34]
[13, 291, 290, 289] [12, 13, 24, 31, 34]
[15, 14] [14, 78, 606, 636, 637]
[15, 14] [15, 45, 47, 55, 81]
[17, 16] [0, 8, 16, 46, 54]
[17, 16] [17, 26, 42, 64, 65]
[123, 18, 322, 321, 320] [18, 79, 88, 673, 929]
[19, 325, 324, 323] [19, 105, 106, 883, 986]
[124, 20] [20, 22, 30, 37, 38]
[21, 125, 326] [4, 21, 73, 74, 326]
[129, 22, 128] [20, 22, 30, 37, 38]
[133, 23] [1, 3, 5, 9, 23]
[24, 138] [12, 13, 24, 31, 34]
[25, 140, 139, 362] [7, 25, 56, 80, 90]
[26, 141] [17, 26, 42, 64, 65]
[28, 27, 368, 367, 366] [27, 69, 70, 71, 200]
[28, 27, 368, 367, 366] [10, 11, 28

In [9]:
# Weight given to every edge that has no 'weight' attribute yet. It is many
# orders of magnitude above the weight of 1 seen on the other edges.
# NOTE(review): the unweighted edges look like within-household links (they
# match the households printed earlier) - confirm against the generator.
MISSING_EDGE_WEIGHT = 10000000000

# NOTE: this mutates G in place, so any cell run after this one (including a
# re-run of an earlier cell) sees the re-weighted graph. Re-running this cell
# itself is a no-op because existing weights are never overwritten.
for u, v, edge_attrs in G.edges(data=True):
    # edge_attrs is the graph's own attribute dict for (u, v), so setting the
    # default here updates the edge itself.
    edge_attrs.setdefault('weight', MISSING_EDGE_WEIGHT)
        

In [10]:
# Re-inspect the edges to confirm that every attribute dict now has a
# 'weight' entry.
G.edges(data=True)

EdgeDataView([(0, 2, {'weight': 1}), (0, 3, {'weight': 1}), (0, 4, {'weight': 1}), (0, 5, {'weight': 1}), (0, 7, {'weight': 1}), (0, 8, {'weight': 1}), (0, 9, {'weight': 1}), (0, 16, {'weight': 1}), (0, 19, {'weight': 1}), (0, 21, {'weight': 1}), (0, 28, {'weight': 1}), (0, 45, {'weight': 1}), (0, 50, {'weight': 1}), (0, 55, {'weight': 1}), (0, 246, {'weight': 10000000000}), (1, 2, {'weight': 1}), (1, 3, {'weight': 1}), (1, 4, {'weight': 1}), (1, 5, {'weight': 1}), (1, 7, {'weight': 1}), (1, 9, {'weight': 1}), (1, 11, {'weight': 1}), (1, 16, {'weight': 1}), (1, 19, {'weight': 1}), (1, 21, {'weight': 1}), (1, 23, {'weight': 1}), (1, 25, {'weight': 1}), (1, 27, {'weight': 1}), (1, 28, {'weight': 1}), (1, 30, {'weight': 1}), (1, 45, {'weight': 1}), (1, 50, {'weight': 1}), (1, 53, {'weight': 1}), (1, 55, {'weight': 1}), (1, 249, {'weight': 10000000000}), (2, 3, {'weight': 1}), (2, 4, {'weight': 1}), (2, 5, {'weight': 1}), (2, 6, {'weight': 1}), (2, 7, {'weight': 1}), (2, 8, {'weight': 1}),

In [11]:
# Repeat the embedding and clustering on G as it stands now (after the
# missing edge weights were filled in), again with cluster_size=5.
embedding, node2vec_model = embed_nodes(G)
clusters = get_equal_sized_clusters(
    X=embedding,
    model=node2vec_model,
    graph=G,
    cluster_size=5,
)  # dict of node_id to cluster id
cluster_ids = list(set(clusters.values()))  # unique list of cluster ids

# Invert the node -> cluster mapping in a single pass over `clusters` rather
# than rescanning every node once per cluster id. Keys keep the order of
# `cluster_ids` and members keep the iteration order of `clusters`.
groups = {cluster_id: [] for cluster_id in cluster_ids}
for node_id, cluster_id in clusters.items():
    groups[cluster_id].append(node_id)
# groups: dict of cluster ids as the keys and the node ids as the values

Computing transition probabilities:   0%|          | 0/1000 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 3/3 [00:00<00:00, 10.07it/s]
Generating walks (CPU: 2): 100%|██████████| 3/3 [00:00<00:00,  9.89it/s]
Generating walks (CPU: 3): 100%|██████████| 2/2 [00:00<00:00,  9.63it/s]
Generating walks (CPU: 4): 100%|██████████| 2/2 [00:00<00:00, 10.43it/s]


In [12]:
# Same side-by-side comparison as before, now using the groups computed from
# the re-weighted graph: household members vs. assigned cluster group.
for node_id in range(100):
    household_members = get_household(households, node_id)
    assigned_group = get_assignment(groups, node_id)
    print(household_members, assigned_group)

[0, 246] [0, 246, 381, 645, 731]
[1, 249] [1, 66, 579, 580, 590]
[2, 252, 251] [2, 251, 252, 379, 389]
[3, 253] [3, 216, 380, 882, 955]
[5, 4] [4, 21, 125, 326, 989]
[5, 4] [5, 19, 323, 324, 325]
[6, 115] [6, 64, 115, 570, 933]
[116, 8, 7, 263] [7, 8, 116, 263, 678]
[116, 8, 7, 263] [7, 8, 116, 263, 678]
[9, 117, 266, 265, 264] [9, 117, 264, 265, 266]
[11, 10, 120] [10, 11, 120, 651, 722]
[11, 10, 120] [10, 11, 120, 651, 722]
[12, 796, 281, 280, 279, 795] [12, 279, 281, 795, 796]
[13, 291, 290, 289] [13, 289, 290, 291, 667]
[15, 14] [14, 37, 38, 171, 883]
[15, 14] [15, 24, 138, 958, 959]
[17, 16] [16, 17, 349, 719, 974]
[17, 16] [16, 17, 349, 719, 974]
[123, 18, 322, 321, 320] [18, 123, 320, 321, 322]
[19, 325, 324, 323] [5, 19, 323, 324, 325]
[124, 20] [20, 124, 612, 613, 855]
[21, 125, 326] [4, 21, 125, 326, 989]
[129, 22, 128] [22, 128, 129, 610, 723]
[133, 23] [23, 133, 370, 745, 987]
[24, 138] [15, 24, 138, 958, 959]
[25, 140, 139, 362] [25, 139, 140, 362, 431]
[26, 141] [26, 35, 