In [1]:
import pandas as pd
import os
import networkx as nx
import community
import numpy as np

from tqdm import tqdm
import copy
import sys
sys.path.insert(0, '/home/vincent/Documents/School/Research/EoBoR/')
from hungarian import Hungarian

In [2]:
nodestats_folder = "nodestats_folder/"

In [3]:
def transposeDict(original_dict,items=False):
    from collections import defaultdict
    result = defaultdict(list)
    items_l = original_dict.items() if not items else original_dict
    for k,v in items_l:
        result[v].append(k)
    result = dict(result)
    return result

# Labelling dynamic communities

Two methods:
* Hungarian method with cutoff
* Sant's "best match"

##### Pulling networks from data

In [None]:
partitions_over_time = []

for filename in sorted(os.listdir(nodestats_folder)):
    with open(nodestats_folder+filename,'rb') as f:
        df = pickle.load(f)
        partitions_over_time.append( df['partition_f'].tolist() ) ### CHANGE THIS TO TRANSPOSED DICTIONARIES

##### Generating test networks

In [4]:
import community

In [6]:
networks_over_time = []
partitions_over_time = []

networks_over_time.append(nx.random_geometric_graph(500,0.125))
partitions_over_time.append(transposeDict(dict(community.best_partition(networks_over_time[0]))))

for x in tqdm(range(100)):
    g = nx.Graph(networks_over_time[x])
    #randomly delete a hundred nodes
    removed_nodes = []
    for i in range(100):
        removed_node = np.random.choice(list(g.nodes()))
        removed_nodes.append(removed_node)
        g.remove_node(removed_node)
    for i in list(nx.isolates(g)):
        g.remove_node(i)
        removed_nodes.append(i)
    #randomly add a hundred nodes by neighbors
    for node in removed_nodes:
        neighbors = []
        while neighbors==[]:
            select_node = np.random.choice(list(g.nodes()))
            neighbors = list(g.neighbors(select_node))
        neighbors_deg = [j for q,j in list(g.degree(neighbors))]
        neighbors_deg = [j/sum(neighbors_deg) for j in neighbors_deg]
        select_neighbor = np.random.choice(neighbors,p=neighbors_deg)
        g.add_edge(select_neighbor,node)
    networks_over_time.append(g)
    partitions_over_time.append(transposeDict(dict(community.best_partition(g))))

100%|██████████| 100/100 [00:04<00:00, 25.00it/s]


## Hungarian method

#### Generating reassignments

* If there is a forward map and it matches the backward map, then it's good. 
* If there is a forward map but a different backward map, it means a forward handoff (comm1 becomes comm2)
* If there is a backward map but a different forward map, same kind of handoff but different specific handoff. 
* If there is no backward map, the community dies. 
* If there is no forward map, the community is created. 

In [14]:
def generateReassignments(partitions_over_time):
    reassignment_partitions = []

    prev_part = partitions_over_time[0]#Start with the first one in the time series
    reassignment_partitions.append({x:x for x in prev_part.keys()})
    for next_part in tqdm(partitions_over_time[1:]):
        #Computer matrix of Jaccard distances between communities
        jacmatrix = np.zeros((len(prev_part),len(next_part)))
        for i,x in enumerate(prev_part.keys()):
            for j,y in enumerate(next_part.keys()):
                one = set(prev_part[x])
                two = set(next_part[y])
                jacmatrix[i][j] = 1 - len(one&two)/len(one|two)  ### DOES THIS NEED A THRESHOLD???
        #Hungarian from prev to next (forward)
        hungarian = Hungarian(jacmatrix)
        hungarian.calculate()
        forward_map = hungarian.get_results()
        #Hungarian from next to prev (backward)
        hungarian2 = Hungarian(np.transpose(jacmatrix))
        hungarian.calculate()
        backward_map = hungarian.get_results()

        dead_partitions = [pair for pair in forward_map if pair not in backward_map or pair[::-1] not in backward_map]
        steady_partitions = [pair for pair in forward_map if pair in backward_map or pair[::-1] not in backward_map]
        new_partitions = [pair for pair in backward_map if pair not in forward_map or pair[::-1] not in forward_map]
        
        valid_partitions = forward_map
        #Create a reassignment dictionary for next_part
        this_assignment_dict = {y:x for x,y in valid_partitions} #Formatted as "this_key:will_become_this_val"

        reassignment_partitions.append(this_assignment_dict)
        prev_part = next_part
    return reassignment_partitions

#### Confirming reassignments

At this point, `reassignment_partitions` is a list of dictionaries that tell you how to relabel communities to the corresponding label of the previous time step. 

The following generates `final_partitions`, a list of dictionaries that tell you how to relabel communities to an absolute label over time. 

In [8]:
def finalizingPartitions(reassignment_partitions):
    final_partitions = []
    final_partitions.append(reassignment_partitions[0])
    prev_dict = reassignment_partitions[0]
    new_comm_label = max(prev_dict.keys()) + 1

    #Go through the reassignments
    for x in range(len(reassignment_partitions[1:])):
        current_dict = copy.deepcopy(reassignment_partitions[x])
        for k,v in current_dict.items():
            if v in prev_dict.keys():
                current_dict[k] = prev_dict[v]
            elif v not in prev_dict.keys():
                current_dict[k] = new_comm_label
                new_comm_label+=1
        final_partitions.append(current_dict)
        prev_dict = current_dict
    return final_partitions

## Generating and testing example networks

In [15]:
reassignment_partitions = generateReassignments(partitions_over_time)

100%|██████████| 100/100 [00:00<00:00, 128.48it/s]


In [16]:
final_partitions = finalizingPartitions(reassignment_partitions)

In [17]:
final_partitions

[{0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6},
 {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6},
 {0: 1, 1: 0, 2: 2, 3: 3, 5: 4, 6: 5, 7: 6},
 {0: 1, 1: 0, 2: 2, 3: 4, 4: 7, 5: 5, 7: 6, 8: 3},
 {0: 8, 1: 0, 2: 4, 3: 2, 4: 5, 5: 1, 6: 6, 7: 7, 8: 9},
 {0: 8, 1: 0, 2: 4, 3: 2, 4: 5, 5: 7, 6: 6, 8: 1, 9: 9},
 {0: 10, 1: 2, 2: 0, 3: 8, 4: 5, 5: 7, 6: 9, 7: 6, 9: 11, 10: 1, 11: 4},
 {0: 10,
  1: 2,
  2: 0,
  4: 5,
  5: 8,
  6: 7,
  7: 9,
  8: 6,
  9: 11,
  10: 12,
  11: 1,
  12: 4,
  13: 13},
 {1: 2,
  2: 0,
  3: 14,
  4: 13,
  5: 6,
  6: 12,
  7: 8,
  8: 9,
  9: 5,
  10: 10,
  11: 1,
  12: 7,
  13: 11,
  14: 4},
 {0: 15,
  2: 0,
  3: 4,
  4: 2,
  6: 8,
  7: 11,
  8: 12,
  10: 9,
  11: 5,
  12: 13,
  13: 1,
  14: 7,
  15: 6,
  16: 10,
  17: 16,
  18: 14},
 {0: 15,
  1: 17,
  2: 0,
  3: 4,
  4: 2,
  5: 8,
  7: 12,
  9: 11,
  10: 13,
  11: 18,
  12: 9,
  13: 1,
  14: 7,
  16: 6,
  17: 10,
  18: 16,
  19: 19,
  20: 5,
  21: 14},
 {0: 15,
  1: 17,
  2: 11,
  3: 4,
  4: 2,
  5: 8,
  6: 20,
  8: 21,
  9

In [18]:
converted_partitions_over_time = []

for x,part in enumerate(partitions_over_time):
    result = {final_partitions[x][k]:part[k] for k in part}
    converted_partitions_over_time.append( result )

KeyError: 7

# Testing

In [None]:
a = {i:i+10 for i in range(20)}

In [None]:
a.values()