# **Preamble**

## Imports

In [3]:
%matplotlib inline
import pandas as pd
import matplotlib.pylab as plt
import numpy as np
import subprocess
import re
import itertools
import mmh3
import random
import json
import pickle
from datetime import datetime as dt
from datetime import time
from collections import defaultdict, Counter
from datetime import datetime as dt
from scipy.sparse import triu
from dateutil import tz
import datetime
from sklearn.neighbors import KernelDensity

## Utility functions

### General

In [10]:
def naive(arr, stat):
    """Apply ``stat`` to ``arr``, falling back to -1 on ``ValueError``.

    Parameters
    ----------
    arr : iterable
        Values handed to ``stat``.
    stat : callable
        Statistic to compute, e.g. ``max`` or ``min``.

    Returns
    -------
    The value of ``stat(arr)``, or ``-1`` if ``stat`` raised ``ValueError``
    (as ``max``/``min`` do for an empty sequence).
    """
    try:
        result = stat(arr)
    except ValueError:
        result = -1
    return result
    
def load_binned_network(kind, filename):
    """Load a pickled binned network from the processed-data folder.

    Parameters
    ----------
    kind : str
        Sub-folder of ``data/processed_data/binned_networks/``.
    filename : str
        File name without the ``.pkl`` extension.

    Returns
    -------
    The unpickled object.
    """
    path = 'data/processed_data/binned_networks/' + kind + '/' + filename + '.pkl'
    # Pickles are binary data: text mode fails on Python 3 and can corrupt
    # the stream on platforms that translate line endings.
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(path, 'rb') as infile:
        return pickle.load(infile)
    
def default_to_regular(d):
    """Recursively convert nested defaultdicts to nested dicts.

    Values that are not ``defaultdict`` instances are returned unchanged.

    Source: http://stackoverflow.com/questions/26496831/how-to-convert-defaultdict-of-defaultdicts-of-defaultdicts-to-dict-of-dicts-o
    """
    if isinstance(d, defaultdict):
        # items() exists on both Python 2 and 3 (iteritems() is Python 2 only).
        d = {k: default_to_regular(v) for k, v in d.items()}
    return d

### Networks

In [5]:
def build_adjacency_tensor(layers, index="zero"):
    """Count edges per layer into a sparse, symmetric adjacency "tensor".

    Parameters
    ----------
    layers : list
        One table per layer with 'user1' and 'user2' columns, iterable
        row-wise via ``iterrows()``.
    index : str or None
        With ``"zero"`` (default) nodes are replaced by consecutive integer
        ids; with any other value the raw node labels are used as keys.

    Returns
    -------
    collections.defaultdict
        Maps ``(node_a, node_b, layer_number)`` to the number of rows that
        connect the two nodes in that layer.
    """
    node_ids = {
        node
        for layer in layers
        for node in list(layer['user1']) + list(layer['user2'])
    }
    position = {node: pos for pos, node in enumerate(node_ids)}

    if index == "zero":
        key_of = position.__getitem__
    else:
        key_of = lambda node: node

    tensor = defaultdict(int)
    for layer_no, layer in enumerate(layers):
        for _, edge in layer.iterrows():
            a = key_of(edge['user1'])
            b = key_of(edge['user2'])
            # Every edge is stored in both directions so that the tensor is
            # symmetric; a one-way entry would only fill one triangle.
            tensor[(a, b, layer_no)] += 1
            tensor[(b, a, layer_no)] += 1
    return tensor

def write_pajek(A, node_labels=None, index_from=0):
    """Return a multiplex pajek-style string for the adjacency mapping ``A``.

    Parameters
    ----------
    A : dict
        Maps ``(node_i, node_j, layer)`` to an edge weight, as produced by
        ``build_adjacency_tensor``. Only entries with ``node_j > node_i``
        are written, so each undirected edge appears once and self-loops
        are omitted.
    node_labels : list
        Currently unused; nodes are labelled with ``str(node)``.
    index_from : int
        Offset added to node and layer numbers in the output (default=0).

    Returns
    -------
    out : string
        A ``*Vertices`` section followed by an ``*Intra`` section with one
        ``layer node node weight`` line per edge.
    """
    nodes = sorted(set(n for i, j, _ in A for n in (i, j)))
    nodemap = dict((label, nid) for nid, label in enumerate(nodes))

    # Collect lines and join once instead of growing a string with `+=`.
    lines = ["*Vertices %d" % len(nodes)]
    for nid, label in enumerate(nodes):
        lines.append('%d "%s" 1.0' % (nid + index_from, str(label)))

    lines.append("*Intra")
    lines.append("# layer node node [weight]")
    for (i, j, k), w in A.items():
        if j > i:  # keep the upper triangle only; A is symmetric
            lines.append('%d %d %d %f' % (
                k + index_from,           # layer
                nodemap[i] + index_from,  # node
                nodemap[j] + index_from,  # node
                w                         # weight
            ))

    return "\n".join(lines)


def Infomap(pajek_string, *args, **kwargs):
    """Run the native Infomap executable on a pajek network and parse its output.

    Requires two folders (1) 'input' and (2) 'output', in sister-directory of 'infomap' folder
    that contains 'Infomap' executable. To setup 'infomap' folder, clone Infomap from
    https://github.com/mapequation/infomap and run 'make' inside resulting folder.

    Parameters
    ----------
    pajek_string : str
        Pajek representation of the network.
    *args : str
        Command line arguments passed to Infomap after the input file
        (http://www.mapequation.org/code.html#Options). Must contain
        'multiplex' or 'pajek', which selects the output parser. If the last
        argument starts with "pid", the remainder is appended to the
        temporary file name and the argument is not passed to Infomap.
    return_flow : bool, keyword only
        Also return node and community flow (default False). Both are
        ``None`` unless 'multiplex' is among ``args``.

    Returns
    -------
    communities : dict
        ``{cluster: set(node labels)}`` over all layers, keyed by the cluster
        id read from the Infomap output.
    layer_communities : dict
        ``{layer: {cluster - 1: [node labels]}}``.
    node_flow, community_flow : dict
        Only when ``return_flow`` is true.

    Raises
    ------
    ValueError
        If neither 'multiplex' nor 'pajek' is among ``args``.
    """

    def _get_id_to_label(filename):
        """Map node ids to labels using the first section of the input file."""
        def __int_if_int(val):
            try:
                return int(val)
            except ValueError:
                return val
        with open('community_detection/input/' + filename + ".net", 'r') as fp:
            parsed_network = fp.read()
        return dict(
            (int(n.split()[0]), __int_if_int(n.split('"')[1]))
            for n in re.split(r"\*.+", parsed_network)[1].split("\n")[1:-1]
        )

    def _parse_communities_multiplex(id_to_label, filename):
        """Parse '<layer> <node> <cluster> <flow>' rows of the _expanded.clu file."""
        with open('community_detection/output/'+filename+"_expanded.clu", 'r') as infile:
            clusters = infile.read()

        # Get layers, nodes and clusters from _expanded.clu file
        la_no_clu_flow = re.findall(r'\d+ \d+ \d+ \d\.\d+', clusters) # ["30 1 2 0.00800543",...]
        la_no_clu_flow = [tuple(i.split()) for i in la_no_clu_flow]

        node_flow_json = defaultdict(float)      # {layer_node: flow, ...}
        community_flow_json = defaultdict(float) # {community: flow, ...}
        communities_json = defaultdict(set)      # {layer: {(node, cluster), ...}, ...}
        for layer, node, cluster, flow in la_no_clu_flow:
            node_flow_json[(int(layer), id_to_label[int(node)])] += float(flow)
            community_flow_json[cluster] += float(flow)
            communities_json[int(layer)].add((id_to_label[int(node)], int(cluster)))

        return communities_json, node_flow_json, community_flow_json

    def _parse_communities_planar(id_to_label, filename):
        """Parse '<node> <cluster> <flow>' rows of the .clu file into a single layer 0."""
        with open('community_detection/output/'+filename+".clu", 'r') as infile:
            clusters = infile.read()

        # Get nodes and clusters from .clu file
        no_clu = [tuple(i.split()[:-1]) for i in re.findall(r"\d+ \d+ \d\.\d+", clusters)]  # [(node, cluster), ...]
        return {0: set([(id_to_label[int(no)], int(clu)) for no, clu in no_clu])}

    def _clean_up(filename):
        """Remove the temporary input and output files of this run."""
        subprocess.call(['rm', 'community_detection/input/'+filename+'.net'])
        subprocess.call(['rm', 'community_detection/output/'+filename+'_expanded.clu'])
        subprocess.call(['rm', 'community_detection/output/'+filename+'.clu'])

    # Check for process id in args (for multiprocessing); tolerate empty args
    if args and args[-1][:3] == "pid":
        pid = args[-1][3:]
        args = args[:-1]
    else:
        pid = ""
    args = list(args)

    # Fail early with a clear message instead of a NameError after the run
    if 'multiplex' not in args and 'pajek' not in args:
        raise ValueError("Infomap args must contain 'multiplex' or 'pajek'")

    # Set default kwarg params
    return_flow = kwargs.get("return_flow", False)

    # Define filename of the temporary network
    filename = 'tmpnet' + pid

    # Store locally
    with open("community_detection/input/"+filename+".net", 'w') as outfile:
        outfile.write(pajek_string)

    # Run Infomap
    subprocess.call(
        ['./community_detection/infomap/Infomap', 'community_detection/input/'+filename+".net"] + args
    )

    # Parse communities from Infomap output
    id_to_label = _get_id_to_label(filename)

    # Flow is only available from the multiplex parser
    node_flow, community_flow = None, None
    if 'multiplex' in args:
        parsed_communities, node_flow, community_flow = _parse_communities_multiplex(id_to_label, filename)
    if 'pajek' in args:
        parsed_communities = _parse_communities_planar(id_to_label, filename)

    _clean_up(filename)

    # Members of each cluster, pooled over all layers
    communities = defaultdict(set)
    for members in parsed_communities.values():
        for node, clu in members:
            communities[clu].add(node)
    communities = dict(communities)

    # Per-layer partitions. A separate name is used for the per-layer dict so
    # the pooled `communities` above is not overwritten before it is returned.
    layer_communities = {}
    for layer, members in parsed_communities.items():
        partition = {}
        for no, clu in members:
            partition.setdefault(clu - 1, []).append(no)
        layer_communities[layer] = partition

    if return_flow:
        return communities, layer_communities, node_flow, community_flow
    else:
        return communities, layer_communities
    
def community_members(layer_commu):
    """Pool the members of each community over all layers.

    Parameters
    ----------
    layer_commu : dict
        ``{layer: {community: iterable of nodes}}``.

    Returns
    -------
    collections.defaultdict
        ``{community: set of nodes seen in any layer}``.
    """
    members_by_community = defaultdict(set)
    for partition in layer_commu.values():
        for community, nodes in partition.items():
            members_by_community[community].update(nodes)
    return members_by_community

### Not used

# **Load and preprocess**

## *Sensible DTU*

In [None]:
# Load the binned network. `fof` scales the layer width: there are 288 / fof
# slices per day, i.e. each layer spans 5 * fof minutes.
network_sensibleDTU = load_binned_network('1month_data_new', '10mins_short_new')
fof = 2

# Make slices for a span of days (e.g. monday to friday)
spd = 288 // fof  # slices per day (floor division keeps it usable as a slice index)
sph = spd // 24   # slices per hour
dow = 0

network1 = [
    l
    for d in range(0, 5)
    for l in network_sensibleDTU[spd*(dow+5+d):spd*(dow+6+d)]
]

# Null out layers in non-work hours: keep layers whose hour of day is 8..17,
# replace all others with an empty table with the same columns.
network1 = [
    df if 8 <= (l % spd) // sph <= 17 else
    pd.DataFrame(columns=['timestamp', 'user1', 'user2'])
    for l, df in enumerate(network1)
]


## *Workplace*

In [17]:
# Load
network_workplace = pd.read_csv("data/workplace/tij_InVS.txt", delimiter=" ", names=["timestamp", 'user1', 'user2'])

# Minimum timestamp is a thursday
print(dt.fromtimestamp(network_workplace['timestamp'].min()).weekday())

# Shift data by 4 days to make first day a monday
network_workplace['timestamp'] = np.array([dt.fromtimestamp(ts) for ts in (network_workplace['timestamp'] + 86400 * 4)])

# Layer width in minutes
layer_size = 10
fof = 2

# Layer time bins from lower of first to (and including) upper of last.
# NOTE: strftime("%s") (seconds since epoch) is a platform extension and is
# not available everywhere.
lower_bin = int(network_workplace['timestamp'].min().date().strftime("%s"))
upper_bin = lower_bin + 86400*5  # five days from the first midnight
bins = [
    dt.fromtimestamp(ts)
    for ts in np.arange(lower_bin, upper_bin+layer_size*60, layer_size*60)
]

# List of pandas dataframes, each a temporal network layer. Bins are
# half-open [low, high) so that an event falling exactly on a bin edge is
# assigned to one layer instead of being dropped by both neighbours.
network2 = [
    network_workplace[(network_workplace['timestamp'] >= low) & (network_workplace['timestamp'] < high)]
    for low, high in zip(bins[:-1], bins[1:])
]

3


# Processing

## Sensible DTU

In [None]:
# Pair every non-empty layer with its position in `network1`, so that layer
# numbers in the pajek file can be mapped back to the original positions.
nonempty1 = [(l, n) for l, n in enumerate(network1) if n.shape[0] > 0]
layer_indices1 = [l for l, _ in nonempty1]
A1 = build_adjacency_tensor([n for _, n in nonempty1], index=None)
network_pajek1 = write_pajek(A1)

In [None]:
# Command line options for Infomap; the trailing 'pid...' entry only makes
# the temporary file name unique and is stripped by `Infomap`.
infomap_args1 = [
    'community_detection/output/',
    '-i', 'multiplex',
    '--multiplex-relax-rate', '0.25',
    '--overlapping',
    '--expanded',
    '--clu',
    '--two-level',
    '-z',
    'pid%d' % random.randint(0, 1000000),
]
_, layer_commu_pred1 = Infomap(network_pajek1, *infomap_args1)

# Translate layer numbers back to positions in `network1`
layer_commu_pred1 = {layer_indices1[k]: v for k, v in layer_commu_pred1.items()}
communities_pred1 = default_to_regular(community_members(layer_commu_pred1))

## Workplace

In [13]:
# Pair every non-empty layer with its position in `network2`, so that layer
# numbers in the pajek file can be mapped back to the original positions.
nonempty2 = [(l, n) for l, n in enumerate(network2) if n.shape[0] > 0]
layer_indices2 = [l for l, _ in nonempty2]
A2 = build_adjacency_tensor([n for _, n in nonempty2], index=None)
network_pajek2 = write_pajek(A2)

In [14]:
# Command line options for Infomap; the trailing 'pid...' entry only makes
# the temporary file name unique and is stripped by `Infomap`.
infomap_args2 = [
    'community_detection/output/',
    '-i', 'multiplex',
    '--multiplex-js-relax-rate', '0.25',
    '--overlapping',
    '--expanded',
    '--clu',
    '--two-level',
    '-z',
    'pid%d' % random.randint(0, 1000000),
]
_, layer_commu_pred2 = Infomap(network_pajek2, *infomap_args2)

# Translate layer numbers back to positions in `network2`
layer_commu_pred2 = {layer_indices2[k]: v for k, v in layer_commu_pred2.items()}
communities_pred2 = default_to_regular(community_members(layer_commu_pred2))

# Save for visualization

In [18]:
def fill_polygon(poly, m=1):
    """Return grid points covering the bounding box of ``poly``, padded in x.

    The bounding box of the polygon is sampled with ``m`` points per unit in
    both directions, and every sample is emitted twice: shifted by -2 and by
    +2 along x.

    Input : poly (list of [x, y] pairs), m (samples per unit)
    Output : set of (x, y) tuples
    """
    xs, ys = zip(*poly)
    minx, miny = min(xs), min(ys)

    X = int((max(xs) - minx + 1) * m)
    Y = int((max(ys) - miny + 1) * m)

    # All index pairs of an X-by-Y grid
    col_idx, row_idx = np.nonzero(np.ones((X, Y), dtype=np.int8))

    scale = float(m)
    filled = set()
    for x, y in zip(col_idx, row_idx):
        gx = x / scale + minx
        gy = y / scale + miny
        filled.add((gx - 2, gy))
        filled.add((gx + 2, gy))
    return filled

def all_points_in_blocks(com_blocks, m=1):
    """Return every point covered by ``com_blocks`` as a set of tuples.

    The result is the union of the grid points produced by ``fill_polygon``
    for each block and the blocks' own corner points.
    """
    interior = set()
    for block in com_blocks:
        interior.update(tuple(p) for p in fill_polygon(block['points'], m=m))

    outline = set(tuple(p) for block in com_blocks for p in block['points'])

    return interior.union(outline)

def ordered_set(seq):
    """Collapse runs of three or more consecutive points sharing an x-value.

    Within such a run only the first and the last point are kept. The input
    list is not modified.
    """
    kept = seq[:2]
    for point in seq[2:]:
        run_so_far = kept[-2][0] == kept[-1][0]
        extends_run = point[0] == kept[-1][0]
        if run_so_far and extends_run:
            # The previous point lies between two points with the same x: replace it
            kept[-1] = point
        else:
            kept.append(point)
    return kept

def translate_blocks(com_blocks, dx=0):
    """Return copies of ``com_blocks`` with every point shifted by ``dx`` in x.

    Each returned block carries the original 'c' value and a new 'points'
    list; the input blocks are left untouched.
    """
    shifted = []
    for block in com_blocks:
        moved_points = []
        for point in block['points']:
            moved_points.append([point[0] + dx, point[1]])
        shifted.append({'c': block['c'], 'points': moved_points})
    return shifted

def bounds_to_width(bounds):
    """Return the width of ``bounds``: its third element minus its first."""
    left, right = bounds[0], bounds[2]
    return right - left

def com_blocks_overlap(com_blocks_points):
    """Return True if any of the given points is already occupied.

    Compares against the module-level set ``all_occupied_points``.
    """
    return bool(com_blocks_points & all_occupied_points)
        
def translate_to_fit(com_blocks, com_str, ds, dx_multiplier=1):
    """Shift a community's blocks along x until they hit no occupied point.

    The blocks are first tried at their given position. On each overlap they
    are moved, relative to the *original* position, by half the community's
    'max_size' times the attempt number, in a randomly chosen direction.
    Once a free position is found the covered points are added to the
    module-level ``all_occupied_points`` and the covered integer x-range is
    appended to the module-level ``kernels``.

    Parameters
    ----------
    com_blocks : list of dict
        Blocks with 'c' and 'points' entries.
    com_str : str
        Key of the community in ``ds['coms']``.
    ds : dict
        Data structure holding ``ds['coms'][com_str]['max_size']``.
    dx_multiplier : int
        Currently unused.

    Returns
    -------
    list of dict
        The blocks at their final position.
    """
    placed = com_blocks[:]
    attempt = 1

    occupied = all_points_in_blocks(placed, m=2)
    while com_blocks_overlap(occupied):
        # Floor division: step sizes stay whole numbers
        dx = ds['coms'][com_str]['max_size'] // 2 * random.choice([-1, 1]) * attempt
        print("-- Overlaps -- Moving block %d x-position --" % dx)
        placed = translate_blocks(com_blocks, dx)
        attempt += 1
        occupied = all_points_in_blocks(placed, m=2)

    x_pos = np.mean([placed[0]['points'][0][0], placed[0]['points'][-1][0]])
    all_occupied_points.update(occupied)

    # Record the occupied x-range; it feeds the density estimate used when
    # choosing a starting position for later communities.
    x_vals = set([x for x, y in occupied])
    for x in np.arange(min(x_vals), max(x_vals)+1):
        kernels.append(x)

    print("Placing block at mean x-position %d" % x_pos)
    print("")
    return placed
        
def compute_similarity_matrix(communities):
    """Return pairwise Jaccard similarities between communities.

    Parameters
    ----------
    communities : dict
        ``{community id: set of members}``.

    Returns
    -------
    dict
        ``{'c<i>': {'c<j>': {'sim': |i & j| / |i | j|, 'count': |i & j|}}}``
        for every ordered pair of communities (including i == j). An empty
        input gives an empty dict.

    Raises
    ------
    ZeroDivisionError
        If two compared communities are both empty.
    """
    def _get_similarity(i, j):
        """Jaccard similarity and intersection size of communities i and j."""
        sim_counter = len(communities[i] & communities[j])
        tot_counter = len(communities[i] | communities[j])
        return sim_counter / float(tot_counter), sim_counter

    X = {}
    for i in communities:
        row = X.setdefault('c' + str(i), {})
        for j in communities:
            sim, count = _get_similarity(i, j)
            row['c' + str(j)] = {'sim': sim, 'count': count}
    return X

def is_valid_location(new_block, existing_blocks, pad=5):
    """Check whether the horizontal span of ``new_block`` is unoccupied.

    Each block is a dict with 'x' (left edge) and 'w' (width). Spans are
    widened by ``pad`` on both sides and compared on integer positions.
    Returns False as soon as one existing block shares a position.
    """
    def _padded_span(block):
        left = block['x']
        width = block['w']
        return set(range(int(left - pad), int(left + width + pad)))

    candidate = _padded_span(new_block)
    for other in existing_blocks:
        if candidate & _padded_span(other):
            return False
    return True

def participation_similarity(layer_commu):
    """Returns pairwise community participation profile cosine similarities.

    A community's participation profile counts, per node, in how many layers
    the node is a member of that community.

    Parameters
    ----------
    layer_commu : dict
        ``{layer: {community: iterable of nodes}}``.

    Returns
    -------
    list
        Cosine similarity for every community pair (i, j) with j > i, where
        communities are ordered by sorted id and pairs are listed row by
        row. Empty if there are fewer than two communities.
    """
    # Count node participation for each community
    all_nodes = set()
    commu_participation_profile = defaultdict(Counter)
    for layer_partition in layer_commu.values():
        for c, nodes in layer_partition.items():
            commu_participation_profile[c].update(nodes)
            all_nodes.update(nodes)

    if not commu_participation_profile:
        return []

    # One row per community, one column per node (fixed column order)
    node_order = list(all_nodes)
    participation_vectors = np.array(
        [
            [commu_participation_profile[c][n] for n in node_order]
            for c in sorted(commu_participation_profile.keys())
        ],
        dtype=float
    )

    # Cosine similarity computed with numpy; the file never imports a
    # `cosine_similarity` function. All-zero rows get similarity 0.
    norms = np.sqrt((participation_vectors ** 2).sum(axis=1))
    norms[norms == 0] = 1.0
    unit_vectors = participation_vectors / norms[:, np.newaxis]
    similarity_matrix = np.dot(unit_vectors, unit_vectors.T)

    # Extract upper triangle of similarity matrix (excluding the diagonal)
    rows, cols = np.triu_indices(similarity_matrix.shape[0], k=1)
    return [similarity_matrix[i, j] for i, j in zip(rows, cols)]

## Choose dataset, parameters, compute similarities

In [19]:
# INPUT
network = network2
layer_communities = layer_commu_pred2
communities = communities_pred2
layer_indices = layer_indices2
network_pajek = network_pajek2

# Canvas parameters
actual_height = len(network)
min_group_size = 0

# Set working width and height
target_width = 1000              # <-------- Setting this too small can make it hard to find a solution.
height = actual_height           # ... Too high gives poor visual results. Play around with it.


#------#
# Time #
#------#

layers_concat = pd.concat(network)

# Start and termination: midnight before the first and the last instant of
# the day of the last timestamp. Only the date part of the extremes is used.
t0 = dt.combine(layers_concat['timestamp'].min().date(), dt.min.time())
tt = dt.combine(layers_concat['timestamp'].max().date(), dt.max.time())

d_t = str(5*fof) # 5 (minutes)

thickness = 300 * fof  # 300 (seconds)


def _ticks_at(times):
    """Return {layer number: timestamp string} for layers ending at one of `times`."""
    ticks = {}
    for i in range(len(network)):
        stamp = t0 + datetime.timedelta(seconds=thickness*(i+1))
        if stamp.time() in times:
            ticks[i+1] = str(stamp)
    return ticks


# Lines marking important points in time
grid_times = [time(h) for h in [8,12,13,17]]
grid_ticks = _ticks_at(grid_times)

# Time tick labels (every full hour)
label_times = [time(h) for h in range(24)]
label_ticks = _ticks_at(label_times)

#----------------------#
# Build data structure #
#----------------------#

# Initiate data structure
ds = {}

# Meta
ds['meta'] = {'w': None, 'h': height}

# Time
ds['time'] = {
    't0': str(t0),
    'tt': str(tt),
    'dt': d_t,
    'ticks': {
        'label_ticks': label_ticks,
        'grid_ticks': grid_ticks
    }
}

# Community similarities
ds['sims'] = compute_similarity_matrix(communities)

# Initiate communities and layer networks
ds['coms'] = {}
ds['layer_networks'] = {}

## Find positions for temporal community polygons

In [20]:
# Get communities
community_ids = set()
for coms in layer_communities.values():
    community_ids.update(coms.keys())

# Initiate all occupied points
all_occupied_points = set()
kernels = []

# Blocks are runs of consecutive layers, so layers must be visited in
# ascending order; plain dict iteration does not guarantee that.
ordered_layers = sorted(layer_communities.items())

# Loop over communities build blocks
for com in community_ids:

    com_str = "c" + str(com)

    ds['coms'][com_str] = {
        'blocks': [],
        'duration': 0,
        'abs_size': len(communities[com]),
        'min_size': np.inf,
        'max_size': 1,
        'avg_size': [],
    }
    com_info = ds['coms'][com_str]

    # Choose the starting x-offset. Once earlier communities have been placed,
    # sample an offset with probability proportional to the negated
    # log-density of the x-positions occupied so far.
    if len(kernels) > 0:
        X = np.array(kernels).reshape((-1, 1))
        kde = KernelDensity(kernel='gaussian', bandwidth=3).fit(X)
        X_grid = np.arange(-target_width // 2, target_width // 2)
        X_scores = kde.score_samples(X_grid.reshape((-1, 1))).reshape((1, -1))[0]
        X_scores = -np.array(X_scores)
        X_scores = X_scores / np.sum(X_scores)
        dx0_index = np.random.choice(range(len(X_grid)), 1, p=X_scores)[0]
        dx0 = X_grid[dx0_index]
    else:
        dx0 = 0

    print("\nCommunity: %s" % (com,))

    # Compute all points for 'com'
    com_blocks = []
    prev_l = -2
    for l, coms in ordered_layers:

        # Skip layers where com is not present
        if com not in coms:
            continue

        # Start new block if com was not in previous layer
        if l != prev_l + 1:
            if prev_l != -2:
                com_info['blocks'].append({
                    'c': com_str,
                    'points': ordered_set(block)  # redundant points removed
                })
            block = []

        # Width of community in layer l
        com_width = len(coms[com])

        if com_width < com_info['min_size']:
            com_info['min_size'] = com_width
        if com_width > com_info['max_size']:
            com_info['max_size'] = com_width
        com_info['avg_size'].append(com_width)
        # NOTE(review): `thickness` is documented above as seconds, so this
        # adds 60 * seconds per layer -- confirm the intended unit.
        com_info['duration'] += 60 * thickness

        points = [
            [-com_width / 2.0 + dx0, l - 1],
            [com_width / 2.0 + dx0, l - 1],
            [-com_width / 2.0 + dx0, l],
            [com_width / 2.0 + dx0, l]
        ]

        # Insert points in middle of block to create clockwise polygon points
        for p in points:
            block.insert(len(block) // 2, p)

        # Store number of previous layer
        prev_l = l

    # Close the last open block (every community occurs in at least one layer)
    com_info['blocks'].append({
        'c': com_str,
        'points': ordered_set(block)  # If there is a graphics bug, try skipping ordered_set
    })

    # Move the blocks along x until they fit and store the summary size
    com_info['blocks'] = translate_to_fit(com_info['blocks'], com_str, ds, dx_multiplier=1)
    com_info['avg_size'] = float("%.02f" % np.mean(com_info['avg_size']))


Community: 0
Placing block at mean x-position 0


Community: 1
Placing block at mean x-position -353


Community: 2
Placing block at mean x-position 208


Community: 3
Placing block at mean x-position 474


Community: 4
Placing block at mean x-position -162


Community: 5
Placing block at mean x-position 74


Community: 6
Placing block at mean x-position -263


Community: 7
Placing block at mean x-position 331


Community: 8
-- Overlaps -- Moving block 3 x-position --
-- Overlaps -- Moving block 6 x-position --
-- Overlaps -- Moving block 9 x-position --
-- Overlaps -- Moving block 12 x-position --
-- Overlaps -- Moving block -15 x-position --
Placing block at mean x-position -24


Community: 9
Placing block at mean x-position -437


Community: 10
Placing block at mean x-position -499


Community: 11
Placing block at mean x-position 18


Community: 12
Placing block at mean x-position 274


Community: 13
Placing block at mean x-position -92


Community: 14
Placing block at mean x-posit

-- Overlaps -- Moving block 6 x-position --
-- Overlaps -- Moving block -7 x-position --
Placing block at mean x-position -147


Community: 90
Placing block at mean x-position -30


Community: 91
Placing block at mean x-position 474


Community: 92
Placing block at mean x-position -277


Community: 93
-- Overlaps -- Moving block 1 x-position --
-- Overlaps -- Moving block 2 x-position --
Placing block at mean x-position -483


Community: 94
Placing block at mean x-position -14


Community: 95
-- Overlaps -- Moving block 1 x-position --
-- Overlaps -- Moving block -2 x-position --
-- Overlaps -- Moving block -3 x-position --
-- Overlaps -- Moving block -4 x-position --
-- Overlaps -- Moving block -5 x-position --
-- Overlaps -- Moving block 6 x-position --
-- Overlaps -- Moving block 7 x-position --
Placing block at mean x-position -247


Community: 96
Placing block at mean x-position 336


Community: 97
-- Overlaps -- Moving block 1 x-position --
-- Overlaps -- Moving block -2 x-positi

Community: 194
-- Overlaps -- Moving block -1 x-position --
-- Overlaps -- Moving block -2 x-position --
-- Overlaps -- Moving block 3 x-position --
-- Overlaps -- Moving block -4 x-position --
-- Overlaps -- Moving block 5 x-position --
-- Overlaps -- Moving block -6 x-position --
-- Overlaps -- Moving block 7 x-position --
-- Overlaps -- Moving block -8 x-position --
-- Overlaps -- Moving block -9 x-position --
-- Overlaps -- Moving block -10 x-position --
-- Overlaps -- Moving block -11 x-position --
-- Overlaps -- Moving block 12 x-position --
-- Overlaps -- Moving block -13 x-position --
-- Overlaps -- Moving block 14 x-position --
Placing block at mean x-position 22


Community: 195
Placing block at mean x-position -417


Community: 196
Placing block at mean x-position 66


Community: 197
Placing block at mean x-position 385


Community: 198
-- Overlaps -- Moving block -1 x-position --
-- Overlaps -- Moving block 2 x-position --
-- Overlaps -- Moving block 3 x-position --
-- Over

In [21]:
# Add colors
# NOTE: marked as deprecated, but `com_cols` is still read by
# `layer_networks` below to decide which communities to include.
com_cols = {}
for com in ds['coms']:
    # One hash per member; each channel is the mean of the hashes reduced
    # with a different modulus.
    member_hashes = [mmh3.hash(str(n)) for n in communities[int(com[1:])]]
    r = np.average([h % 256 for h in member_hashes])
    g = np.average([h % 255 for h in member_hashes])
    b = np.average([h % 254 for h in member_hashes])
    com_cols[com] = 'rgb(%d,%d,%d)' % (r, g, b)

In [22]:
# Remove unused space at the ends of the canvas
x_positions = [
    point[0]
    for com in ds['coms']
    for block in ds['coms'][com]['blocks']
    for point in block['points']
]

# Canvas width is the x-extent of all polygons; shift everything so that the
# leftmost point sits at x = 0.
x_min = min(x_positions)
ds['meta']['w'] = max(x_positions) - x_min
for com_str in ds['coms']:
    ds['coms'][com_str]['blocks'] = translate_blocks(ds['coms'][com_str]['blocks'], -x_min)

## Create datastructure for layer networks

In [23]:
def layer_networks(network_pajek, layer_communities, layer_indices=None):
    """Build per-layer node/link data from a pajek string and a partition.

    Parameters
    ----------
    network_pajek : str
        Network string as produced by ``write_pajek``.
    layer_communities : dict
        ``{layer: {community: nodes}}``.
    layer_indices : list or dict, optional
        Maps layer numbers used in the pajek string to keys of
        ``layer_communities``. Defaults to the identity mapping.

    Returns
    -------
    dict
        ``{'data': {layer: {'links': [...], 'links_dict': {...},
        'nodes': {...}, 'groups': {...}}}}``. Links and nodes whose
        community has no entry in the module-level ``com_cols`` are left out.
    """
    def _int_if_int(val):
        """Return `val` as int when possible, unchanged otherwise."""
        try:
            return int(val)
        except ValueError:
            return val

    def _first_group(layer, label):
        """Return the first community of `layer` containing `label` (IndexError if none)."""
        return [g for g, n in layer_communities[layer].items() if label in n][0]

    if layer_indices is None:
        layer_indices = dict((l, l) for l in layer_communities.keys())

    sections = network_pajek.split("*")
    rawstring_nodes = sections[1]
    rawstring_edges = sections[2]

    # Get nodes: {pajek node id: node label}
    nodes_map = dict(
        (int(n.split()[0]), _int_if_int(n.split('"')[1]))
        for n in re.findall(r'\d+ ".+?".*?\n', rawstring_nodes)
    )

    ln = {'data': {}}

    # Add edges, one at a time. The first two lines of the section are the
    # section name and the column comment.
    for e_str in rawstring_edges.split('\n')[2:]:
        e = e_str.split()
        if not e:
            continue
        layer, source, target, value = layer_indices[int(e[0])], int(e[1]), int(e[2]), int(float(e[3]))
        try:
            node = source; source_target_err = "source"
            group_source = _first_group(layer, nodes_map[source])
            node = target; source_target_err = "target"
            group_target = _first_group(layer, nodes_map[target])
        except IndexError:
            # This occurs if the source/target has no group, WHICH IT SHOULD. Why this happens (and it
            # very rarely does) I don't know, maybe it's something with Infomap.
            print("%s node %s has no group in layer %s ... skipping link!" % (source_target_err, node, layer))
            continue
        except KeyError:
            # This occurs if there's a node parsed by the Infomap function which was not parsed to the
            # nodes map. Typically this is because I set Infomap to parse from the output pajek file
            # and the nodes map to be parsed from the input.
            print("node %s or %s could not be parsed in layer %s ... skipping link!" % (source, target, layer))
            # Skip the link as announced; otherwise the groups of a previous
            # edge (or undefined names) would be used below.
            continue

        if "c"+str(group_source) not in com_cols or "c"+str(group_target) not in com_cols:
            continue

        edge = {'source': source, 'target': target, 'value': value/10.0}
        layer_data = ln['data'].setdefault(layer, {'links': []})
        layer_data['links'].append(edge)

        links_dict = layer_data.setdefault('links_dict', {})
        for n1, n2 in [(source, target), (target, source)]:
            links_dict.setdefault(n1, []).append(n2)

    # Add nodes
    for layer, layer_data in ln['data'].items():
        nodes_names = set()
        for e in layer_data['links']:
            nodes_names.add(e['source'])
            nodes_names.add(e['target'])
        for nn in nodes_names:
            group = _first_group(layer, nodes_map[nn])
            if 'c'+str(group) not in com_cols:
                continue
            node = {'name': nn, 'id': nodes_map[nn], 'group': group}
            layer_data.setdefault('nodes', {})[nn] = node
            layer_data.setdefault('groups', {}).setdefault(group, []).append(node['name'])

    return ln

ds['layer_networks'] = layer_networks(network_pajek, layer_communities, layer_indices=layer_indices)

## Split resulting data structure into separate data structures for each day

In [24]:
def parse_datetime(t):
    """Parse a 'YYYY-MM-DD HH:MM:SS' string into a datetime object."""
    timestamp_format = "%Y-%m-%d %H:%M:%S"
    return dt.strptime(t, timestamp_format)

def translate_blocks2(v, dl, day_layers, deltatime):
    """Restrict a community entry to one day and renumber its layers.

    Parameters
    ----------
    v : dict
        Community entry with a 'blocks' list; each block has 'c' and
        'points' (``[x, layer]`` pairs). Not modified.
    dl : int
        Offset added to the layer coordinate of every kept point.
    day_layers : iterable
        Layer numbers belonging to the day; points on other layers are
        dropped, and blocks left without points are omitted.
    deltatime : int
        Layer width in minutes.

    Returns
    -------
    dict
        Shallow copy of ``v`` with new 'blocks' and with 'duration' set to
        ``deltatime * 60`` times the span of kept layers (0 if no point was
        kept).
    """
    in_day = set(day_layers)  # O(1) membership tests
    shifted_layers = set()
    blocks = []
    for b in v['blocks']:
        points = []
        for p in b['points']:
            if p[1] in in_day:
                p1_new = p[1] + dl
                shifted_layers.add(p1_new)
                points.append([p[0], p1_new])
        if points:
            blocks.append({'c': b['c'], 'points': points})

    _v = v.copy()
    _v['blocks'] = blocks
    if shifted_layers:
        _v['duration'] = deltatime * (max(shifted_layers) - min(shifted_layers)) * 60
    else:
        # No point of this community falls inside the day
        _v['duration'] = 0
    return _v

# Start and end of the whole data set (fractional seconds stripped)
t_start = parse_datetime(ds['time']['t0'].split(".")[0])
t_end = parse_datetime(ds['time']['tt'].split(".")[0])
time_delta = t_end - t_start

# Number of days, rounding the remaining seconds to the nearest whole day.
# Plain int() is used because the `np.int` alias no longer exists in NumPy.
num_days = time_delta.days + int(np.round(time_delta.seconds * 1.0 / 86400))
layers_per_day = 86400 // (int(ds['time']['dt']) * 60)
weekday_labels = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]

for day in range(num_days):
    ds_tmp = {}
    layer_offset = day * layers_per_day

    # Get layers and communities that happen in this day
    day_layer_min, day_layer_max = layers_per_day*day, layers_per_day*(day+1)
    day_layers = range(day_layer_min, day_layer_max)
    day_layers_non_empty = sorted([l for l in layer_communities.keys() if day_layer_min <= l < day_layer_max])
    # A layer can have communities but no stored links/groups (all of its
    # links were skipped), hence the defaults.
    day_communities = set([
        'c'+str(g)
        for l in day_layers_non_empty
        for g in ds['layer_networks']['data'].get(l, {}).get('groups', {}).keys()
    ])

    # Community similarities
    ds_tmp['sims'] = dict(
        (k, dict((_k, _v) for _k, _v in v.items() if _k in day_communities))
        for k, v in ds['sims'].items()
        if k in day_communities
    )

    # Community geometry data
    ds_tmp['coms'] = dict(
        (k, translate_blocks2(v, -layer_offset, day_layers, int(ds['time']['dt'])))
        for k, v in ds['coms'].items()
        if k in day_communities
    )

    # Layer network data
    ds_tmp['layer_networks'] = dict()
    ds_tmp['layer_networks']['data'] = dict(
        (k-layer_offset, v)
        for k, v in ds['layer_networks']['data'].items()
        if k in day_layers
    )

    # Time data
    ds_tmp['time'] = dict()
    ds_tmp['time']['dt'] = ds['time']['dt']
    ds_tmp['time']['t0'] = str(t_start + datetime.timedelta(days=day))
    ds_tmp['time']['tt'] = str(t_start + datetime.timedelta(days=(day+1)))
    ds_tmp['time']['ticks'] = dict()
    ds_tmp['time']['ticks']['grid_ticks'] = dict(
        (k-layer_offset, v)
        for k, v in ds['time']['ticks']['grid_ticks'].items()
        if k in day_layers
    )
    ds_tmp['time']['ticks']['label_ticks'] = dict(
        (k-layer_offset, v)
        for k, v in ds['time']['ticks']['label_ticks'].items()
        if k in day_layers
    )

    # Metadata
    ds_tmp['meta'] = {'h': len(day_layers), 'w': ds['meta']['w']}

    # One file per day, named after the weekday (day 0 is written as "Mon")
    with open('Visualisation/data/dataset%s.json' % weekday_labels[day], 'w') as outfile:
        json.dump(ds_tmp, outfile)