### Convert the tlp file output by GrouseFlocks to MyJSON 
* Observing that all of the tlp files are at most two level, this script only deals with two level
* We treat each open metanode as a "true metanode" that has underlying leaf nodes, but the closed metanode does not have underlying leaf nodes.  They are both in the data structure "metanodes"
* This script only works for the datasets "ivOrigins" and "moviedb", which seems different than other datasets that are not using the TopoLayout

In [100]:
from tulip import tlp
from shapely.geometry import *
import os
import math
import json
SQUARE_ROOT_2 = math.sqrt(2)
from pprint import pprint

In [101]:
DATA_DIR = '../../../data/real-world-compiled/grouseflocks/'
FILENAME = 'grouseflocks-ivOrigins-grouseflocks-open-2'
grousetlp_filepath = os.path.join(DATA_DIR, FILENAME + '.tlp')
myjson_filepath = os.path.join(DATA_DIR, FILENAME + '.json')

In [102]:
# Load graph and get the useful subgraphs
graph = tlp.loadGraph(grousetlp_filepath)
assert(graph is not None)
hierarchy_subgraphs = graph.getSubGraph('1__HierarchySubGraphs')
final_layout_display = graph.getSubGraph('1__Final Layout Display')
final_layout = graph.getSubGraph('1__Final Layout')
assert(hierarchy_subgraphs is not None)
assert(final_layout_display is not None)
assert(final_layout is not None)

In [103]:
# Initialize the output data structures
metanodes = {}
leaf_nodes = []
edges = []
root = final_layout.getId()
max_height = 1

In [104]:
label2node = {}
view_label = final_layout_display.getStringProperty('viewLabel')

for n in final_layout_display.getNodes():
    label2node[view_label[n]] = n
len(label2node)
print(label2node)

{'': <node 101>, 'Tree 7: n 1 e 0 (Hier: n 2 e 1)': <node 146>, 'Tree 0: n 2 e 1 (Hier: n 2 e 1)': <node 147>, 'Tree 6: n 2 e 1 (Hier: n 2 e 1)': <node 148>, 'Unknown 15: n 7 e 18 (Hier: n 8 e 24)': <node 149>, 'Tree 9: n 2 e 1 (Hier: n 2 e 1)': <node 150>, 'Near Complete 0: n 2 e 1 (Hier: n 3 e 3)': <node 151>, 'Tree 12: n 25 e 112 (Hier: n 3 e 2)': <node 158>, 'Unknown 5: n 6 e 15 (Hier: n 7 e 17)': <node 159>, 'Near Complete 1: n 2 e 1 (Hier: n 3 e 3)': <node 160>, 'Near Complete 2: n 3 e 3 (Hier: n 4 e 6)': <node 161>, 'Near Complete 3: n 8 e 17 (Hier: n 8 e 28)': <node 164>, 'Near Complete 4: n 2 e 1 (Hier: n 3 e 3)': <node 165>, 'Near Complete 5: n 2 e 1 (Hier: n 3 e 3)': <node 166>, 'Near Complete 6: n 3 e 3 (Hier: n 3 e 3)': <node 167>}


In [105]:
# Determine the node hierarchy in "1__Final Layout".
# Although we can find all subgraphs (and its leaf nodes) in "1__HierarchySubGraphs", 
# it's only a flat list of metanodes even if there are multiple levels in the hierarchy.
# Such a misleading name!

def dfs(current_graph, depth):    
    # find out if the current graph is displayed, aka, present in "1__Final Layout Display"
    if current_graph.getName() not in label2node:
        # keep traversing down because this metanode is open
        for g in current_graph.getSubGraphs():
            # record this metanode
            metanodes[g.getId()] = {
                'id': g.getId(),
                'label': g.getName(),
                'parent_metanode': current_graph.getId(),
                'desc_metanodes': {},  
                'level': depth + 1,
                'leaf_nodes': {},        # since we don't need it now, let's leave it blank.  
                'diameter': 0,      
                'geometry': None,  # TODO how to find out geometry for open metanodes (the loose bounding circle)?
            }
            
            dfs(g, depth + 1)
            
        # Fill in the "desc_metanodes" in post order
        if current_graph.getId() in metanodes:
            t = metanodes[current_graph.getId()]
            for g in current_graph.getSubGraphs():
                # the subgraph itself
                t['desc_metanodes'][g.getId()] = True
                # and its descendants
                dict.update(t['desc_metanodes'], metanodes[g.getId()]['desc_metanodes'])
                
dfs(final_layout, 0)
print(len(metanodes))
# pprint(metanodes)

19


In [106]:
# A mapping between label w metanode id in our data structure "metanodes" is needed
#   because there are mulitple IDs (at least 3 cluster ids and 1 node id) associated with a metanode 
#   in the original tlp files.  
#   We want to make sure we are linking the elements correctly by using a single ID.
#   The label seems to be unique and consistent across different representations in the original tlp file.
label2metanode = {}
for nid, m in metanodes.items():
    label2metanode[m['label']] = nid

In [107]:
# Find out all leaf nodes
view_hier_attr = final_layout_display.getIntegerProperty('view Hier Node to Display')
sub_view_layout = final_layout_display.getLayoutProperty('viewLayout')
sub_view_size = final_layout_display.getSizeProperty('viewSize')
node_in_graph = final_layout_display.getGraphProperty('Node In Graph')
view_label = final_layout_display.getStringProperty('viewLabel')

for n in final_layout_display.getNodes():
    center_coor = sub_view_layout[n]
    width = sub_view_size[n][0]
    # print(n.id, center_coor, diameter)
    if view_hier_attr[n] != -1:
        # leaf nodes
        print('leaf: ', n.id)
        parent_metanode_id = None
        if node_in_graph[n] is not None:
            # Leaf node is an open metanode
            parent_metanode_id = label2metanode[node_in_graph[n].getName()]
        else:
            # leaf node not in an open metanode, this is odd but it happens in ivOrigins
            # but when it happens, the parent_metanode is always the root
            parent_metanode_id = root
        leaf_nodes.append({
            'id': n.id,
            'parent_metanode': parent_metanode_id, 
            'geometry': Point(center_coor.x(), center_coor.y()).buffer(width / 2.0, cap_style=CAP_STYLE.square),
            'diameter': width * SQUARE_ROOT_2,
        })       
    else:
        # closed metanode.  Note that they are already recorded in "metanodes"
        metanode_id = label2metanode[view_label[n]]
        dict.update(metanodes[metanode_id], {
            'geometry': Point(center_coor.x(), center_coor.y()).buffer(width / 2.0, cap_style=CAP_STYLE.round),
            'diatmeter': width
        })
print(len(leaf_nodes))

leaf:  1
leaf:  2
leaf:  6
leaf:  11
leaf:  13
leaf:  17
leaf:  19
leaf:  22
leaf:  23
leaf:  25
leaf:  26
leaf:  30
leaf:  31
leaf:  32
leaf:  33
leaf:  36
leaf:  37
leaf:  42
leaf:  43
leaf:  44
leaf:  49
leaf:  55
leaf:  57
leaf:  59
leaf:  60
leaf:  62
leaf:  71
leaf:  73
leaf:  76
leaf:  80
leaf:  82
leaf:  87
leaf:  89
leaf:  92
leaf:  94
leaf:  101
36


In [108]:
# this is for printing the labels and then determine the ids manually by looking at the figure at Tulip
for nid, m in metanodes.items():
    if m['geometry'] is None and m['parent_metanode'] != root:
        # indicates this is an open metanode
        p = metanodes[m['parent_metanode']]
        print('dealing with: ', m['label'], 'parent:', p['label'])
        

dealing with:  Tree 8: n 10 e 31 (Hier: n 2 e 1) parent: Biconnected 0: n 103 e 505 (Hier: n 19 e 18)
dealing with:  Unknown 13: n 9 e 29 (Hier: n 9 e 30) parent: Tree 8: n 10 e 31 (Hier: n 2 e 1)
dealing with:  Tree 10: n 32 e 201 (Hier: n 3 e 2) parent: Biconnected 0: n 103 e 505 (Hier: n 19 e 18)
dealing with:  Unknown 14: n 23 e 167 (Hier: n 24 e 203) parent: Tree 10: n 32 e 201 (Hier: n 3 e 2)


In [109]:
# Open metanode IDs for different datasets.  Fill in manually

# ivOrigins-open-2
corr_node_ids = {
    'Tree 8: n 10 e 31 (Hier: n 2 e 1)': 126,
    'Unknown 13: n 9 e 29 (Hier: n 9 e 30)': 138,
    'Tree 10: n 32 e 201 (Hier: n 3 e 2)': 127,
    'Unknown 14: n 23 e 167 (Hier: n 24 e 203)': 139
}

In [110]:
# Find the layout info according to the manual defined IDs above
for nid, m in metanodes.items():
    if m['geometry'] is None and m['parent_metanode'] != root:
        # indicates this is an open metanode
        p = metanodes[m['parent_metanode']]
        parent_subgraph = hierarchy_subgraphs.getSubGraph(p['label'])
        assert(parent_subgraph is not None)
        view_layout = parent_subgraph.getLayoutProperty('viewLayout')
        view_size = parent_subgraph.getSizeProperty('viewSize')

        # Find the corresponding node
        which_node = None
        for n in parent_subgraph.getNodes():
            if n.id == corr_node_ids[m['label']]:
                which_node = n
                break
        assert(which_node is not None)
        
        center_coor = view_layout[which_node]
        diameter = view_size[which_node][0]
        dict.update(m, {
            'geometry': Point(center_coor.x(), center_coor.y()).buffer(width / 2.0, cap_style=CAP_STYLE.round),
            'diatmeter': diameter
        })
        pprint(m)


{'desc_metanodes': {5: True, 6: True, 7: True},
 'diameter': 0,
 'diatmeter': 11.273799896240234,
 'geometry': <shapely.geometry.polygon.Polygon object at 0x108a13940>,
 'id': 4,
 'label': 'Tree 8: n 10 e 31 (Hier: n 2 e 1)',
 'leaf_nodes': {},
 'level': 2,
 'parent_metanode': 3}
{'desc_metanodes': {7: True},
 'diameter': 0,
 'diatmeter': 9.758910179138184,
 'geometry': <shapely.geometry.polygon.Polygon object at 0x108a139e8>,
 'id': 6,
 'label': 'Unknown 13: n 9 e 29 (Hier: n 9 e 30)',
 'leaf_nodes': {},
 'level': 3,
 'parent_metanode': 4}
{'desc_metanodes': {9: True, 10: True, 11: True, 12: True},
 'diameter': 0,
 'diatmeter': 18.94179916381836,
 'geometry': <shapely.geometry.polygon.Polygon object at 0x1089fcfd0>,
 'id': 8,
 'label': 'Tree 10: n 32 e 201 (Hier: n 3 e 2)',
 'leaf_nodes': {},
 'level': 2,
 'parent_metanode': 3}
{'desc_metanodes': {10: True},
 'diameter': 0,
 'diatmeter': 14.49209976196289,
 'geometry': <shapely.geometry.polygon.Polygon object at 0x108a6bef0>,
 'id': 9

In [111]:
view_layout = final_layout_display.getLayoutProperty('viewLayout')

for e in final_layout_display.getEdges():
    src, tgt = graph.ends(e)
    if src.id > tgt.id:
        tmp = src
        src = tgt
        tgt = tmp
    edge_id = '{}-{}'.format(src.id, tgt.id)
    # print(edge_id, view_layout[src], view_layout[tgt])
    edges.append({
        'id': edge_id,
        'ends': (src.id, tgt.id),
        'geometry': LineString([(view_layout[src].x(), view_layout[src].y()),
                                (view_layout[tgt].x(), view_layout[tgt].y())])
    })
print(len(edges))

267


In [112]:
bbox = tlp.computeBoundingBox(final_layout_display)

In [113]:
# Use the mapping function from shapely to serialize the geometry objects
for n in leaf_nodes:
    n['geometry'] = mapping(n['geometry'])
for e in edges:
    e['geometry'] = mapping(e['geometry'])
for _, n in metanodes.items():
    if n['geometry'] is None:
        # dirty fix: the root is not going to intersect with anyone so use a random point
        n['geometry'] = Point(0,0)
    n['geometry'] = mapping(n['geometry']) 
    if n['level'] > max_height:
        max_height = n['level']

json_data = {
    'leaf_nodes': leaf_nodes,
    'edges': edges,
    'height': max_height,
    'root': root,
    'metanodes': metanodes,
    'bounding_box': [[bbox[0].x(), bbox[0].y()], [bbox[1].x(), bbox[1].y()]]
}

In [114]:
json.dump(json_data, open(myjson_filepath, 'w'), indent=2)
print('Converted to ', myjson_filepath, ' #nodes:', len(leaf_nodes), ' #edges: ', len(edges), 
      '#metanodes:', len(metanodes),
      ' height: ', json_data['height'])

Converted to  ../../../data/real-world-compiled/grouseflocks/grouseflocks-ivOrigins-grouseflocks-open-2.json  #nodes: 36  #edges:  267 #metanodes: 19  height:  4
