In [1]:
import networkx as nx
import numpy as np
import pandas as pd

import sys
sys.path.append('../hierarchies/')

from ete3 import *
from treeutil import *
import ujson as json

In [4]:
def save_json(obj, fname):
    """Serialise ``obj`` to JSON and store the text in the file ``fname``.

    The file is opened in text write mode, so existing content is replaced.
    """
    with open(fname, 'w') as out_file:
        out_file.write(json.dumps(obj))


In [2]:
# Read the tab-separated item table and keep its 'short name' column as a
# plain list; further down these names are used as the leaf labels of the trees.
# NOTE(review): the path is absolute and machine-specific, so this cell only
# runs where that file exists.
df = pd.read_csv('/home/kraken/Tmp/amazon_items.csv', sep='\t')
short_names = df['short name'].tolist()
print(short_names, len(short_names), sep='\n')

['Women Boots', 'Shoe Cream', 'Women Runner', 'Whitener', 'Sneaker', 'Biker Boot Straps ', 'Jewel Solution', 'Dye Kit', 'Silver Cloth', 'Woman Trainer ', 'Ultrasonic Cleaner ', 'Boot Socks ']
12


In [3]:
def Z_to_etetree_v2(n, Z, names):
    '''Build a Newick string from a merge list ``Z`` over ``n`` named leaves.

    Nodes are numbered so that indices ``0 .. n-1`` are the leaves (labelled
    with ``names``) and index ``n + i`` is the internal node created by row
    ``Z[i]``.  Each row lists the indices of that node's children and may have
    any number of entries.  Internal nodes are labelled with their row number.

    Parameters
    ----------
    n : int
        Number of leaves; must equal ``len(names)``.
    Z : sequence of sequences
        One row per internal node.  Entries are node indices; anything
        accepted by ``int()`` works (ints, NumPy integers, whole floats).
    names : sequence
        Leaf labels, one per leaf; each is converted with ``str``.

    Returns
    -------
    str
        ``'(<label of the last node>);'`` where the last node is the one
        created by the final row of ``Z`` (or the last leaf if ``Z`` is empty).

    Raises
    ------
    AssertionError
        If ``n`` differs from the number of names.
    ValueError
        If there are neither leaves nor merge rows.
    '''
    names = list(names)
    assert n == len(names), (
        'n ({}) does not match the number of names ({})'.format(n, len(names)))

    m = len(Z)
    if n + m == 0:
        raise ValueError('cannot build a tree from zero leaves and zero merges')

    # labels[k] is the Newick text of node k; internal slots are filled in
    # row order, so a row may only meaningfully refer to earlier nodes.
    labels = [str(name) for name in names] + [''] * m
    for i in range(m):
        children = ','.join(labels[int(child)] for child in Z[i])
        labels[n + i] = '({}){}'.format(children, i)

    return '({});'.format(labels[-1])


In [5]:

# Build a random three-level hierarchy over the 12 items: four groups of three
# leaves (internal nodes 12-15), paired into nodes 16 and 17, joined at root 18.
# Edges point from parent to child.
G = nx.DiGraph()

# Take the leaf order from a private, fixed-seed generator so the tree (and the
# files derived from it) is the same on every run, without touching NumPy's
# global random state.
rand_list = np.random.RandomState(0).permutation(12)

edges = []

cur_node = 12
for i in range(4):
    for count in range(3):
        print(cur_node, i*3+count)
        edges.append((cur_node, rand_list[i*3+count]))
    cur_node += 1

# Upper levels of the hierarchy.
edges.append((16, 12))
edges.append((16, 13))
edges.append((17, 14))
edges.append((17, 15))
edges.append((18, 16))
edges.append((18, 17))

G.add_edges_from(edges)

12 0
12 1
12 2
13 3
13 4
13 5
14 6
14 7
14 8
15 9
15 10
15 11


In [None]:
# Draw the graph G (parent -> child edges) with networkx.
nx.draw_networkx(G)

In [10]:
# Convert the graph into the merge list Z, cast every entry to a plain int
# (presumably so the rows serialise cleanly to JSON), store Z on disk and build
# the Newick string used for drawing.
# NOTE(review): 7 and 12 look like (internal nodes, leaves) of G -- confirm
# against treeutil.tree_to_Z.
Z = tree_to_Z(7, 12, G)
for row_idx, row in enumerate(Z):
    Z[row_idx] = [int(value) for value in row]

save_json(Z, 'amazon_tree_1_Z.json')
tree_str = Z_to_etetree_v2(12, Z, short_names)

In [None]:
def draw_tree(tree_str, out_name, dpi=900, w=900):
    """Render a Newick tree to a file with every node labelled by its name.

    Parameters
    ----------
    tree_str : str
        Newick string; parsed by ete3 with ``format=1``.
    out_name : str
        Output file name handed to ete3's ``render``.
    dpi : int, optional
        Resolution passed to ``render`` (default 900).
    w : int, optional
        Image width passed to ``render`` (default 900).
    """
    t = Tree(tree_str, format=1)

    def my_layout(node):
        # Attach the node's own name to the right of its branch; this runs for
        # every node the layout is applied to, internal nodes included.
        F = TextFace(node.name, tight_text=True)
        add_face_to_node(F, node, column=0, position="branch-right")

    ts = TreeStyle()
    # my_layout already labels the nodes, so the built-in leaf labels are
    # switched off.
    ts.show_leaf_name = False
    ts.layout_fn = my_layout
    ts.branch_vertical_margin = 10  # 10 pixels between adjacent branches

    # For interactive inspection use t.show(tree_style=ts) instead.
    t.render(out_name, tree_style=ts, dpi=dpi, w=w)

In [None]:
# Render the current tree_str to 'amazon_tree_1.pdf'.
draw_tree(tree_str, 'amazon_tree_1.pdf')

In [12]:
# Generating the ground truth hierarchy
#
# Leaf ids 0..11 are positions in short_names; internal nodes are numbered
# from 12 upwards, one level at a time. Edges point from parent to child.
a1 = [0, 5, 11]  # boots
a2 = [1, 3, 7]   # Dyes
a3 = [6, 8, 10]  # Care
a4 = [2, 4, 9]   # sneakers

# b1..c1 sketch the intended grouping; the graph construction below does not
# read them. Note that C adds a level that c1 does not have.
b1 = [a1, a4]    # shoes
b2 = [a2]        # shoe care
b3 = [a3]        # Jewel cleaning

c1 = [b1, b2, b3]


# Level 1: groups of leaf ids.
A = [a1, a2, a3, a4]
# Level 2: groups of positions in A.
B = [[0, 3], [1], [2]]
# Level 3: groups of positions in B.
C = [[0], [1, 2]]


G = nx.DiGraph()

rand_list = np.arange(12)

edges = []

# One internal node per group in A, linked to that group's leaves.
cur_node = 12
node_map = {}
for i, leaves in enumerate(A):
    node_map[i] = cur_node
    edges += [(cur_node, leaf) for leaf in leaves]
    cur_node += 1

# One internal node per group in B, linked to the level-1 nodes it contains.
node_map2 = {}
for i, group in enumerate(B):
    node_map2[i] = cur_node
    edges += [(cur_node, node_map[a_idx]) for a_idx in group]
    cur_node += 1

# One internal node per group in C; D collects them for the root.
D = []
for group in C:
    edges += [(cur_node, node_map2[b_idx]) for b_idx in group]
    D.append(cur_node)
    cur_node += 1

# cur_node is now the root id; link it to every level-3 node.
edges += [(cur_node, node) for node in D]

G.add_edges_from(edges)

print(edges)

[(12, 0), (12, 5), (12, 11), (13, 1), (13, 3), (13, 7), (14, 6), (14, 8), (14, 10), (15, 2), (15, 4), (15, 9), (16, 12), (16, 15), (17, 13), (18, 14), (19, 16), (20, 17), (20, 18), (21, 19), (21, 20)]


In [None]:
# Draw the graph G (parent -> child edges) with networkx.
nx.draw_networkx(G)

In [13]:
# 10 = number of nodes - 12 (the graph above has nodes 0..21, so 22 - 12)
Z = tree_to_Z(10, 12, G)
# Cast every entry of every row to a plain int, updating Z in place.
for row_idx, row in enumerate(Z):
    Z[row_idx] = [int(value) for value in row]

save_json(Z, 'amazon_tree_3_Z.json')
tree_str = Z_to_etetree_v2(12, Z, short_names)

In [None]:
# Render the current tree_str to 'amazon_tree_3.pdf'.
draw_tree(tree_str, 'amazon_tree_3.pdf')