In [1]:
import networkx as nx
from nltk.corpus import wordnet as wn
# Requires the nltk and networkx packages (e.g. pip install nltk networkx).
# The WordNet corpus must also be available; if it is missing, run nltk.download('wordnet') once.

In [3]:
# Load the semantic tree from its GEXF file (the path is relative to the
# current working directory). The cells below call graph.successors(), so the
# file is expected to describe a directed graph.
graph = nx.read_gexf("semantic_tree.gexf")

In [14]:
# Nodes are plain strings holding the repr of a WordNet synset,
# e.g. "Synset('tench.n.01')" -- they are not Synset objects.
print(list(graph.nodes()))

# this function converts a synset name to offsets (i.e. imagenet file id).
def to_offset_label(node):
    """Convert a graph node to its ImageNet-style offset label.

    Args:
        node: Node string as stored in the graph, i.e. the repr of a WordNet
            synset such as "Synset('tench.n.01')". A bare synset name
            ("tench.n.01") is accepted as well.

    Returns:
        The synset's part-of-speech letter followed by its WordNet offset,
        zero-padded to 8 digits (e.g. 'n02840245').

    Raises:
        Whatever ``wn.synset`` raises when the extracted name is not a valid
        WordNet synset name.
    """
    prefix, suffix = "Synset('", "')"
    # Strip the fixed wrapper instead of splitting on "'": synset names may
    # themselves contain apostrophes (e.g. "jack-o'-lantern.n.02"), and a
    # split would cut such a name short and make the lookup fail.
    if node.startswith(prefix) and node.endswith(suffix):
        name = node[len(prefix):-len(suffix)]
    else:
        # No wrapper present: treat the string as a bare synset name.
        name = node
    synset = wn.synset(name)
    return synset.pos() + str(synset.offset()).zfill(8)

# Example: convert the node at index 1 of the node list to its offset label.
print(to_offset_label(list(graph.nodes())[1]))

["Synset('tench.n.01')", "Synset('goldfish.n.01')", "Synset('cyprinid.n.01')", "Synset('great_white_shark.n.01')", "Synset('fish.n.01')", "Synset('tiger_shark.n.01')", "Synset('shark.n.01')", "Synset('hammerhead.n.03')", "Synset('electric_ray.n.01')", "Synset('elasmobranch.n.01')", "Synset('stingray.n.01')", "Synset('ray.n.07')", "Synset('cock.n.05')", "Synset('vertebrate.n.01')", "Synset('hen.n.02')", "Synset('bird.n.01')", "Synset('ostrich.n.02')", "Synset('brambling.n.01')", "Synset('goldfinch.n.02')", "Synset('finch.n.01')", "Synset('house_finch.n.01')", "Synset('junco.n.01')", "Synset('indigo_bunting.n.01')", "Synset('robin.n.02')", "Synset('oscine.n.01')", "Synset('bulbul.n.01')", "Synset('thrush.n.03')", "Synset('jay.n.02')", "Synset('magpie.n.01')", "Synset('corvine_bird.n.01')", "Synset('chickadee.n.01')", "Synset('water_ouzel.n.01')", "Synset('kite.n.04')", "Synset('bald_eagle.n.01')", "Synset('bird_of_prey.n.01')", "Synset('vulture.n.01')", "Synset('great_grey_owl.n.01')", "

In [16]:
# Edges are (parent, child) tuples of node strings: the first element is the
# more general synset and the second is one of its direct children
# (e.g. cyprinid -> tench in the printed output).
print(list(graph.edges()))

[("Synset('cyprinid.n.01')", "Synset('tench.n.01')"), ("Synset('cyprinid.n.01')", "Synset('goldfish.n.01')"), ("Synset('fish.n.01')", "Synset('elasmobranch.n.01')"), ("Synset('fish.n.01')", "Synset('food_fish.n.01')"), ("Synset('fish.n.01')", "Synset('teleost_fish.n.01')"), ("Synset('shark.n.01')", "Synset('great_white_shark.n.01')"), ("Synset('shark.n.01')", "Synset('tiger_shark.n.01')"), ("Synset('shark.n.01')", "Synset('hammerhead.n.03')"), ("Synset('elasmobranch.n.01')", "Synset('shark.n.01')"), ("Synset('elasmobranch.n.01')", "Synset('ray.n.07')"), ("Synset('ray.n.07')", "Synset('electric_ray.n.01')"), ("Synset('ray.n.07')", "Synset('stingray.n.01')"), ("Synset('vertebrate.n.01')", "Synset('fish.n.01')"), ("Synset('vertebrate.n.01')", "Synset('bird.n.01')"), ("Synset('vertebrate.n.01')", "Synset('amphibian.n.03')"), ("Synset('vertebrate.n.01')", "Synset('reptile.n.01')"), ("Synset('vertebrate.n.01')", "Synset('mammal.n.01')"), ("Synset('bird.n.01')", "Synset('cock.n.05')"), ("Syns

In [22]:
# Example: list the direct children of the "protective_covering" node.

# Node names are full synset strings, so locate the node by substring match.
node = [name for name in graph.nodes() if "protective_covering" in name]
print(node)

# The successors of the first match are its children in the tree.
children = list(graph.successors(node[0]))
print(children)

# Map every child synset string to its ImageNet label.
children = list(map(to_offset_label, children))
print(children)

["Synset('protective_covering.n.01')"]
["Synset('binder.n.03')", "Synset('shelter.n.02')", "Synset('lampshade.n.01')", "Synset('screen.n.05')", "Synset('armor_plate.n.01')", "Synset('sheath.n.01')", "Synset('armor.n.01')", "Synset('mask.n.04')", "Synset('roof.n.01')", "Synset('cap.n.04')", "Synset('blind.n.03')"]
['n02840245', 'n04191943', 'n03637318', 'n04151581', 'n02740764', 'n04187061', 'n02739668', 'n03725035', 'n04105068', 'n02955065', 'n02851099']


In [4]:
# total number of nodes in the graph
print(graph.number_of_nodes())

1381


In [5]:
# count the leaves, i.e. the nodes that have no successors
print(sum(1 for name in graph.nodes() if not list(graph.successors(name))))

1000
