In [1]:
import collections
import csv
import json
import re

import networkx as nx

In [2]:
# Load the source graph from the GraphML file in the working directory.
graphml_path = "bidirected_aaa_all.graphml"
graph = nx.read_graphml(graphml_path)

# Generate node header #

In [3]:
# Accumulators populated while scanning the graph's nodes.
nodes = list()        # parsed node records, in iteration order
headers = set()       # distinct top-level keys seen on the node records
id_to_uid = dict()    # graph node id -> node['uids']['auto']
id_to_type = dict()   # graph node id -> node['type']

In [4]:
# Matches a standalone None / nan token, i.e. one that is neither part of a
# longer word ("nanotube", "NoneType") nor directly wrapped in quotes.
# A plain str.replace would also rewrite those substrings inside string
# values and make json.loads fail.
_BARE_NULL_TOKEN = re.compile(r'(?<![\w"])(None|nan)(?![\w"])')

# Top-level keys that are not exported as node columns.
_SKIPPED_KEYS = {
    'process', 'material', 'ingredient', 'measurement',
    'spec', 'conditions', 'parameters', 'properties',
}

# NOTE(review): the 'object' attribute is presumably a Python-repr-style dict
# string; it is coerced to JSON by swapping quote characters and quoting bare
# None / nan tokens. Values that contain apostrophes, or a free-standing
# "None"/"nan" word inside a longer string, still cannot be parsed this way.
for node_id, node_attrs in graph.nodes(data=True):
    as_json = node_attrs['object'].replace('\'', '"')
    as_json = _BARE_NULL_TOKEN.sub(r'"\1"', as_json)
    node = json.loads(as_json)

    nodes.append(node)
    id_to_uid[node_id] = node['uids']['auto']
    id_to_type[node_id] = node['type']

    # Every remaining top-level key becomes a candidate CSV column.
    headers.update(key for key in node if key not in _SKIPPED_KEYS)

In [5]:
# 'uids' is exported through the dedicated 'node_id:ID' column, so it must not
# also appear as a property column. discard() (instead of remove()) keeps this
# cell safe to re-run after 'uids' has already been dropped.
headers.discard('uids')

# Sort the property columns so the column order is the same on every run;
# iterating a set of strings directly can change order between sessions.
headerList = ['node_id:ID'] + sorted(headers) + [":LABEL"]

In [6]:
# Write the single-row node header file. The encoding is pinned to UTF-8
# because the column names come from the data and the platform default
# encoding is locale-dependent.
with open("nodes_header.csv", "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(headerList)

# Generate nodes #

In [7]:
def add_nodes(properties, path="nodes.csv"):
    """Write node rows to a CSV file, replacing any existing file.

    Parameters
    ----------
    properties : iterable of sequences
        One sequence of cell values per node; each becomes one CSV row.
    path : str, optional
        Destination file. Defaults to "nodes.csv" in the working directory.
    """
    # UTF-8 is set explicitly so the output does not depend on the platform's
    # locale; newline="" lets the csv module control line endings.
    with open(path, "w", newline="", encoding="utf-8") as csvfile:
        csv.writer(csvfile).writerows(properties)

In [8]:
# Build one CSV row per parsed node, with cells ordered as in headerList.
properties = []
for record in nodes:
    row = []
    for column in headerList:
        if column == 'node_id:ID':
            # Identifier column: taken from the node's 'auto' uid.
            value = record['uids']['auto']
        elif column == ':LABEL':
            # Label column: the node's type.
            value = record['type']
        else:
            # Ordinary property column; empty cell when the node lacks it.
            value = record.get(column, "")
        row.append(value)
    properties.append(row)

In [9]:
# Write the node rows assembled in `properties` to the nodes CSV file.
add_nodes(properties)

# Generate edge header #

In [10]:
# Column names for the relationship file: source id, target id, edge type.
edges_header = [":START_ID", ":END_ID", ":TYPE"]

# Emit the single-row header file for the edges.
with open("edges_header.csv", "w", newline="") as header_file:
    csv.writer(header_file).writerow(edges_header)

# Generate edges #

In [11]:
def add_edges(edges, path="edges.csv"):
    """Write edge rows to a CSV file, replacing any existing file.

    Parameters
    ----------
    edges : iterable of sequences
        One sequence of cell values per edge; each becomes one CSV row.
    path : str, optional
        Destination file. Defaults to "edges.csv" in the working directory.
    """
    # UTF-8 is set explicitly so the output does not depend on the platform's
    # locale; newline="" lets the csv module control line endings.
    with open(path, "w", newline="", encoding="utf-8") as csvfile:
        csv.writer(csvfile).writerows(edges)

In [12]:
# Allowed edge directions, keyed by source node type; each value is the set of
# target node types an edge from that source type may point to.
#
# Every value is a real set. A parenthesised single string such as
# ('measurement_run') is just a str, and `x in some_str` is a substring test,
# so fragments like 'run' or 'material' would be accepted as valid targets.
edge_dependencies = collections.defaultdict(set)
edge_dependencies.update({
    'process_template': {'process_spec', 'condition_template', 'parameter_template'},
    'material_template': {'material_spec', 'property_template'},
    'measurement_template': {'measurement_spec', 'property_template'},
    'process_spec': {'material_spec', 'process_run'},
    'material_spec': {'ingredient_spec', 'material_run'},
    'ingredient_spec': {'ingredient_run', 'process_spec'},
    'measurement_spec': {'measurement_run'},
    'material_run': {'ingredient_run'},
    'process_run': {'material_run'},
    'ingredient_run': {'process_run'},
    'measurement_run': {'material_run'},
})

In [13]:
# Build one [start uid, end uid, relationship type] row for every graph edge
# whose (source type, target type) pair is listed in edge_dependencies.
edges = []
# The per-edge attribute dict is unused; it gets a throwaway name so it does
# not overwrite the `properties` row list that the node-export cell relies on.
for source, target, _edge_attrs in graph.edges(data=True):
    source_type = id_to_type[source]
    target_type = id_to_type[target]
    # .get() neither raises nor inserts when the source type has no entry.
    if target_type in edge_dependencies.get(source_type, ()):
        edges.append([id_to_uid[source], id_to_uid[target], source_type + '_' + target_type])

In [14]:
# Write the edge rows collected in `edges` to the edges CSV file.
add_edges(edges)