In [1]:
import json
import csv
from jsonpath_ng.ext import parse
import collections
import os

In [2]:
# Load every readable JSON document found directly inside the dump directory.
directory_path = "soren_NYO_gemd_model_dumps"
json_data = []
for filename in os.listdir(directory_path):
    file_path = os.path.join(directory_path, filename)
    try:
        with open(file_path, encoding='utf-8') as file:
            json_data.append(json.load(file))
    # Only expected failures are tolerated: ValueError covers json.JSONDecodeError
    # and UnicodeDecodeError (e.g. the CSV files this notebook writes into the same
    # directory), OSError covers sub-directories and unreadable files. A bare
    # `except:` would also swallow KeyboardInterrupt and genuine programming errors.
    except (OSError, ValueError) as error:
        print("skipping", filename, "-", error)

skipping
skipping
skipping


# Nodes #

## Header ##

In [3]:
# Column names of nodes.csv, listed in the same order in which the node
# generator functions assemble each row.
nodes_header = [
    "node_id:ID", "name", "notes",
    "real_lower_bound", "real_upper_bound", "real_units",
    "real_name_sub", "real_value_sub", "real_units_sub",
    "description", "type", "tags", "sample_type",
    "mass_fraction", "number_fraction", "volume_fraction",
    "source_performed_by", "source_performed_date", "source_type",
    "formula",
    "absolute_quantity_lower_bound", "absolute_quantity_type",
    "absolute_quantity_units", "absolute_quantity_upper_bound",
    "absolute_nominal",
    "constituent", "allowed_labels", "allowed_names", "file_links",
    ":LABEL",
]

# The header is written to its own file, separate from the data rows in nodes.csv.
with open("soren_NYO_gemd_model_dumps/nodes_header.csv", "w", newline="") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(nodes_header)

In [4]:
# Ids of the nodes already written to nodes.csv. The node generator functions
# check and extend this set so that each node id is emitted only once.
node_ids = set()

In [5]:
# Reset nodes.csv: the generator functions below open it in append mode, so it
# must start out empty. Opening in "w" mode already truncates the file; no row
# is written here, so the data file does not begin with a blank record.
with open("soren_NYO_gemd_model_dumps/nodes.csv", "w", newline="") as csvfile:
    pass

## Template, Spec, Run ##

### Template ###

In [6]:
# material_template, process_template, measurement_template
def template_nodes_generator_1(nodes):
    """Append one nodes.csv row for every template node not written before.

    Parameters
    ----------
    nodes : iterable
        Objects exposing a ``.value`` dict (the notebook passes the matches
        returned by a jsonpath ``find``). ``uids.auto``, ``name`` and ``type``
        are required; every other field is optional.

    Notes
    -----
    * Ids already present in the module-level ``node_ids`` set are skipped and
      newly written ids are added to it.
    * The real-bounds, ``*_sub``, formula and constituent columns are always
      empty for these node types.
    * Optional sub-objects (``source``, ``absolute_quantity``) may be missing
      *or* null; both cases yield empty cells, matching the null guard that
      ``run_nodes_generator`` applies to ``source``.
    """
    with open("soren_NYO_gemd_model_dumps/nodes.csv", "a", newline="") as csvfile:
        writer = csv.writer(csvfile)
        for n in nodes:
            value = n.value
            node_id = value['uids']['auto']
            if node_id in node_ids:
                continue
            node_ids.add(node_id)

            # `or {}` turns both "key absent" and an explicit null into an empty mapping.
            source = value.get('source') or {}
            quantity = value.get('absolute_quantity') or {}

            writer.writerow([
                node_id,
                value['name'],
                value.get('notes', ""),
                "", "", "",                      # real_lower_bound, real_upper_bound, real_units
                [], [], [],                      # real_name_sub, real_value_sub, real_units_sub
                value.get('description', ""),
                value['type'],                   # type
                value.get('tags', []),
                value.get('sample_type', ""),
                value.get('mass_fraction', ""),
                value.get('number_fraction', ""),
                value.get('volume_fraction', ""),
                source.get('performed_by', ""),
                source.get('performed_date', ""),
                source.get('type', ""),
                [],                              # formula
                quantity.get('lower_bound', ""),
                quantity.get('type', ""),
                quantity.get('units', ""),
                quantity.get('upper_bound', ""),
                quantity.get('nominal', ""),
                [],                              # constituent
                value.get('allowed_labels', ""),
                value.get('allowed_names', ""),
                value.get('file_links', ""),
                value['type'],                   # :LABEL
            ])

In [7]:
# parameter/condition/property template
def template_nodes_generator_2(nodes):
    """Append one nodes.csv row for every attribute-template node not written before.

    Behaves like ``template_nodes_generator_1`` but additionally fills the
    ``real_lower_bound`` / ``real_upper_bound`` / ``real_units`` columns when
    the node's ``bounds`` entry has ``type == 'real_bounds'``.

    Parameters
    ----------
    nodes : iterable
        Objects exposing a ``.value`` dict (the notebook passes the matches
        returned by a jsonpath ``find``). ``uids.auto``, ``name`` and ``type``
        are required; every other field is optional.

    Notes
    -----
    * Ids already present in the module-level ``node_ids`` set are skipped and
      newly written ids are added to it.
    * Optional sub-objects (``bounds``, ``source``, ``absolute_quantity``) may
      be missing *or* null; both cases yield empty cells, matching the null
      guard that ``run_nodes_generator`` applies to ``source``.
    """
    with open("soren_NYO_gemd_model_dumps/nodes.csv", "a", newline="") as csvfile:
        writer = csv.writer(csvfile)
        for n in nodes:
            value = n.value
            node_id = value['uids']['auto']
            if node_id in node_ids:
                continue
            node_ids.add(node_id)

            # `or {}` turns both "key absent" and an explicit null into an empty mapping.
            bounds = value.get('bounds') or {}
            source = value.get('source') or {}
            quantity = value.get('absolute_quantity') or {}

            # Only real bounds contribute numeric limits; other bound types
            # leave the three real_* columns empty.
            if bounds.get('type') == 'real_bounds':
                real_lower_bound = bounds.get('lower_bound', "")
                real_upper_bound = bounds.get('upper_bound', "")
                real_units = bounds.get('default_units', "")
            else:
                real_lower_bound = real_upper_bound = real_units = ""

            writer.writerow([
                node_id,
                value['name'],
                value.get('notes', ""),
                real_lower_bound,
                real_upper_bound,
                real_units,
                [], [], [],                      # real_name_sub, real_value_sub, real_units_sub
                value.get('description', ""),
                value['type'],                   # type
                value.get('tags', []),
                value.get('sample_type', ""),
                value.get('mass_fraction', ""),
                value.get('number_fraction', ""),
                value.get('volume_fraction', ""),
                source.get('performed_by', ""),
                source.get('performed_date', ""),
                source.get('type', ""),
                [],                              # formula
                quantity.get('lower_bound', ""),
                quantity.get('type', ""),
                quantity.get('units', ""),
                quantity.get('upper_bound', ""),
                quantity.get('nominal', ""),
                [],                              # constituent
                value.get('allowed_labels', ""),
                value.get('allowed_names', ""),
                value.get('file_links', ""),
                value['type'],                   # :LABEL
            ])

In [8]:
# Select the entries of json_data whose "type" is "material_template" and
# append their rows to nodes.csv.
material_template_expr = parse('$[?(@.type == "material_template")]')
nodes = material_template_expr.find(json_data)
template_nodes_generator_1(nodes)

In [9]:
# Select the entries of json_data whose "type" is "process_template" and
# append their rows to nodes.csv.
process_template_expr = parse('$[?(@.type == "process_template")]')
nodes = process_template_expr.find(json_data)
template_nodes_generator_1(nodes)

In [10]:
# Select the entries of json_data whose "type" is "measurement_template" and
# append their rows to nodes.csv.
measurement_template_expr = parse('$[?(@.type == "measurement_template")]')
nodes = measurement_template_expr.find(json_data)
template_nodes_generator_1(nodes)

In [11]:
# Parameter templates go through the second generator, which also fills the
# real-bounds columns.
parameter_template_expr = parse('$[?(@.type == "parameter_template")]')
nodes = parameter_template_expr.find(json_data)
template_nodes_generator_2(nodes)

In [12]:
# Property templates go through the second generator, which also fills the
# real-bounds columns.
property_template_expr = parse('$[?(@.type == "property_template")]')
nodes = property_template_expr.find(json_data)
template_nodes_generator_2(nodes)

In [13]:
# Condition templates go through the second generator, which also fills the
# real-bounds columns.
condition_template_expr = parse('$[?(@.type == "condition_template")]')
nodes = condition_template_expr.find(json_data)
template_nodes_generator_2(nodes)

### Categorical Bounds ###

In [14]:
# Running counter used to number the ids of categorical_bounds nodes; it is
# threaded through successive categorical_nodes_generator calls.
i = 0
# Maps a category name to the id of the categorical_bounds node created for it.
categoryName_to_id = {}

In [15]:
def categorical_nodes_generator(i, nodes):
    """Write one ``categorical_bounds`` node per category name not seen before.

    Parameters
    ----------
    i : int
        Index used for the next node id (``"categorical_bounds <i>"``).
    nodes : iterable
        Objects exposing a ``.value`` dict; only those whose ``bounds`` entry
        has ``type == 'categorical_bounds'`` contribute categories.

    Returns
    -------
    int
        The next unused index, to be passed to the following call.

    Side effects
    ------------
    Appends rows to nodes.csv and registers every new category name in the
    module-level ``categoryName_to_id`` mapping.
    """
    next_index = i
    with open("soren_NYO_gemd_model_dumps/nodes.csv", "a", newline="") as out:
        emit = csv.writer(out).writerow
        for match in nodes:
            record = match.value
            if 'bounds' not in record:
                continue
            if record['bounds']['type'] != 'categorical_bounds':
                continue
            for category in record['bounds']['categories']:
                # A category name shared by several templates yields a single node.
                if category in categoryName_to_id:
                    continue
                category_node_id = "categorical_bounds " + str(next_index)
                categoryName_to_id[category] = category_node_id
                row = (
                    [category_node_id, category]
                    + [""] * 4                          # notes, real_lower_bound, real_upper_bound, real_units
                    + [[], [], []]                      # real_name_sub, real_value_sub, real_units_sub
                    + ["", 'categorical_bounds', []]    # description, type, tags
                    + [""] * 7                          # sample_type, fractions, source_* columns
                    + [[]]                              # formula
                    + [""] * 5                          # absolute_quantity_* columns
                    + [[]]                              # constituent
                    + [""] * 3                          # allowed_labels, allowed_names, file_links
                    + ['categorical_bounds']            # :LABEL
                )
                emit(row)
                next_index += 1
    return next_index

In [16]:
# Query the parameter templates again and create categorical_bounds nodes for
# their categories; `i` is updated to the next free index.
parameter_template_expr = parse('$[?(@.type == "parameter_template")]')
nodes = parameter_template_expr.find(json_data)
i = categorical_nodes_generator(i, nodes)

In [17]:
# Same for the property templates, continuing the numbering from the previous call.
property_template_expr = parse('$[?(@.type == "property_template")]')
nodes = property_template_expr.find(json_data)
i = categorical_nodes_generator(i, nodes)

In [18]:
# Same for the condition templates, continuing the numbering from the previous call.
condition_template_expr = parse('$[?(@.type == "condition_template")]')
nodes = condition_template_expr.find(json_data)
i = categorical_nodes_generator(i, nodes)

### Spec ###

In [19]:
import re
def extract_capitalized_words(composition):
    """Return the capitalised tokens found in ``composition``, in order.

    Every character that is not an ASCII letter is removed first; the remaining
    text is then split into tokens made of one upper-case letter followed by
    any number of lower-case letters (e.g. ``"Fe2O3"`` -> ``["Fe", "O"]``).
    Lower-case letters that precede the first upper-case letter are dropped.
    """
    return re.findall(r'[A-Z][a-z]*', re.sub(r'[^a-zA-Z]', '', composition))

In [20]:
def spec_nodes_generator(nodes):
    """Append one nodes.csv row for every spec node not written before.

    Parameters
    ----------
    nodes : iterable
        Objects exposing a ``.value`` dict (the notebook passes the matches
        returned by a jsonpath ``find``). ``uids.auto``, ``name`` and ``type``
        are required; every other field is optional.

    Notes
    -----
    * Ids already present in the module-level ``node_ids`` set are skipped and
      newly written ids are added to it.
    * ``nominal_real`` attribute values fill the parallel ``real_name_sub`` /
      ``real_value_sub`` / ``real_units_sub`` lists; ``empirical_formula``
      values fill ``formula``. Attributes are read from ``properties`` (each
      entry wrapped under a ``'property'`` key), then ``conditions``, then
      ``parameters``.
    * ``constituent`` holds the distinct capitalised tokens of all formulas,
      sorted so that repeated runs produce identical files.
    * Optional sub-objects (attribute lists, ``source``, ``absolute_quantity``)
      may be missing *or* null; both yield empty cells, matching the null guard
      that ``run_nodes_generator`` applies to ``source``.
    """
    def gather(attribute, names, values, units, formulas):
        # Route one attribute into the nominal-real columns or the formula column.
        attribute_value = attribute['value']
        if attribute_value['type'] == 'nominal_real':
            names.append(attribute['name'])
            values.append(attribute_value['nominal'])
            units.append(attribute_value['units'])
        elif attribute_value['type'] == 'empirical_formula':
            formulas.append(attribute_value['formula'])

    with open("soren_NYO_gemd_model_dumps/nodes.csv", "a", newline="") as csvfile:
        writer = csv.writer(csvfile)
        for n in nodes:
            value = n.value
            node_id = value['uids']['auto']
            if node_id in node_ids:
                continue
            node_ids.add(node_id)

            real_name_sub, real_value_sub, real_units_sub, formula = [], [], [], []
            # Spec properties are wrapped: the attribute itself sits under 'property'.
            for wrapped in value.get('properties') or ():
                gather(wrapped['property'], real_name_sub, real_value_sub, real_units_sub, formula)
            for section in ('conditions', 'parameters'):
                for attribute in value.get(section) or ():
                    gather(attribute, real_name_sub, real_value_sub, real_units_sub, formula)

            # Distinct element-like tokens over all formulas; sorted because set
            # iteration order for strings varies between interpreter runs.
            constituent = sorted({
                token
                for composition in formula
                for token in extract_capitalized_words(composition)
            })

            # `or {}` turns both "key absent" and an explicit null into an empty mapping.
            source = value.get('source') or {}
            quantity = value.get('absolute_quantity') or {}

            writer.writerow([
                node_id,
                value['name'],
                value.get('notes', ""),
                "", "", "",                      # real_lower_bound, real_upper_bound, real_units
                real_name_sub,
                real_value_sub,
                real_units_sub,
                value.get('description', ""),
                value['type'],                   # type
                value.get('tags', []),
                value.get('sample_type', ""),
                value.get('mass_fraction', ""),
                value.get('number_fraction', ""),
                value.get('volume_fraction', ""),
                source.get('performed_by', ""),
                source.get('performed_date', ""),
                source.get('type', ""),
                formula,
                quantity.get('lower_bound', ""),
                quantity.get('type', ""),
                quantity.get('units', ""),
                quantity.get('upper_bound', ""),
                quantity.get('nominal', ""),
                constituent,
                value.get('allowed_labels', ""),
                value.get('allowed_names', ""),
                value.get('file_links', ""),
                value['type'],                   # :LABEL
            ])

In [21]:
# Select the entries of json_data whose "type" is "material_spec" and append
# their rows to nodes.csv.
material_spec_expr = parse('$[?(@.type == "material_spec")]')
nodes = material_spec_expr.find(json_data)
spec_nodes_generator(nodes)

In [22]:
# Select the entries of json_data whose "type" is "ingredient_spec" and append
# their rows to nodes.csv.
ingredient_spec_expr = parse('$[?(@.type == "ingredient_spec")]')
nodes = ingredient_spec_expr.find(json_data)
spec_nodes_generator(nodes)

In [23]:
# Select the entries of json_data whose "type" is "process_spec" and append
# their rows to nodes.csv.
process_spec_expr = parse('$[?(@.type == "process_spec")]')
nodes = process_spec_expr.find(json_data)
spec_nodes_generator(nodes)

In [24]:
# Select the entries of json_data whose "type" is "measurement_spec" and append
# their rows to nodes.csv.
measurement_spec_expr = parse('$[?(@.type == "measurement_spec")]')
nodes = measurement_spec_expr.find(json_data)
spec_nodes_generator(nodes)

### Run ###

In [25]:
def run_nodes_generator(nodes):
    """Append one nodes.csv row for every run node not written before.

    Parameters
    ----------
    nodes : iterable
        Objects exposing a ``.value`` dict (the notebook passes the matches
        returned by a jsonpath ``find``). ``uids.auto``, ``name`` and ``type``
        are required; every other field is optional.

    Notes
    -----
    * Ids already present in the module-level ``node_ids`` set are skipped and
      newly written ids are added to it.
    * ``nominal_real`` attribute values fill the parallel ``real_name_sub`` /
      ``real_value_sub`` / ``real_units_sub`` lists; ``empirical_formula``
      values fill ``formula``. Attributes are read from ``properties``, then
      ``conditions``, then ``parameters``; unlike the spec generator, run
      properties are read directly (no ``'property'`` wrapper).
    * ``constituent`` holds the distinct capitalised tokens of all formulas,
      sorted so that repeated runs produce identical files.
    * Optional sub-objects (attribute lists, ``source``, ``absolute_quantity``)
      may be missing *or* null; both yield empty cells.
    """
    with open("soren_NYO_gemd_model_dumps/nodes.csv", "a", newline="") as csvfile:
        writer = csv.writer(csvfile)
        for n in nodes:
            value = n.value
            node_id = value['uids']['auto']
            if node_id in node_ids:
                continue
            node_ids.add(node_id)

            real_name_sub, real_value_sub, real_units_sub, formula = [], [], [], []
            for section in ('properties', 'conditions', 'parameters'):
                for attribute in value.get(section) or ():
                    attribute_value = attribute['value']
                    if attribute_value['type'] == 'nominal_real':
                        real_name_sub.append(attribute['name'])
                        real_value_sub.append(attribute_value['nominal'])
                        real_units_sub.append(attribute_value['units'])
                    elif attribute_value['type'] == 'empirical_formula':
                        formula.append(attribute_value['formula'])

            # Distinct element-like tokens over all formulas; sorted because set
            # iteration order for strings varies between interpreter runs.
            constituent = sorted({
                token
                for composition in formula
                for token in extract_capitalized_words(composition)
            })

            # `or {}` turns both "key absent" and an explicit null into an empty mapping.
            source = value.get('source') or {}
            quantity = value.get('absolute_quantity') or {}

            writer.writerow([
                node_id,
                value['name'],
                value.get('notes', ""),
                "", "", "",                      # real_lower_bound, real_upper_bound, real_units
                real_name_sub,
                real_value_sub,
                real_units_sub,
                value.get('description', ""),
                value['type'],                   # type
                value.get('tags', []),
                value.get('sample_type', ""),
                value.get('mass_fraction', ""),
                value.get('number_fraction', ""),
                value.get('volume_fraction', ""),
                source.get('performed_by', ""),
                source.get('performed_date', ""),
                source.get('type', ""),
                formula,
                quantity.get('lower_bound', ""),
                quantity.get('type', ""),
                quantity.get('units', ""),
                quantity.get('upper_bound', ""),
                quantity.get('nominal', ""),
                constituent,
                value.get('allowed_labels', ""),
                value.get('allowed_names', ""),
                value.get('file_links', ""),
                value['type'],                   # :LABEL
            ])

In [26]:
# Select the entries of json_data whose "type" is "material_run" and append
# their rows to nodes.csv.
material_run_expr = parse('$[?(@.type == "material_run")]')
nodes = material_run_expr.find(json_data)
run_nodes_generator(nodes)

In [27]:
# Select the entries of json_data whose "type" is "ingredient_run" and append
# their rows to nodes.csv.
ingredient_run_expr = parse('$[?(@.type == "ingredient_run")]')
nodes = ingredient_run_expr.find(json_data)
run_nodes_generator(nodes)

In [28]:
# Select the entries of json_data whose "type" is "process_run" and append
# their rows to nodes.csv.
process_run_expr = parse('$[?(@.type == "process_run")]')
nodes = process_run_expr.find(json_data)
run_nodes_generator(nodes)

In [29]:
# Select the entries of json_data whose "type" is "measurement_run" and append
# their rows to nodes.csv.
measurement_run_expr = parse('$[?(@.type == "measurement_run")]')
nodes = measurement_run_expr.find(json_data)
run_nodes_generator(nodes)

# Edges #

In [70]:
# Column names for edges.csv: source node id, target node id, relationship type.
# As with the nodes, the header is written to its own file.
edges_header = [":START_ID", ":END_ID", ":TYPE"]
with open("soren_NYO_gemd_model_dumps/edges_header.csv", "w", newline="") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(edges_header)

### material template -> material spec ###

In [89]:
# material template id -> ids of the property templates it references.
materialTemplate_to_propertyTemplate = collections.defaultdict(list)
# material spec id -> property template ids taken over from the spec's template.
materialSpec_to_propertyTemplate = collections.defaultdict(list)

In [90]:
# Material templates; `nodes` is consumed by the next cell.
material_template_expr = parse('$[?(@.type == "material_template")]')
nodes = material_template_expr.find(json_data)

In [91]:
# Record, for every material template, the ids of its property templates.
# Each 'properties' entry is indexed with [0] to reach the object carrying the
# 'id' (presumably a [template link, bounds] pair -- confirm against the dump format).
# NOTE(review): raises KeyError for a template without a 'properties' key.
for n in nodes:
    node_id = n.value['uids']['auto']
    for node_property in n.value['properties']:
        materialTemplate_to_propertyTemplate[node_id].append(node_property[0]['id'])

In [92]:
# Material specs; `nodes` is consumed by the next cell.
material_spec_expr = parse('$[?(@.type == "material_spec")]')
nodes = material_spec_expr.find(json_data)

In [93]:
# Start edges.csv afresh ("w" mode -- every later edge cell appends) and write
# one template -> spec edge for each material spec that has a template link.
with open("soren_NYO_gemd_model_dumps/edges.csv", "w", newline="") as csvfile:
    writer = csv.writer(csvfile)
    for n in nodes:
        if n.value['template']:
            template_id = n.value['template']['id']
            spec_id = n.value['uids']['auto']
            # Remember the template's property-template ids for this spec; they are
            # turned into edges further down. Both dicts share the same list
            # object. Looking up an unknown template_id in the defaultdict
            # inserts (and shares) a new empty list.
            materialSpec_to_propertyTemplate[spec_id] = materialTemplate_to_propertyTemplate[template_id]
            label = 'materialTemplate_to_materialSpec'
            edge_value = [template_id, spec_id, label]
            writer.writerow(edge_value)

### property/parameter/condition template -> categorical bounds ###

In [94]:
# Property templates; `nodes` is consumed by the next cell.
property_template_expr = parse('$[?(@.type == "property_template")]')
nodes = property_template_expr.find(json_data)

In [95]:
# Link every property template to the categorical_bounds node of each category
# listed in its bounds. The lookup in categoryName_to_id (a plain dict) raises
# KeyError for a category that was never registered by categorical_nodes_generator.
with open("soren_NYO_gemd_model_dumps/edges.csv", "a", newline="") as csvfile:
    writer = csv.writer(csvfile)
    for n in nodes:
        source_id = n.value['uids']['auto']
        if 'categories' in n.value['bounds']:
            for category in n.value['bounds']['categories']:
                category_id = categoryName_to_id[category]
                label = 'property_to_category'
                edge_value = [source_id, category_id, label]
                writer.writerow(edge_value)

In [96]:
# Parameter templates; `nodes` is consumed by the next cell.
parameter_template_expr = parse('$[?(@.type == "parameter_template")]')
nodes = parameter_template_expr.find(json_data)

In [97]:
# Link every parameter template to the categorical_bounds node of each category
# listed in its bounds (same logic as the property-template cell, different label).
with open("soren_NYO_gemd_model_dumps/edges.csv", "a", newline="") as csvfile:
    writer = csv.writer(csvfile)
    for n in nodes:
        source_id = n.value['uids']['auto']
        if 'categories' in n.value['bounds']:
            for category in n.value['bounds']['categories']:
                category_id = categoryName_to_id[category]
                label = 'parameter_to_category'
                edge_value = [source_id, category_id, label]
                writer.writerow(edge_value)

In [98]:
# Condition templates; `nodes` is consumed by the next cell.
condition_template_expr = parse('$[?(@.type == "condition_template")]')
nodes = condition_template_expr.find(json_data)

In [99]:
# Link every condition template to the categorical_bounds node of each category
# listed in its bounds (same logic as the property-template cell, different label).
with open("soren_NYO_gemd_model_dumps/edges.csv", "a", newline="") as csvfile:
    writer = csv.writer(csvfile)
    for n in nodes:
        source_id = n.value['uids']['auto']
        if 'categories' in n.value['bounds']:
            for category in n.value['bounds']['categories']:
                category_id = categoryName_to_id[category]
                label = 'condition_to_category'
                edge_value = [source_id, category_id, label]
                writer.writerow(edge_value)

### material spec -> property template ###

In [100]:
# One edge from each material spec to every property template of its template.
# NOTE: despite their names, `category_ids` / `category_id` hold property-template
# ids here -- that is what materialSpec_to_propertyTemplate was filled with.
with open("soren_NYO_gemd_model_dumps/edges.csv", "a", newline="") as csvfile:
    writer = csv.writer(csvfile)
    for spec_id, category_ids in materialSpec_to_propertyTemplate.items():
        for category_id in category_ids:
            label = 'materialSpec_to_propertyTemplate'
            edge_value = [spec_id, category_id, label]
            writer.writerow(edge_value)

### categorical bounds -> ingredient spec ###

In [101]:
# material spec id -> ids of the categorical_bounds nodes named by the spec's
# nominal_categorical property values.
materialSpecId_to_categoryId = collections.defaultdict(list)

In [102]:
# Material specs again; `nodes` is consumed by the next cell.
material_spec_expr = parse('$[?(@.type == "material_spec")]')
nodes = material_spec_expr.find(json_data)

In [103]:
# For every material spec, collect the category-node ids of its categorical
# property values. Spec properties are wrapped, so the value sits under
# node_property['property']['value'].
for n in nodes:
    spec_id = n.value['uids']['auto']
    if 'properties' in n.value:
        for node_property in n.value['properties']:
            if node_property['property']['value']['type'] == 'nominal_categorical':
                materialSpecId_to_categoryId[spec_id].append(categoryName_to_id[node_property['property']['value']['category']])

In [104]:
# Ingredient specs; `nodes` is consumed by the next two cells.
ingredient_spec_expr = parse('$[?(@.type == "ingredient_spec")]')
nodes = ingredient_spec_expr.find(json_data)

In [105]:
# For every ingredient spec that links a material, add an edge from each
# category node of that material spec to the ingredient.
# The defaultdict lookup inserts an empty list for material ids without categories.
with open("soren_NYO_gemd_model_dumps/edges.csv", "a", newline="") as csvfile:
    writer = csv.writer(csvfile)
    for n in nodes:
        ingredient_id = n.value['uids']['auto']
        if n.value['material']:
            material_id = n.value['material']['id']
            for category_id in materialSpecId_to_categoryId[material_id]:
                edge_value = [category_id, ingredient_id, 'category_to_ingredient']
                writer.writerow(edge_value)

In [106]:
# Fallback for materials without any category node: connect the material spec
# to the ingredient spec directly. Relies on `nodes` still holding the
# ingredient-spec matches from the earlier query cell.
with open("soren_NYO_gemd_model_dumps/edges.csv", "a", newline="") as csvfile:
    writer = csv.writer(csvfile)
    for n in nodes:
        ingredient_id = n.value['uids']['auto']
        if n.value['material']:
            material_id = n.value['material']['id']
            if len(materialSpecId_to_categoryId[material_id]) == 0:
                edge_value = [material_id, ingredient_id, 'materialSpec_to_ingredientSpec']
                writer.writerow(edge_value)

### ingredient_spec -> parameter/condition template ###

In [107]:
# Per process spec: category-node ids of its categorical parameter / condition
# values, and parameter / condition template ids taken from its process template.
processSpecId_to_parameterCategoryId = collections.defaultdict(list)
processSpecId_to_conditionCategoryId = collections.defaultdict(list)
processSpecId_to_parameterTemplateId = collections.defaultdict(list)
processSpecId_to_conditionTemplateId = collections.defaultdict(list)

In [108]:
# Per process template: ids of the parameter / condition templates it references.
processTemplateId_to_parameterTemplateId = collections.defaultdict(list)
processTemplateId_to_conditionTemplateId = collections.defaultdict(list)

In [109]:
# Process templates; `nodes` is consumed by the next cell.
process_template_expr = parse('$[?(@.type == "process_template")]')
nodes = process_template_expr.find(json_data)

In [110]:
# Record, for every process template, the ids of its condition and parameter
# templates. Each entry is indexed with [0] to reach the object carrying the 'id'
# (presumably a [template link, bounds] pair -- confirm against the dump format).
# NOTE(review): raises KeyError if 'conditions' or 'parameters' is absent.
for n in nodes:
    node_id = n.value['uids']['auto']
    for node_conditon in n.value['conditions']:
        processTemplateId_to_conditionTemplateId[node_id].append(node_conditon[0]['id'])
    for node_parameter in n.value['parameters']:
        processTemplateId_to_parameterTemplateId[node_id].append(node_parameter[0]['id'])

In [111]:
# JSONPath filter over json_data: keep the elements whose `type` field equals "process_spec".
process_spec_expr = parse('$[?(@.type == "process_spec")]')
# Rebinds the shared `nodes` name to the process-spec matches.
nodes = process_spec_expr.find(json_data)

In [112]:
# Copy the parameter / condition template ids of each spec's process template
# onto the spec itself.
for n in nodes:
    spec_id = n.value['uids']['auto']
    template_id = n.value['template']['id']
    template_parameters = processTemplateId_to_parameterTemplateId[template_id]
    if template_parameters:
        # Guarded so that a spec key is only created when there is something to copy.
        processSpecId_to_parameterTemplateId[spec_id].extend(template_parameters)
    template_conditions = processTemplateId_to_conditionTemplateId[template_id]
    if template_conditions:
        processSpecId_to_conditionTemplateId[spec_id].extend(template_conditions)

In [113]:
# For every process spec, collect the (de-duplicated) category ids of its
# 'nominal_categorical' condition and parameter values.
for n in nodes:
    process_id = n.value['uids']['auto']
    for attribute_key, category_index in (
        ('conditions', processSpecId_to_conditionCategoryId),
        ('parameters', processSpecId_to_parameterCategoryId),
    ):
        if attribute_key not in n.value:
            continue
        for attribute in n.value[attribute_key]:
            if attribute['value']['type'] != 'nominal_categorical':
                continue
            # The category name is translated to a node id via categoryName_to_id.
            category_id = categoryName_to_id[attribute['value']['category']]
            seen_for_spec = category_index[process_id]
            if category_id not in seen_for_spec:
                seen_for_spec.append(category_id)

In [114]:
# JSONPath filter over json_data: keep the elements whose `type` field equals "ingredient_spec".
ingredient_spec_expr = parse('$[?(@.type == "ingredient_spec")]')
# Rebinds the shared `nodes` name to the ingredient-spec matches.
nodes = ingredient_spec_expr.find(json_data)

In [115]:
# Connect each ingredient spec to the condition templates, then the parameter
# templates, recorded for the process spec it feeds into.
with open("soren_NYO_gemd_model_dumps/edges.csv", "a", newline="") as csvfile:
    writer = csv.writer(csvfile)
    for n in nodes:
        ingredient_id = n.value['uids']['auto']
        process_id = n.value['process']['id']
        writer.writerows(
            [ingredient_id, target_id, 'ingredientSpec_to_conditionTemplate']
            for target_id in processSpecId_to_conditionTemplateId[process_id]
        )
        writer.writerows(
            [ingredient_id, target_id, 'ingredientSpec_to_parameterTemplate']
            for target_id in processSpecId_to_parameterTemplateId[process_id]
        )

In [116]:
# Fallback edge: an ingredient spec whose process spec has neither condition
# nor parameter templates is linked straight to that process spec.
with open("soren_NYO_gemd_model_dumps/edges.csv", "a", newline="") as csvfile:
    writer = csv.writer(csvfile)
    for n in nodes:
        ingredient_id = n.value['uids']['auto']
        process_id = n.value['process']['id']
        if processSpecId_to_conditionTemplateId[process_id] \
           or processSpecId_to_parameterTemplateId[process_id]:
            # Template edges already cover this ingredient.
            continue
        writer.writerow([ingredient_id, process_id, 'ingredientSpec_to_processSpec'])

### categorical bounds -> process spec ###

In [117]:
# Append 'category_to_processSpec' edges: one row per (category id, process-spec id)
# pair, first for categorical parameters, then for categorical conditions.
#
# The edge target must be the dictionary key (`spec_id`). Using the global
# `process_id` here would pick up whatever value the last executed loop left
# behind and attach every category to that single process spec.
with open("soren_NYO_gemd_model_dumps/edges.csv", "a", newline="") as csvfile:
    writer = csv.writer(csvfile)
    for spec_id, category_ids in processSpecId_to_parameterCategoryId.items():
        for category_id in category_ids:
            edge_value = [category_id, spec_id, 'category_to_processSpec']
            writer.writerow(edge_value)
    for spec_id, category_ids in processSpecId_to_conditionCategoryId.items():
        for category_id in category_ids:
            edge_value = [category_id, spec_id, 'category_to_processSpec']
            writer.writerow(edge_value)

### process spec -> material spec ###

In [118]:
# JSONPath filter over json_data: keep the elements whose `type` field equals "material_spec".
material_spec_expr = parse('$[?(@.type == "material_spec")]')
# Rebinds the shared `nodes` name to the material-spec matches.
nodes = material_spec_expr.find(json_data)

In [119]:
# Link each material spec to the process spec it references, when it has one.
with open("soren_NYO_gemd_model_dumps/edges.csv", "a", newline="") as csvfile:
    writer = csv.writer(csvfile)
    for n in nodes:
        material_id = n.value['uids']['auto']
        if not n.value['process']:
            # Material spec without a process link: no edge to write.
            continue
        process_id = n.value['process']['id']
        writer.writerow([process_id, material_id, 'processSpec_to_materialSpec'])

### material spec -> material run ###

In [120]:
# JSONPath filter over json_data: keep the elements whose `type` field equals "material_run".
material_run_expr = parse('$[?(@.type == "material_run")]')
# Rebinds the shared `nodes` name to the material-run matches.
nodes = material_run_expr.find(json_data)

In [121]:
# One 'materialSpec_to_materialRun' edge per material run, from its spec to the run.
with open("soren_NYO_gemd_model_dumps/edges.csv", "a", newline="") as csvfile:
    writer = csv.writer(csvfile)
    for n in nodes:
        record = n.value
        run_id, spec_id = record['uids']['auto'], record['spec']['id']
        writer.writerow([spec_id, run_id, 'materialSpec_to_materialRun'])

### ingredient_spec -> ingredient_run ###

In [122]:
# JSONPath filter over json_data: keep the elements whose `type` field equals "ingredient_run".
ingredient_run_expr = parse('$[?(@.type == "ingredient_run")]')
# Rebinds the shared `nodes` name to the ingredient-run matches.
nodes = ingredient_run_expr.find(json_data)

In [123]:
# One 'ingredientSpec_to_ingredientRun' edge per ingredient run, from its spec to the run.
with open("soren_NYO_gemd_model_dumps/edges.csv", "a", newline="") as csvfile:
    writer = csv.writer(csvfile)
    for n in nodes:
        record = n.value
        run_id, spec_id = record['uids']['auto'], record['spec']['id']
        writer.writerow([spec_id, run_id, 'ingredientSpec_to_ingredientRun'])

### process template -> process spec ###

In [124]:
# JSONPath filter over json_data: keep the elements whose `type` field equals "process_spec".
process_spec_expr = parse('$[?(@.type == "process_spec")]')
# Rebinds the shared `nodes` name to the process-spec matches.
nodes = process_spec_expr.find(json_data)

In [125]:
# One 'processTemplate_to_processSpec' edge per process spec, from its template to the spec.
with open("soren_NYO_gemd_model_dumps/edges.csv", "a", newline="") as csvfile:
    writer = csv.writer(csvfile)
    for n in nodes:
        record = n.value
        spec_id, template_id = record['uids']['auto'], record['template']['id']
        writer.writerow([template_id, spec_id, 'processTemplate_to_processSpec'])

### process spec -> process run ###

In [126]:
# JSONPath filter over json_data: keep the elements whose `type` field equals "process_run".
process_run_expr = parse('$[?(@.type == "process_run")]')
# Rebinds the shared `nodes` name to the process-run matches.
nodes = process_run_expr.find(json_data)

In [127]:
# One 'processSpec_to_processRun' edge per process run, from its spec to the run.
with open("soren_NYO_gemd_model_dumps/edges.csv", "a", newline="") as csvfile:
    writer = csv.writer(csvfile)
    for n in nodes:
        record = n.value
        run_id, spec_id = record['uids']['auto'], record['spec']['id']
        writer.writerow([spec_id, run_id, 'processSpec_to_processRun'])

### material run -> ingredient run ###

In [128]:
# JSONPath filter over json_data: keep the elements whose `type` field equals "ingredient_run".
ingredient_run_expr = parse('$[?(@.type == "ingredient_run")]')
# Rebinds the shared `nodes` name to the ingredient-run matches.
nodes = ingredient_run_expr.find(json_data)

In [129]:
# Link each ingredient run to the material run it references, when it has one.
with open("soren_NYO_gemd_model_dumps/edges.csv", "a", newline="") as csvfile:
    writer = csv.writer(csvfile)
    for n in nodes:
        ingredient_id = n.value['uids']['auto']
        if not n.value['material']:
            # Ingredient run without a material link: no edge to write.
            continue
        material_id = n.value['material']['id']
        writer.writerow([material_id, ingredient_id, 'materialRun_to_ingredientRun'])

### ingredient run -> process run ###

In [130]:
# Lookup tables keyed by process-run id; each value is a list of ids that the
# following cells copy over from the corresponding processSpecId_* tables.
processRunId_to_parameterCategoryId = collections.defaultdict(list)
processRunId_to_conditionCategoryId = collections.defaultdict(list)
processRunId_to_parameterTemplateId = collections.defaultdict(list)
processRunId_to_conditionTemplateId = collections.defaultdict(list)

In [131]:
# JSONPath filter over json_data: keep the elements whose `type` field equals "process_run".
process_run_expr = parse('$[?(@.type == "process_run")]')
# Rebinds the shared `nodes` name to the process-run matches.
nodes = process_run_expr.find(json_data)

In [132]:
# Copy the parameter / condition template ids recorded for each run's process
# spec onto the run itself.
for n in nodes:
    run_id = n.value['uids']['auto']
    spec_id = n.value['spec']['id']
    spec_parameters = processSpecId_to_parameterTemplateId[spec_id]
    if spec_parameters:
        # Guarded so that a run key is only created when there is something to copy.
        processRunId_to_parameterTemplateId[run_id].extend(spec_parameters)
    spec_conditions = processSpecId_to_conditionTemplateId[spec_id]
    if spec_conditions:
        processRunId_to_conditionTemplateId[run_id].extend(spec_conditions)

In [133]:
# Copy the categorical parameter / condition ids recorded for each run's
# process spec onto the run itself.
for n in nodes:
    run_id = n.value['uids']['auto']
    spec_id = n.value['spec']['id']
    spec_parameter_categories = processSpecId_to_parameterCategoryId[spec_id]
    if spec_parameter_categories:
        # Guarded so that a run key is only created when there is something to copy.
        processRunId_to_parameterCategoryId[run_id].extend(spec_parameter_categories)
    spec_condition_categories = processSpecId_to_conditionCategoryId[spec_id]
    if spec_condition_categories:
        processRunId_to_conditionCategoryId[run_id].extend(spec_condition_categories)

In [134]:
# JSONPath filter over json_data: keep the elements whose `type` field equals "ingredient_run".
ingredient_run_expr = parse('$[?(@.type == "ingredient_run")]')
# Rebinds the shared `nodes` name to the ingredient-run matches.
nodes = ingredient_run_expr.find(json_data)

In [135]:
# Connect each ingredient run to the condition templates, then the parameter
# templates, recorded for the process run it feeds into.
with open("soren_NYO_gemd_model_dumps/edges.csv", "a", newline="") as csvfile:
    writer = csv.writer(csvfile)
    for n in nodes:
        ingredient_id = n.value['uids']['auto']
        process_id = n.value['process']['id']
        writer.writerows(
            [ingredient_id, target_id, 'ingredientRun_to_conditionTemplate']
            for target_id in processRunId_to_conditionTemplateId[process_id]
        )
        writer.writerows(
            [ingredient_id, target_id, 'ingredientRun_to_parameterTemplate']
            for target_id in processRunId_to_parameterTemplateId[process_id]
        )

In [136]:
# Fallback edge: an ingredient run whose process run has neither condition
# nor parameter templates is linked straight to that process run.
with open("soren_NYO_gemd_model_dumps/edges.csv", "a", newline="") as csvfile:
    writer = csv.writer(csvfile)
    for n in nodes:
        ingredient_id = n.value['uids']['auto']
        process_id = n.value['process']['id']
        if processRunId_to_conditionTemplateId[process_id] \
           or processRunId_to_parameterTemplateId[process_id]:
            # Template edges already cover this ingredient run.
            continue
        writer.writerow([ingredient_id, process_id, 'ingredientRun_to_processRun'])

In [137]:
# Append 'category_to_processRun' edges: one row per (category id, process-run id)
# pair, first for categorical parameters, then for categorical conditions.
#
# The dictionaries are keyed by process-run id, so the key (`run_id`) is the
# edge target. Using the global `process_id` here would pick up whatever value
# the last executed loop left behind and attach every category to that single
# process run.
with open("soren_NYO_gemd_model_dumps/edges.csv", "a", newline="") as csvfile:
    writer = csv.writer(csvfile)
    for run_id, category_ids in processRunId_to_parameterCategoryId.items():
        for category_id in category_ids:
            edge_value = [category_id, run_id, 'category_to_processRun']
            writer.writerow(edge_value)
    for run_id, category_ids in processRunId_to_conditionCategoryId.items():
        for category_id in category_ids:
            edge_value = [category_id, run_id, 'category_to_processRun']
            writer.writerow(edge_value)

### process run -> material run ###

In [138]:
# JSONPath filter over json_data: keep the elements whose `type` field equals "material_run".
material_run_expr = parse('$[?(@.type == "material_run")]')
# Rebinds the shared `nodes` name to the material-run matches.
nodes = material_run_expr.find(json_data)

In [139]:
# Link each material run to the process run it references, when it has one.
with open("soren_NYO_gemd_model_dumps/edges.csv", "a", newline="") as csvfile:
    writer = csv.writer(csvfile)
    for n in nodes:
        material_id = n.value['uids']['auto']
        if not n.value['process']:
            # Material run without a process link: no edge to write.
            continue
        process_id = n.value['process']['id']
        writer.writerow([process_id, material_id, 'processRun_to_materialRun'])

### measurement template -> measurement spec ###

In [140]:
# JSONPath filter over json_data: keep the elements whose `type` field equals "measurement_spec".
measurement_spec_expr = parse('$[?(@.type == "measurement_spec")]')
# Rebinds the shared `nodes` name to the measurement-spec matches.
nodes = measurement_spec_expr.find(json_data)

In [141]:
# One 'measurementTemplate_to_measurementSpec' edge per measurement spec,
# from its template to the spec.
with open("soren_NYO_gemd_model_dumps/edges.csv", "a", newline="") as csvfile:
    writer = csv.writer(csvfile)
    for n in nodes:
        record = n.value
        spec_id, template_id = record['uids']['auto'], record['template']['id']
        writer.writerow([template_id, spec_id, 'measurementTemplate_to_measurementSpec'])

### measurement spec -> measurement run ###

In [142]:
# JSONPath filter over json_data: keep the elements whose `type` field equals "measurement_run".
measurement_run_expr = parse('$[?(@.type == "measurement_run")]')
# Rebinds the shared `nodes` name to the measurement-run matches.
nodes = measurement_run_expr.find(json_data)

In [143]:
# Link each measurement run to its spec; runs whose 'spec' is empty are skipped.
with open("soren_NYO_gemd_model_dumps/edges.csv", "a", newline="") as csvfile:
    writer = csv.writer(csvfile)
    for n in nodes:
        run_id = n.value['uids']['auto']
        if not n.value['spec']:
            continue
        spec_id = n.value['spec']['id']
        writer.writerow([spec_id, run_id, 'measurementSpec_to_measurementRun'])

### measurement run -> material run ###

In [144]:
# JSONPath filter over json_data: keep the elements whose `type` field equals "measurement_run".
measurement_run_expr = parse('$[?(@.type == "measurement_run")]')
# Rebinds the shared `nodes` name to the measurement-run matches.
nodes = measurement_run_expr.find(json_data)

In [145]:
# Link each measurement run to the material run it references; runs whose
# 'material' is empty are skipped.
with open("soren_NYO_gemd_model_dumps/edges.csv", "a", newline="") as csvfile:
    writer = csv.writer(csvfile)
    for n in nodes:
        measurement_id = n.value['uids']['auto']
        if not n.value['material']:
            continue
        material_id = n.value['material']['id']
        writer.writerow([measurement_id, material_id, 'measurementRun_to_materialRun'])