In [1]:
# The purpose of this notebook is to convert neo4j JKG export-all output into a less verbose JSON Knowledge Graph (JKG).
# For large graphs (e.g., millions of items) the type, id, start and end node properties can massively increase the file size.
# In some cases this prevents an extracted graph from being re-ingested due to a time-out.
# This notebook simply deletes the keys neo4j adds in json-export-all and only retains the start and end node property id needed for JKG rels.

In [2]:
# Import python3 modules
import os
import json as json

In [26]:
# Load JSON KG from local directory.
# The encoding is passed explicitly because open() otherwise falls back to the
# platform's locale encoding, which can fail on (or garble) non-ASCII property
# values on systems where that encoding is not UTF-8.
# NOTE(review): assumes the export file is UTF-8 encoded — confirm against the export settings.
with open('source_out.json', encoding='utf-8') as file:
    JKG = json.load(file)

In [27]:
# Strip the bookkeeping keys that neo4j's json-export-all adds, so the JKG file is smaller:
#   - every node and every rel loses its top-level 'type' and 'id'
#   - the 'start' and 'end' endpoint of every rel loses 'id' and 'labels'
#   - the 'properties' of each endpoint are reduced to the single 'id' entry needed for JKG rels
#
# dict.pop(key, None) is used instead of del so that this cell can be re-run on an
# already-trimmed JKG (or on an item that lacks one of the keys) without raising KeyError.
# NOTE(review): the exporter may omit 'labels' / 'properties' for some endpoints — confirm
# against the export configuration; such endpoints are left without those keys here.
ITEM_KEYS_TO_DROP = ('type', 'id')
ENDPOINT_KEYS_TO_DROP = ('id', 'labels')
ENDPOINT_PROPERTIES_TO_KEEP = {'id'}

for node in JKG['nodes']:
    for key in ITEM_KEYS_TO_DROP:
        node.pop(key, None)

for rel in JKG['rels']:
    for key in ITEM_KEYS_TO_DROP:
        rel.pop(key, None)
    # 'start' and 'end' have the same shape, so both endpoints are handled by one loop
    for side in ('start', 'end'):
        endpoint = rel[side]
        for key in ENDPOINT_KEYS_TO_DROP:
            endpoint.pop(key, None)
        # Only rewrite 'properties' when present; an absent key is not replaced by an empty dict
        if 'properties' in endpoint:
            endpoint['properties'] = {
                k: v for k, v in endpoint['properties'].items()
                if k in ENDPOINT_PROPERTIES_TO_KEEP
            }

In [28]:
# Persist the trimmed JSON KG to the local directory.
# separators=(',', ':') drops the padding spaces json.dump would otherwise emit
# after each ',' and ':', which keeps the output file as small as possible.
with open(file="source_in.json", mode="w") as file:
    json.dump(obj=JKG, fp=file, separators=(',', ':'))