In [1]:
# The purpose of this notebook is to convert neo4j-JKG-export-all-output into a less verbose JSON Knowledge Graph (JKG).
# For large graphs (e.g., millions of items), the type, id, start and end node properties can massively increase the file size.
# In some cases this prevents an extracted graph from being re-ingested because of a time-out.
# This notebook simply deletes the keys neo4j adds in json-export-all and retains only the id property of the start and end nodes, which is what JKG rels need.

In [2]:
# Import python3 modules
import json as json
import os
from decimal import Decimal

import ijson

In [3]:
# Stream the JSON KG from the local directory: read FROM source_out.json (neo4j
# export format) and write TO source_in.json (JKG format).
#
# The export is parsed incrementally with ijson, so only one node or rel is held
# in memory at a time. The source file is scanned twice: once for the 'nodes'
# array and once for the 'rels' array.

NEO4J_EXPORT_PATH = 'source_out.json'  # input: neo4j json-export-all output
JKG_OUTPUT_PATH = 'source_in.json'     # output: slimmed-down JKG file


def _to_jsonable(value):
    """Fallback for json.dumps: convert values it cannot serialize natively.

    ijson returns non-integer numbers as decimal.Decimal by default, which
    json.dumps rejects; they are written out as floats instead.

    Raises:
        TypeError: for any other unsupported type (same as json.dumps itself).
    """
    if isinstance(value, Decimal):
        return float(value)
    raise TypeError('Object of type %s is not JSON serializable' % type(value).__name__)


def _slim_node(node):
    """Drop the 'type' and 'id' keys from an exported node (in place) and return it."""
    # pop(..., None) rather than del: a node lacking one of these keys is
    # passed through instead of aborting the whole conversion with KeyError.
    node.pop('type', None)
    node.pop('id', None)
    return node


def _slim_rel(rel):
    """Drop the 'type'/'id' keys from an exported rel and reduce both endpoints.

    For 'start' and 'end', the endpoint's 'id' and 'labels' keys are removed and
    its 'properties' dict is reduced to the 'id' property only, which is what a
    JKG rel needs to reference its nodes. The rel is modified in place and returned.

    Raises:
        KeyError: if the rel has no 'start'/'end', or an endpoint has no
            'properties' (such a rel could not be linked to its nodes).
    """
    rel.pop('type', None)
    rel.pop('id', None)
    for side in ('start', 'end'):
        endpoint = rel[side]
        endpoint.pop('id', None)
        endpoint.pop('labels', None)
        # Keep only the endpoint's 'id' property.
        endpoint['properties'] = {
            key: value
            for key, value in endpoint['properties'].items()
            if key == 'id'
        }
    return rel


def _write_items(out, prefix, slim):
    """Stream every item under `prefix` from the export into `out`.

    Args:
        out: writable text file receiving the JSON array elements.
        prefix: ijson prefix selecting the items ('nodes.item' or 'rels.item').
        slim: function applied to each item before it is serialized.

    Each item is written on its own line; every item after the first is
    preceded by a comma so the surrounding array stays valid JSON.
    """
    with open(NEO4J_EXPORT_PATH, 'rb') as source:
        separator = '\n'
        for item in ijson.items(source, prefix):
            out.write(separator)
            out.write(json.dumps(slim(item), default=_to_jsonable))
            separator = '\n,'


# The with-block guarantees the output is flushed and closed even if the
# conversion fails part-way through.
with open(JKG_OUTPUT_PATH, 'w') as out:
    out.write('{"nodes":[')
    _write_items(out, 'nodes.item', _slim_node)
    out.write('\n],"rels":[')
    _write_items(out, 'rels.item', _slim_rel)
    out.write(']}')