In [None]:
# Download and install Python packages needed for this Jupyter Notebook

!pip install neo4j pyalex

In [None]:
# This imports the Python packages needed for this Jupyter Notebook 

# Note: 'ast', 'json' and 'os' are part of the Python Standard Library,
# so they are available in every Python installation and never need to be
# installed separately. Only 'neo4j' and 'pyalex' are third-party packages.

import pyalex
from pyalex import Works, Authors, Sources, Institutions, Concepts, Publishers
import json 
from neo4j import GraphDatabase
import os
import ast

In [None]:
# This block connects the Jupyter Notebook to your Neo4j Database
# Note: Your Neo4j Database must be running and accepting connections
# Note: This example is for connecting to a local instance of Neo4j
# More information on interfacing with Neo4j can be found at
# https://neo4j.com/docs/python-manual/current/connect/
#
# The connection settings can be supplied without editing the notebook by
# setting the NEO4J_URI, NEO4J_USERNAME and NEO4J_PASSWORD environment
# variables; the values below are only used when a variable is not set.

uri = os.environ.get('NEO4J_URI', 'bolt://localhost:7687')
username = os.environ.get('NEO4J_USERNAME', 'neo4j')
password = os.environ.get('NEO4J_PASSWORD', 'password')
driver = GraphDatabase.driver(uri, auth=(username, password))

# Fail in this cell, with a clear error, if the database cannot be reached
# or the credentials are rejected, rather than in a later cell
driver.verify_connectivity()




In [None]:
# This block provides your email address to OpenAlex for best performance
# Replace the placeholder text below with your own address before running
# More information can be found at https://docs.openalex.org/

pyalex.config.email = "Enter your email address here"

In [None]:
# This block retrieves all Works for the OpenAlex Concept ID provided and
# writes each page of results as a JSON file into the Neo4j import directory.
# Hitting enter selects "Classroom design", which contains a single page
# with 99 Works entries. Some broader concepts like "Computer Science"
# can contain tens of thousands of Works.

concept_input = input(
    'Please enter an OpenAlex Concept ID.'
    ' Hit enter for C2991895030 - "Classroom design" which will'
    ' return one page with 99 Works: ').strip()

if concept_input == "":
    concept_input = "C2991895030"

# Ask the running Neo4j instance for its import directory. The setting is
# named server.directories.import in Neo4j 5 and dbms.directories.import
# in Neo4j 4. The session is only needed for this one lookup.
with driver.session() as session:
    neodir = session.run(
        "CALL dbms.listConfig() YIELD name, value "
        "WHERE name IN ['server.directories.import', "
        "'dbms.directories.import'] RETURN value").values()

if not neodir:
    raise RuntimeError(
        "Could not read the import directory from the Neo4j configuration")

path = neodir[0][0]

# The Concept ID is passed as a plain string; wrapping it in braces would
# build a Python set instead of the ID itself
pager = Works().filter(concept={"id": concept_input}).paginate(
    per_page=200, n_max=None)

for page_count, page in enumerate(pager, start=1):
    file = concept_input + "_Page_" + str(page_count)

    # Write straight into the import directory (no change of working
    # directory); the with-statement closes the file even if the dump fails
    with open(os.path.join(path, file), "w") as out_file:
        json.dump(page, out_file, indent=6)

    print("Now Downloading Page " + str(page_count) +
          " For Concept ID " + concept_input)

directory_list = os.listdir(path)


In [None]:
# Create an index on the "id" property of every node label used in this
# notebook, which greatly speeds up data import and data queries.
# Each pair below is (index name, node label).

for index_name, node_label in (
        ('Institutions', 'Institutions'),
        ('Concept', 'Concept'),
        ('Work_ID', 'Work'),
        ('Author', 'Author')):
    driver.execute_query(
        'CREATE INDEX ' + index_name + ' IF NOT EXISTS FOR '
        + '    (i:' + node_label + ') ON (i.id)')



In [None]:
# This block imports the files previously downloaded to create Work nodes
# containing all information fields retrieved from OpenAlex

# Ask the running Neo4j instance for its import directory. The setting is
# named server.directories.import in Neo4j 5 and dbms.directories.import
# in Neo4j 4.
with driver.session() as session:
    neodir = session.run(
        "CALL dbms.listConfig() YIELD name, value "
        "WHERE name IN ['server.directories.import', "
        "'dbms.directories.import'] RETURN value").values()

if not neodir:
    raise RuntimeError(
        "Could not read the import directory from the Neo4j configuration")

path = neodir[0][0]

# OpenAlex fields copied onto the Work node under the same name;
# a missing value is stored as an empty string
scalar_fields = [
    'cited_by_api_url', 'cited_by_count', 'corresponding_author_ids',
    'corresponding_institution_ids', 'created_date', 'display_name', 'doi',
    'is_paratext', 'is_retracted', 'language', 'locations_count',
    'ngrams_url', 'publication_date', 'publication_year', 'title', 'type',
    'updated_date', 'is_oa', 'license', 'url', 'version']

# (node property, OpenAlex field) pairs holding nested data; each one is
# stored on the node as a JSON string by apoc.convert.setJsonProperty
json_fields = [
    ('inverted_abstract', 'abstract_inverted_index'),
    ('authorships', 'authorships'),
    ('apc_payment', 'apc_payment'),
    ('best_oa_location', 'best_oa_location'),
    ('biblio', 'biblio'),
    ('concepts', 'concepts'),
    ('counts_by_year', 'counts_by_year'),
    ('grants', 'grants'),
    ('ids', 'ids'),
    ('locations', 'locations'),
    ('mesh', 'mesh'),
    ('open_access', 'open_access'),
    ('primary_location', 'primary_location'),
    ('referenced_works', 'referenced_works'),
    ('related_works', 'related_works')]

# Works imported from the downloaded files are marked with pass = 1
set_clause = ", ".join(
    ["w.source = 'OpenAlex'", "w.pass = 1"]
    + ["w." + field + " = coalesce(value." + field + ", '')"
       for field in scalar_fields])

json_calls = " ".join(
    "CALL apoc.convert.setJsonProperty(w, '" + prop + "', value."
    + field + ")" for prop, field in json_fields)

directory_list = sorted(os.listdir(path))

for file in directory_list:
    # Skip hidden files and anything that is not a regular file
    if file.startswith('.') or not os.path.isfile(os.path.join(path, file)):
        continue

    print("File being imported: " + file)

    # The first statement streams every Work in the file; the second one
    # is run for those Works in batches of 200
    work_node_creation = (
        "CALL apoc.periodic.iterate("
        "\"CALL apoc.load.json('file:///" + file + "') YIELD value\", "
        "\"MERGE (w:Work {id: value.id}) SET " + set_clause
        + " WITH w, value " + json_calls + "\", "
        "{batchSize: 200, parallel: true, retries: 2}) "
        "YIELD batches, total, operations")

    #Uncomment the print command below to view the raw Cypher script used by Neo4j
    #print(work_node_creation)

    record, summary, keys = driver.execute_query(work_node_creation)
    print("Operations executed during file import - "
          + str(record[0]["operations"]))
    print("File - " + file + " import complete")

print("All works imported")


In [None]:
#This block retrieves works referenced by existing Work nodes and
# creates a REFERENCED_WORK relationship
#If the referenced work does not exist it is created using the
# id retrieved from the list of "referenced_works"
#Newly created works are identified with a 2 in the "pass" property to
# allow processing to retrieve all information about the work;
# works that already exist keep whatever "pass" value they have, so
# running this block again does not undo the marks set by later blocks

referenced_node_creation = """
CALL apoc.periodic.iterate(
    "MATCH (w:Work) RETURN w",
    "WITH w, apoc.convert.fromJsonList(w.referenced_works) AS ref_works
     UNWIND ref_works AS ref_work
     MERGE (z:Work {id: ref_work})
         ON CREATE SET z.pass = 2
     MERGE (z)<-[:REFERENCED_WORK]-(w)",
    {batchSize: 200, parallel: false})
"""

#Uncomment the print command below to view the raw Cypher script used by Neo4j
#print(referenced_node_creation)

record, summary, keys = driver.execute_query(referenced_node_creation)
# Column 8 of the apoc.periodic.iterate result - presumably the "batch"
# summary map; confirm against your APOC version
print(record[0][8])
print("Referenced work relationships creation complete")



In [None]:
#This block retrieves works related to existing Work nodes and
# creates a RELATED_WORK relationship
#If the related work does not exist it is created using the
# id retrieved from the list of "related_works"
#Newly created works are identified with a 2 in the "pass" property to
# allow processing to retrieve all information about the work;
# works that already exist keep whatever "pass" value they have, so
# running this block again does not undo the marks set by later blocks

related_node_creation = """
CALL apoc.periodic.iterate(
    "MATCH (w:Work) RETURN w",
    "WITH w, apoc.convert.fromJsonList(w.related_works) AS rel_works
     UNWIND rel_works AS rel_work
     MERGE (z:Work {id: rel_work})
         ON CREATE SET z.pass = 2
     WITH z, w WHERE z.id <> w.id
     MERGE (z)<-[:RELATED_WORK]-(w)",
    {batchSize: 200, parallel: false})
"""

#Uncomment the print command below to view the raw Cypher script used by Neo4j
#print(related_node_creation)

record, summary, keys = driver.execute_query(related_node_creation)
# Column 8 of the apoc.periodic.iterate result - presumably the "batch"
# summary map; confirm against your APOC version
print(record[0][8])
print("Related work relationships creation complete")

In [None]:
# For every Work node, read the JSON list stored in its "concepts"
# property and link the Work to each listed concept with an
# ASSOC_CONCEPT relationship.
# A Concept node is created from the listed id when it does not exist yet.
# Concepts handled here are marked with a 1 in the "pass" property to
# allow processing to retrieve all information about the concept.

concept_node_creation = (
    'CALL apoc.periodic.iterate("MATCH (w:Work) RETURN w","WITH '
    '    apoc.convert.fromJsonList(w.concepts) AS concepts,w UNWIND '
    '    concepts AS concept MERGE (c:Concept {id: concept.id}) SET '
    '    c.pass = 1, '
    '    c.score = concept.score, '
    '    c.level = concept.level, '
    '    c.display_name = concept.display_name, '
    '    c.wikidata = concept.wikidata '
    '    MERGE (c)<-[:ASSOC_CONCEPT]-(w)",{batchSize:200, parallel:false})'
)

# Uncomment the print command below to view the raw Cypher script used by Neo4j
#print(concept_node_creation)

record, summary, keys = driver.execute_query(concept_node_creation)
print(record[0][8])
print("Concept import complete")


In [None]:
#This block retrieves authors of existing Work nodes and
# creates a WROTE relationship that includes an "author_position"
# property to identify the position of the author's name
#If the author does not exist it is created using the
# id retrieved from the list of "authorships"
#Authors handled here are identified with a 1 in the "pass" property to
# allow processing to retrieve all information about the author
#The id of the first institution of each authorship is added to the
# author's "institution" list (an empty string when there is none).
# The list is kept and extended across authorships, so an author who
# appears with several institutions ends up with all of them
#Authorships without an author id are skipped, because MERGE cannot
# match on a null id

author_node_creation = """
CALL apoc.periodic.iterate(
    "MATCH (w:Work) RETURN w",
    "WITH w, apoc.convert.fromJsonList(w.authorships) AS ships
     UNWIND ships AS ship
     WITH w, ship, coalesce(ship.institutions[0].id, '') AS inst_id
     WHERE ship.author.id IS NOT NULL
     MERGE (a:Author {id: ship.author.id})
     SET a.source = 'OpenAlex',
         a.pass = 1,
         a.display_name = ship.author.display_name,
         a.orcid = ship.author.orcid,
         a.institution = CASE
             WHEN inst_id IN coalesce(a.institution, [])
             THEN a.institution
             ELSE coalesce(a.institution, []) + inst_id END
     MERGE (a)-[:WROTE {author_position: ship.author_position}]->(w)",
    {batchSize: 200, parallel: false})
"""

#Uncomment the print command below to view the raw Cypher script used by Neo4j
#print(author_node_creation)

record, summary, keys = driver.execute_query(author_node_creation)
# Column 8 of the apoc.periodic.iterate result - presumably the "batch"
# summary map; confirm against your APOC version
print(record[0][8])
print("Author import complete")

In [None]:
#This block retrieves the institutions of authors from existing Work nodes and
# creates an institution node if one does not already exist with
# "display_name", "country_code", "ror" and "type" properties
#Only the first institution of each authorship is used; authorships
# without an institution are skipped so that no placeholder node with
# an empty id is created
#Institutions handled here are identified with a 1 in the "pass" property to
# allow processing to retrieve all information about the institution

institution_node_creation = """
CALL apoc.periodic.iterate(
    "MATCH (w:Work) RETURN w",
    "WITH apoc.convert.fromJsonList(w.authorships) AS ships
     UNWIND ships AS ship
     WITH ship.institutions[0] AS inst
     WHERE inst.id IS NOT NULL
     MERGE (i:Institutions {id: inst.id})
     SET i.source = 'OpenAlex',
         i.pass = 1,
         i.display_name = inst.display_name,
         i.country_code = inst.country_code,
         i.ror = inst.ror,
         i.type = inst.type",
    {batchSize: 200, parallel: false})
"""

#Uncomment the print command below to view the raw Cypher script used by Neo4j
#print(institution_node_creation)

record, summary, keys = driver.execute_query(institution_node_creation)
# Column 8 of the apoc.periodic.iterate result - presumably the "batch"
# summary map; confirm against your APOC version
print(record[0][8])
print("Institutions import complete")

In [None]:
# Create an AFFILIATED_WITH relationship from each Author node to every
# Institutions node whose id appears in the author's "institution" list
# (empty-string entries in the list are ignored).

institution_relationship_creation = (
    'CALL apoc.periodic.iterate("MATCH (a:Author) RETURN a","UNWIND '
    "    a.institution AS inst WITH inst,a WHERE inst <> '' MATCH (i:Institutions) "
    '    WHERE i.id = inst MERGE (a)-[:AFFILIATED_WITH]->(i)", '
    '    {batchSize:200, parallel:false})'
)

# Uncomment the print command below to view the raw Cypher script used by Neo4j
#print(institution_relationship_creation)

record, summary, keys = driver.execute_query(
    institution_relationship_creation)
print(record[0][8])
print("Author institution relationships creation complete")

In [None]:
#This block transforms the inverted_abstract for each Work node into
# a regular abstract using pyalex's invert_abstract function

records, summary, keys = driver.execute_query(
    "MATCH (n:Work) WHERE n.inverted_abstract IS NOT NULL AND "
    "n.inverted_abstract <> 'null' RETURN n.inverted_abstract, n.id")

work_abstract_count = 0

# Loop through retrieved works to convert inverted abstracts to
# regular abstracts

for record in records:
    node_id = record["n.id"]
    inverted_json = record["n.inverted_abstract"]

    try:
        # The property holds the inverted index as JSON text, so it is
        # read with the JSON parser; this also copes with JSON-only
        # syntax such as null, true and escaped slashes
        inverted_index = json.loads(inverted_json)
        driver.execute_query(
            "MATCH (w:Work {id: $id}) SET w.abstract = $abstract",
            id=node_id, abstract=pyalex.invert_abstract(inverted_index))
        work_abstract_count += 1
    except Exception as error:
        # Report the failure and carry on with the next Work. Exception
        # (not a bare except) is caught so the loop can still be interrupted
        print("An exception occurred for Work id - " + node_id)
        print("Reason: " + str(error))
        print("Here is the abstract for this work:")
        print(inverted_json)

works_with_data_imported, summary, keys = driver.execute_query(
    "MATCH (n:Work) WHERE n.display_name IS NOT NULL RETURN COUNT(n)")

print(str(work_abstract_count) + " Works have abstracts out of "
      + str(works_with_data_imported[0][0]) +
      " Works with data imported")

print(str(len(records) - work_abstract_count) +
      " Works with an inverted abstract failed conversion" +
      " to a normal abstract")

# Retrieving Metadata for Referenced and Related Works

The cells below retrieve information for the Referenced and Related Works whose nodes were created earlier but which were not included in the original download of Works.

API calls are restricted to 10 per second and 100,000 per day. 
More information can be found at - https://docs.openalex.org/how-to-use-the-api/rate-limits-and-authentication

In [None]:
# This block retrieves information from OpenAlex in batches of 60
# on Related and Referenced Works created earlier that were not included
# in the initial download of requested works.

# This greatly expands the scope of Works considered
# From the "Classroom Design" example, 99 Works originally imported have
# 1700 additional Works Related to or Referenced, which will be imported

# In some cases retrieval of a specified Work (based on ID) returns information
# for another Work ID. This appears to be when the original Work was deleted
# from the OpenAlex database. Such nodes are named 'Deleted Work' and marked
# with pass = 999; Works retrieved normally are marked with pass = 3.
# Works that cannot be retrieved at all are named 'Work Not Found' and
# also marked with pass = 999.

# OpenAlex fields copied onto the Work node under the same name;
# a missing value is stored as an empty string
# (display_name is handled separately below)
scalar_fields = [
    'cited_by_api_url', 'cited_by_count', 'corresponding_author_ids',
    'corresponding_institution_ids', 'created_date', 'doi', 'is_paratext',
    'is_retracted', 'language', 'locations_count', 'ngrams_url',
    'publication_date', 'publication_year', 'title', 'type', 'updated_date',
    'is_oa', 'license', 'url', 'version']

# (node property, OpenAlex field) pairs holding nested data; each one is
# stored on the node as a JSON string by apoc.convert.setJsonProperty
json_fields = [
    ('inverted_abstract', 'abstract_inverted_index'),
    ('authorships', 'authorships'),
    ('apc_payment', 'apc_payment'),
    ('best_oa_location', 'best_oa_location'),
    ('biblio', 'biblio'),
    ('concepts', 'concepts'),
    ('counts_by_year', 'counts_by_year'),
    ('grants', 'grants'),
    ('ids', 'ids'),
    ('locations', 'locations'),
    ('mesh', 'mesh'),
    ('open_access', 'open_access'),
    ('primary_location', 'primary_location'),
    ('referenced_works', 'referenced_works'),
    ('related_works', 'related_works')]

# The returned Work is compared with the node by its full id, and
# display_name is assigned exactly once so the 'Deleted Work' marker
# is not overwritten by the name of the Work that was returned instead
set_items = [
    "w.pass = CASE WHEN value.id = w.id THEN 3 ELSE 999 END",
    "w.source = 'OpenAlex'",
    "w.display_name = CASE WHEN value.id = w.id "
    "THEN coalesce(value.display_name, '') ELSE 'Deleted Work' END"]
set_items += ["w." + field + " = coalesce(value." + field + ", '')"
              for field in scalar_fields]

json_calls = [
    "CALL apoc.convert.setJsonProperty(w, '" + prop + "', value."
    + field + ")" for prop, field in json_fields]

# Downloads one Work from OpenAlex and copies it onto the node with
# exactly the requested id. Both ids are passed as query parameters.
work_update_query = (
    "CALL apoc.load.jsonParams("
    "'https://api.openalex.org/works/' + $work_id, null, null) "
    "YIELD value "
    "MATCH (w:Work {id: $id}) "
    "SET " + ", ".join(set_items) + " "
    "WITH w, value " + " ".join(json_calls))

count_query = "MATCH (n:Work) WHERE n.display_name IS NULL RETURN COUNT(n)"

records_for_work_retrieval_start, summary_for_work_retrieval_start, \
    keys_for_work_retrieval_start = driver.execute_query(count_query)

works_without_information = records_for_work_retrieval_start[0][0]

print("A total of " + str(works_without_information) +
      " Works will be retrieved")

while works_without_information > 0:
    print("Processing work number " + str(works_without_information) +
          " in increments of 60")

    # substring(n.id, 21, 32) drops the first 21 characters of the id
    # (the length of 'https://openalex.org/'), leaving the short Work ID
    batch, summary, keys = driver.execute_query(
        "MATCH (n:Work) WHERE n.display_name IS NULL AND n.pass = 2 "
        "RETURN n.id AS id, substring(n.id, 21, 32) AS work_id LIMIT 60")

    # Works without a display_name that are not marked pass = 2 are never
    # selected above; stop instead of looping forever on them
    if not batch:
        print(str(works_without_information) +
              " Works without a display_name are not marked with pass = 2"
              " and were skipped")
        break

    # Loop through results returned and retrieve Work information from OpenAlex for each
    for record in batch:
        node_id = record["id"]
        work_id = record["work_id"]
        try:
            driver.execute_query(
                work_update_query, id=node_id, work_id=work_id)
        except Exception as error:
            # Exception (not a bare except) is caught so that the loop can
            # still be interrupted from the keyboard
            driver.execute_query(
                "MATCH (w:Work {id: $id}) "
                "SET w.display_name = 'Work Not Found', w.pass = 999",
                id=node_id)
            print("Work was not found for ID: " + str(work_id))
            print("Reason: " + str(error))

    records_for_work_retrieval_start, summary_for_work_retrieval_start, \
        keys_for_work_retrieval_start = driver.execute_query(count_query)
    works_without_information = records_for_work_retrieval_start[0][0]

In [None]:
# For every Work node marked with pass = 3, read the JSON list stored in
# its "concepts" property and link the Work to each listed concept with
# an ASSOC_CONCEPT relationship.
# A Concept node is created from the listed id when it does not exist yet.
# Concepts handled here are marked with a 3 in the "pass" property to
# allow processing to retrieve all information about the concept.

concept_node_creation2 = (
    'CALL apoc.periodic.iterate("MATCH (w:Work) WHERE '
    '    w.pass = 3 RETURN w", '
    '    "WITH apoc.convert.fromJsonList(w.concepts) AS concepts,w '
    '    UNWIND concepts AS concept '
    '    MERGE (c:Concept {id: concept.id}) '
    "    SET c.source = 'OpenAlex', "
    '    c.pass = 3, '
    '    c.score = concept.score, '
    '    c.level = concept.level, '
    '    c.display_name = concept.display_name, '
    '    c.wikidata = concept.wikidata '
    '    MERGE (c)<-[:ASSOC_CONCEPT]-(w)", '
    '    {batchSize:200, parallel:false})'
)

# Uncomment the print command below to view the raw Cypher script used by Neo4j
#print(concept_node_creation2)

record, summary, keys = driver.execute_query(concept_node_creation2)
print(record[0][8])
print("2nd Round Concept import complete")

In [None]:
#This block retrieves authors of the Work nodes retrieved in the previous
# step (pass = 3) and creates a WROTE relationship that includes an
# "author_position" property to identify the position of the author's name
#If the author does not exist it is created using the
# id retrieved from the list of "authorships"
#Authors handled here are identified with a 3 in the "pass" property to
# allow processing to retrieve all information about the author
#The id of the first institution of each authorship is added to the
# author's "institution" list (an empty string when there is none).
# The list is kept and extended across authorships, so an author who
# appears with several institutions ends up with all of them
#Authorships without an author id are skipped, because MERGE cannot
# match on a null id

author_node_creation2 = """
CALL apoc.periodic.iterate(
    "MATCH (w:Work) WHERE w.pass = 3 RETURN w",
    "WITH w, apoc.convert.fromJsonList(w.authorships) AS ships
     UNWIND ships AS ship
     WITH w, ship, coalesce(ship.institutions[0].id, '') AS inst_id
     WHERE ship.author.id IS NOT NULL
     MERGE (a:Author {id: ship.author.id})
     SET a.source = 'OpenAlex',
         a.pass = 3,
         a.display_name = ship.author.display_name,
         a.orcid = ship.author.orcid,
         a.institution = CASE
             WHEN inst_id IN coalesce(a.institution, [])
             THEN a.institution
             ELSE coalesce(a.institution, []) + inst_id END
     MERGE (a)-[:WROTE {author_position: ship.author_position}]->(w)",
    {batchSize: 200, parallel: false})
"""

#Uncomment the print command below to view the raw Cypher script used by Neo4j
#print(author_node_creation2)

record, summary, keys = driver.execute_query(author_node_creation2)
# Column 8 of the apoc.periodic.iterate result - presumably the "batch"
# summary map; confirm against your APOC version
print(record[0][8])
print("2nd Round Author creation complete")

In [None]:
#This block retrieves the institutions of authors from the Work nodes
# retrieved in the previous step (pass = 3) and creates an institution
# node if one does not already exist with "display_name", "country_code",
# "ror" and "type" properties
#Only the first institution of each authorship is used; authorships
# without an institution are skipped so that no placeholder node with
# an empty id is created
#Institutions handled here are identified with a 3 in the "pass" property to
# allow processing to retrieve all information about the institution

institution_node_creation2 = """
CALL apoc.periodic.iterate(
    "MATCH (w:Work) WHERE w.pass = 3 RETURN w",
    "WITH apoc.convert.fromJsonList(w.authorships) AS ships
     UNWIND ships AS ship
     WITH ship.institutions[0] AS inst
     WHERE inst.id IS NOT NULL
     MERGE (i:Institutions {id: inst.id})
     SET i.source = 'OpenAlex',
         i.pass = 3,
         i.display_name = inst.display_name,
         i.country_code = inst.country_code,
         i.ror = inst.ror,
         i.type = inst.type",
    {batchSize: 200, parallel: false})
"""

#Uncomment the print command below to view the raw Cypher script used by Neo4j
#print(institution_node_creation2)

record, summary, keys = driver.execute_query(institution_node_creation2)
# Column 8 of the apoc.periodic.iterate result - presumably the "batch"
# summary map; confirm against your APOC version
print(record[0][8])
print("2nd Round Institution creation complete")

In [None]:
# Create an AFFILIATED_WITH relationship from each Author node marked with
# pass = 3 to every Institutions node whose id appears in the author's
# "institution" list (empty-string entries in the list are ignored).

institution_relationship_creation2 = (
    'CALL apoc.periodic.iterate("MATCH (a:Author) WHERE a.pass = 3 '
    '    RETURN a","UNWIND a.institution AS inst WITH inst,a '
    "    WHERE inst <> '' MATCH (i:Institutions) "
    '    WHERE i.id = inst MERGE (a)-[:AFFILIATED_WITH]->(i)", '
    '    {batchSize:200, parallel:false})'
)

# Uncomment the print command below to view the raw Cypher script used by Neo4j
#print(institution_relationship_creation2)

record, summary, keys = driver.execute_query(
    institution_relationship_creation2)
print(record[0][8])
print("2nd Round Institution Author Relationship creation complete")

In [None]:
#This block transforms the inverted_abstract for each Work node retrieved
# in the second round (pass = 3) into a regular abstract using pyalex's
# invert_abstract function

records, summary, keys = driver.execute_query(
    "MATCH (n:Work) WHERE n.pass = 3 AND n.inverted_abstract IS NOT NULL "
    "AND n.inverted_abstract <> 'null' RETURN n.inverted_abstract, n.id")

works_abstract_count2 = 0

for record in records:
    node_id = record["n.id"]
    inverted_json = record["n.inverted_abstract"]

    try:
        # The property holds the inverted index as JSON text, so it is
        # read with the JSON parser; this also copes with JSON-only
        # syntax such as null, true and escaped slashes
        inverted_index = json.loads(inverted_json)
        driver.execute_query(
            "MATCH (w:Work {id: $id}) SET w.abstract = $abstract",
            id=node_id, abstract=pyalex.invert_abstract(inverted_index))
        works_abstract_count2 += 1
    except Exception as error:
        # Report the failure and carry on with the next Work. Exception
        # (not a bare except) is caught so the loop can still be interrupted
        print("An exception occurred for Work id - " + node_id)
        print("Reason: " + str(error))
        print("Here is the abstract for this work:")
        print(inverted_json)

works_with_data_imported, summary, keys = driver.execute_query(
    "MATCH (n:Work) WHERE n.display_name IS NOT NULL RETURN COUNT(n)")

print(str(works_abstract_count2) + " Works have abstracts out of " +
      str(works_with_data_imported[0][0]) +
      " Works with data imported")

print(str(len(records) - works_abstract_count2) +
      " Works with an inverted abstract failed conversion" +
      " to a normal abstract")