In [1]:
from neo4j import GraphDatabase, basic_auth
import neo4j
import pandas as pd
import numpy as np
import time
import os
from dotenv import load_dotenv
from pathlib import Path

In [2]:
# Load the environment variables.
# pathlib does not expand "~" on its own: Path('~/.env') would point at a literal
# "~" directory under the current working directory, and load_dotenv would then
# silently load nothing. expanduser() resolves it to the user's home directory.
dotenv_path = Path('~/.env').expanduser()
load_dotenv(dotenv_path=dotenv_path)  # This line brings all environment variables from .env into os.environ

# Get variables
SUSTAINGRAPH_URI = os.getenv('SUSTAINGRAPH_URI')
SUSTAINGRAPH_USER = os.getenv('SUSTAINGRAPH_USER')
SUSTAINGRAPH_PASSWORD = os.getenv('SUSTAINGRAPH_PASSWORD')
database_name = os.getenv('DATABASE_NAME')

# Fail early with a readable message instead of an obscure driver error
# when the connection URI was not provided by the environment / .env file.
if not SUSTAINGRAPH_URI:
    raise RuntimeError(
        'SUSTAINGRAPH_URI is not set; define it in the environment or in {}'.format(dotenv_path)
    )

# Connect to database
driver = GraphDatabase.driver(SUSTAINGRAPH_URI, auth=(SUSTAINGRAPH_USER, SUSTAINGRAPH_PASSWORD))

# Verify connectivity
with driver.session(database=database_name) as session:
    print(session.run("RETURN 'Connected to ' + $db", db=database_name).single()[0])

Connected to neo4j


### Constraints

In [3]:
def create_constraint(tx, statement) -> None:
    """Execute one schema statement inside the given transaction.

    Intended as the unit of work passed to ``session.execute_write``.

    Args:
        tx: Transaction object exposing ``run``.
        statement: Cypher text to execute (here: a ``CREATE CONSTRAINT`` statement).

    Returns:
        None. The result of ``tx.run`` is not returned to the caller.
    """
    tx.run(statement)

# Schema statements for the 6 Transformations sub-graph. Every statement uses
# IF NOT EXISTS, so this cell can be re-run against a database that already has them.
constraints = [
    # Node keys on the identifying properties of each label.
    """CREATE CONSTRAINT sixt_unique IF NOT EXISTS FOR (n:Transformation) REQUIRE (n.number,n.title) IS NODE KEY""",
    """CREATE CONSTRAINT ministry_unique IF NOT EXISTS FOR (n:Ministry) REQUIRE n.title IS NODE KEY""",
    """CREATE CONSTRAINT intervention_unique IF NOT EXISTS FOR (n:Intervention) REQUIRE n.title IS NODE KEY""",
    """CREATE CONSTRAINT intoutput_unique IF NOT EXISTS FOR (n:IntermediateOutput) REQUIRE n.title IS NODE KEY""",
    # Property type constraints.
    """CREATE CONSTRAINT sixt_title_type IF NOT EXISTS FOR (n:Transformation) REQUIRE n.title IS :: STRING""",
    """CREATE CONSTRAINT sixt_number_type IF NOT EXISTS FOR (n:Transformation) REQUIRE n.number IS :: INTEGER""",
    # NOTE(review): the three constraints below are named *_number_type but constrain
    # the `title` property; the names look copy-pasted. Renaming them would leave the
    # old names in databases where they were already created -- confirm before changing.
    """CREATE CONSTRAINT ministry_number_type IF NOT EXISTS FOR (n:Ministry) REQUIRE n.title IS :: STRING""",
    """CREATE CONSTRAINT intervention_number_type IF NOT EXISTS FOR (n:Intervention) REQUIRE n.title IS :: STRING""",
    """CREATE CONSTRAINT intoutput_number_type IF NOT EXISTS FOR (n:IntermediateOutput) REQUIRE n.title IS :: STRING""" 
]

# Each statement is submitted through its own execute_write call.
with driver.session(database=database_name) as session:
    for statement_constraint in constraints:
        session.execute_write(create_constraint, statement_constraint)

### Write batch function

In [4]:
def write_batch(tx, statement, params_list) -> None:
    """Run a batched Cypher statement with the whole batch bound to ``$parameters``.

    The statements used with this helper start with ``UNWIND $parameters as row``,
    so each element of ``params_list`` becomes one ``row`` inside the query.

    Args:
        tx: Transaction object exposing ``run``.
        statement: Cypher text that reads the ``$parameters`` query parameter.
        params_list: Batch of per-row parameter dicts.

    Returns:
        None. The result of ``tx.run`` is not returned to the caller.
    """
    query_parameters = {"parameters": params_list}
    tx.run(statement, parameters=query_parameters)

### Import 6Transformations

The six SDG Transformations are modular building blocks of SDG achievement: 

- education, gender, and inequality
- health, well-being, and demography
- energy decarbonization and sustainable industry
- sustainable food, land, water, and oceans
- sustainable cities and communities
- digital revolution for sustainable development

Each Transformation identifies priority investments and regulatory challenges, calling for actions by well-defined parts of government working with business and civil society. 

The [6T article](https://www.nature.com/articles/s41893-019-0352-9.epdf?author_access_token=PYxHIfTzicPDZ1f8Mpi4ddRgN0jAjWel9jnR3ZoTv0OUvCcY5pZ8AaTx0MkoopkKOS7OzdwrSBL-nqy90SNoGgHmldD2otpknRagcTqK2IJMLpfAw86QRMHl3QEjytXGWz5FGotx9W9u1jWK0QbJVw%3D%3D) describes how each Transformation comprises key SDG interventions that together generate intermediate outputs, which in turn serve as inputs into achieving the SDGs. Furthermore, Table 1 of the [6T article](https://www.nature.com/articles/s41893-019-0352-9.epdf?author_access_token=PYxHIfTzicPDZ1f8Mpi4ddRgN0jAjWel9jnR3ZoTv0OUvCcY5pZ8AaTx0MkoopkKOS7OzdwrSBL-nqy90SNoGgHmldD2otpknRagcTqK2IJMLpfAw86QRMHl3QEjytXGWz5FGotx9W9u1jWK0QbJVw%3D%3D) lists the line ministries that would oversee the implementation of each Transformation.

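In the folder `Data`, the Excel file `5.PolicyFramework_6Transformations.xlsx` contains the data related to the 6 Transformations, organised in four sheets: `Info` (the Transformations and their titles), `IntOutput` (the interventions and intermediate outputs of each Transformation), `SDG_Weights` (the weighted associations between intermediate outputs and SDGs) and `Ministry` (the ministries and their association with the Transformations).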


In [5]:
# Single source of truth for the workbook location (previously repeated four times).
sixt_workbook = 'Data/5.PolicyFramework_6Transformations.xlsx'

# Passing a list of sheet names makes pandas open and parse the workbook once and
# return a dict {sheet name: DataFrame} instead of re-reading the file per sheet.
sixt_sheets = pd.read_excel(
    sixt_workbook,
    sheet_name=['Info', 'IntOutput', 'SDG_Weights', 'Ministry'],
)

## 6Transformation nodes
df_6t = sixt_sheets['Info']

## Interventions & Intermediate Outputs
df_int = sixt_sheets['IntOutput']

## Association with SDGs
# Human-readable description for each non-zero weight (keys are the weight as text).
transf_dict = {'1':'enables the SDG','2':'reinforces the SDG','3':'directly targets the SDG'}

df_int_sdg = sixt_sheets['SDG_Weights']
# Weight 0 has no entry in transf_dict, so those rows are left out of the import.
df = df_int_sdg.loc[df_int_sdg['Weight']!=0]

## Association with ministries
df_min = sixt_sheets['Ministry']


# Batched Cypher statements. Each one expects the batch as the `$parameters` query
# parameter (a list of maps) and processes it row by row via UNWIND.

# One Transformation node per row, identified by title and number.
statement_sixt = """
    UNWIND $parameters as row
    MERGE (t:Transformation{title:row.title,number:row.number})
    """

# Transformation -COMPRISES-> Intervention -CONTRIBUTES_TO-> IntermediateOutput.
# The Transformation is looked up with MATCH, so it must already exist;
# Intervention and IntermediateOutput nodes are merged by title.
statement_sixtpath = """
    UNWIND $parameters as row
    MATCH (t:Transformation{number:row.number})
    MERGE (i:Intervention{title:row.int_title})
    MERGE (int:IntermediateOutput{title:row.io_title})
    MERGE (t)-[:COMPRISES]->(i)
    MERGE (i)-[:CONTRIBUTES_TO]->(int)
    """

# IntermediateOutput -ASSOCIATED_WITH{weight, description}-> Goal.
# Both end nodes are looked up with MATCH. Goal nodes are not created in the cells
# shown here -- presumably they are loaded by another notebook; confirm before running.
statement_sdg_sixt = """
    UNWIND $parameters as row
    MATCH (g:Goal{code:row.number})
    MATCH (int:IntermediateOutput{title:row.io_title})
    MERGE (int)-[:ASSOCIATED_WITH{weight:row.weight,description:row.desc}]->(g)
    """
# Ministry -OVERSEE_THE_IMPLEMENTATION_OF-> Transformation.
# The Transformation is looked up by number; Ministry nodes are merged by title.
statement_ministry = """
    UNWIND $parameters as row
    MATCH (t:Transformation{number:row.number})
    MERGE (m:Ministry{title:row.m_title})
    MERGE (m)-[:OVERSEE_THE_IMPLEMENTATION_OF]->(t)
    """

def _import_batch(statement, params):
    """Send one parameter batch to the database and print how long the write took.

    Opens a session on ``database_name``, runs ``statement`` through ``write_batch``
    in a single ``execute_write`` call and prints the batch size with the elapsed
    time (in minutes).

    Args:
        statement: Cypher text that reads the ``$parameters`` query parameter.
        params: List of per-row parameter dicts forming the batch.
    """
    with driver.session(database=database_name) as session:
        st = time.time()  # Only the database write is timed, not the batch preparation
        session.execute_write(write_batch, params_list = params,statement = statement)
        elapsed_time = time.time() - st
        print('{} observations: Done! ({} minutes)'.format(len(params), elapsed_time/60))


# Transformation nodes
params = [
    {'title':str(title),'number':int(number)}
    for title, number in zip(df_6t['Title'], df_6t['Transformation'])
]
_import_batch(statement_sixt, params)

# Transformation -> Intervention -> IntermediateOutput paths
params = [
    {'number':int(number),'int_title':str(int_title),'io_title':str(io_title)}
    for number, int_title, io_title in zip(
        df_int['Transformation'], df_int['Interventions'], df_int['IntermediateOutput'])
]
_import_batch(statement_sixtpath, params)

# IntermediateOutput -> Goal associations.
# The description is looked up via str(int(weight)) so that a Weight column parsed as
# float (e.g. 1.0) still maps to the '1'/'2'/'3' keys instead of raising KeyError.
params = [
    {'number':str(sdg),'weight':int(weight),'desc':transf_dict[str(int(weight))],
     'io_title':str(io_title)}
    for sdg, weight, io_title in zip(df['SDG'], df['Weight'], df['Outputs'])
]
_import_batch(statement_sdg_sixt, params)

# Ministry -> Transformation associations
params = [
    {'number':int(number),'m_title':str(m_title)}
    for number, m_title in zip(df_min['Transformation'], df_min['Ministry'])
]
_import_batch(statement_ministry, params)


6 observations: Done! (0.001082146167755127 minutes)
24 observations: Done! (0.0011292894681294758 minutes)
173 observations: Done! (0.001313630739847819 minutes)
19 observations: Done! (0.0007598121960957845 minutes)


> Check cypher query

In [None]:
def _report_count(query, column, message, expected):
    """Run a count query on a read-routed connection and print it next to the expected value.

    Args:
        query: Cypher text returning a single row with the count in ``column``.
        column: Name of the returned column holding the count.
        message: Format string using the ``{rels}``, ``{expected}`` and ``{time}`` fields.
        expected: Count derived from the source data, shown for comparison.
    """
    records, summary, keys = driver.execute_query(query,routing_="r",database_=database_name)
    print(message.format(
        rels=records[0][column],
        time=summary.result_available_after,
        expected = expected
    ))


_report_count("""\
       match (t:Transformation) RETURN COUNT(DISTINCT t) as sixt
        """,
    'sixt',
    "{rels} Transformations (expected:{expected}) in {time} ms.",
    len(df_6t))

# Intervention nodes are MERGEd on their (stringified) title, so the expected number of
# nodes is the number of distinct titles, not the number of rows in the sheet.
_report_count("""\
       match (t:Intervention) RETURN COUNT(DISTINCT t) as sixt
        """,
    'sixt',
    "{rels} Intervention (expected:{expected}) in {time} ms.",
    df_int['Interventions'].astype(str).nunique())

# NOTE(review): len(df) is only the expected count if every row matched an existing
# Goal and IntermediateOutput and no row is duplicated -- confirm against the data.
_report_count("""\
       MATCH (int:IntermediateOutput)-[r:ASSOCIATED_WITH]->(g:Goal) return count(distinct r) as rel
        """,
    'rel',
    "{rels} IntermediateOutput-ASSOCIATED_WITH-Goal (expected:{expected}) in {time} ms.",
    len(df))

_report_count("""\
       MATCH (min:Ministry)-[r:OVERSEE_THE_IMPLEMENTATION_OF]->(t) return count(distinct r) as mins
        """,
    'mins',
    "{rels} Ministries-OVERSEE_THE_IMPLEMENTATION_OF-Transformations (expected:{expected}) in {time} ms.",
    len(df_min))

6 Transformations (expected:6) in 20 ms.
24 Intervention (expected:24) in 12 ms.
173 IntermediateOutput-ASSOCIATED_WITH-Goal (expected:173) in 19 ms.
19 Ministries-OVERSEE_THE_IMPLEMENTATION_OF-Transformations (expected:19) in 24 ms.
