In [1]:
import pandas as pd
from neo4j import GraphDatabase, basic_auth
import os
from dotenv import load_dotenv
from pathlib import Path

In [2]:
# Load the environment variables.
# Neither pathlib nor python-dotenv expands '~', so it is expanded explicitly here;
# with the literal path '~/.env' no file is found and nothing is loaded.
dotenv_path = Path('~/.env').expanduser()
load_dotenv(dotenv_path=dotenv_path)  # This line brings all environment variables from .env into os.environ

# Get variables (each is None when the variable is not set)
SUSTAINGRAPH_URI = os.getenv('SUSTAINGRAPH_URI')
SUSTAINGRAPH_USER = os.getenv('SUSTAINGRAPH_USER')
SUSTAINGRAPH_PASSWORD = os.getenv('SUSTAINGRAPH_PASSWORD')
database_name = os.getenv('DATABASE_NAME')

# Connect to database
driver = GraphDatabase.driver(SUSTAINGRAPH_URI, auth=(SUSTAINGRAPH_USER, SUSTAINGRAPH_PASSWORD))

# Verify connectivity by running a trivial query against the configured database
with driver.session(database=database_name) as session:
    print(session.run("RETURN 'Connected to ' + $db", db=database_name).single()[0])

Connected to neo4j


The development of the SustainGraph, related to the tracking and evolution of Sustainable Development Goal (SDG) indicators, is within the framework of the ARSINOE H2020 research project. The SustainGraph is enriched with data related to the Case Studies of the ARSINOE project.

## Case studies

### Constraints 


In [3]:
def create_constraint(tx, statement):
    """Run a single schema statement inside the given transaction.

    Parameters
    ----------
    tx : transaction-like object exposing ``run(statement)``
    statement : str
        The Cypher statement to execute.

    Returns
    -------
    None
        The result of ``tx.run`` is discarded.
    """
    tx.run(statement)
    return None

# Node key on CaseStudy: code, title and description must all be present and
# their combination must be unique among CaseStudy nodes.
constraint_cs = """
CREATE CONSTRAINT cs_unique IF NOT EXISTS 
FOR (cs:CaseStudy) REQUIRE (cs.code, cs.title, cs.description) IS NODE KEY
"""
# Property type constraints: each key property of a CaseStudy must be a STRING.
constraint_cs_type_code = """
CREATE CONSTRAINT cs_type_code IF NOT EXISTS
FOR (cs:CaseStudy) REQUIRE cs.code :: STRING
"""
constraint_cs_type_title = """
CREATE CONSTRAINT cs_type_title IF NOT EXISTS
FOR (cs:CaseStudy) REQUIRE cs.title :: STRING
"""
constraint_cs_type_descr = """
CREATE CONSTRAINT cs_type_descr IF NOT EXISTS
FOR (cs:CaseStudy) REQUIRE cs.description :: STRING
"""
statements = [constraint_cs,constraint_cs_type_code,constraint_cs_type_title,constraint_cs_type_descr]
# Every statement is submitted through its own execute_write call; the
# IF NOT EXISTS clauses make re-running this cell harmless.
with driver.session(database=database_name) as session:
    for statement in statements:
        session.execute_write(create_constraint, statement)

### CaseStudies Nodes

In [4]:
def cypher_run_params(tx, statement, params):
    """Run ``statement`` with ``params`` bound to ``$parameters`` and return its ``total``.

    Parameters
    ----------
    tx : transaction-like object exposing ``run``
    statement : str
        Cypher statement that reads ``$parameters`` and returns a ``total`` column.
    params : list
        Value passed to the statement as the ``parameters`` query parameter.

    Returns
    -------
    The ``total`` value of the first record returned by the statement.

    Raises
    ------
    IndexError
        If the statement returns no records.
    KeyError
        If the first record has no ``total`` column.
    """
    query_result = tx.run(statement, parameters={"parameters": params})
    first_record = query_result.data()[0]
    return first_record['total']

In [None]:
## Add Case Studies
# Cypher: for every row of $parameters, make sure a CaseStudy node with that
# title/code/description exists, then report how many distinct nodes were touched.
statement_cs = """
    UNWIND $parameters as row
    MERGE (c:CaseStudy{title:row.title,code:row.code,description:row.description})
    RETURN COUNT(DISTINCT c) AS total
    """

df_cs = pd.read_excel('Data/7.ArsinoeCaseStudies.xlsx', sheet_name='CS')

# One parameter dict per spreadsheet row; every value is sent as a string.
params = [
    {
        'title': str(cs_row['title']),
        'code': str(cs_row['code']),
        'description': str(cs_row['Description']),
    }
    for _, cs_row in df_cs.iterrows()
]

with driver.session(database=database_name) as neo4j_session:
    total = neo4j_session.execute_write(cypher_run_params, statement=statement_cs, params=params)
    expected_total = len(df_cs['code'])
    print('CaseStudies imported:', total, ', expected:', expected_total)


CaseStudies to be imported: 9 , expected: 9


### CaseStudies-Indicators
Each ARSINOE case study is associated with one or more UN SDG indicators.

In [None]:
## Add association with SDG Indicators
# Numeric weight stored on the relationship for each level found in the 'Type' column
weigth_cs = {'low':1,'medium':2,'high':3}

# Cypher: link each case study to the indicator named in the row; the weight and
# its textual level are stored on the ASSOCIATED_WITH relationship.
statement_cs_ind = """
    UNWIND $parameters as row
    MATCH (i:Indicator{code:row.ind_code}),(c:CaseStudy{code:row.cs_code})
    MERGE (c)-[a:ASSOCIATED_WITH{weight:row.weight,description:row.description}]->(i)
    RETURN COUNT(DISTINCT a) AS total
    """

df_cs_info = pd.read_excel('Data/7.ArsinoeCaseStudies.xlsx', sheet_name='CS_info')

params = []
for _, info_row in df_cs_info.iterrows():
    level = str(info_row['Type'])
    params.append({
        'ind_code': str(info_row['Indicator']),
        'cs_code': str(info_row['Code']),
        'description': level,
        'weight': weigth_cs[level],  # KeyError for a level missing from weigth_cs
    })

with driver.session(database=database_name) as neo4j_session:
    total = neo4j_session.execute_write(cypher_run_params, statement=statement_cs_ind, params=params)
    expected_total = len(df_cs_info['Code'])
    print('Relationships imported:', total, ', expected:', expected_total)

Relationships imported: 185 expected: 185


### Case Studies - GeoAreas
The operational areas of the case studies are given only as NUTS3 regions (or lower-level areas).

In [7]:
## Add connection with GeoAreas
# Read the 'CS_Areas' sheet of the case-study workbook and display it.
# The displayed columns used further down are 'code' and 'area_code'.
df_cs_areas = pd.read_excel("Data/7.ArsinoeCaseStudies.xlsx",sheet_name ='CS_Areas')
df_cs_areas

Unnamed: 0,title,code,area_code
0,Case Study 1,cs_1,EL301
1,Case Study 1,cs_1,EL302
2,Case Study 1,cs_1,EL303
3,Case Study 1,cs_1,EL304
4,Case Study 1,cs_1,EL305
5,Case Study 3,cs_3,DE263
6,Case Study 3,cs_3,DE26C
7,Case Study 3,cs_3,DE265
8,Case Study 3,cs_3,DE268
9,Case Study 4,cs_4,MK003


In [8]:
# Collapse the table to one row per case-study code, gathering that case
# study's area codes into a single list stored in the 'areas' column.
area_codes_by_case_study = df_cs_areas.groupby('code')['area_code']
df_cs_areas_grouped = area_codes_by_case_study.apply(list).reset_index(name='areas')
df_cs_areas_grouped

Unnamed: 0,code,areas
0,cs_1,"[EL301, EL302, EL303, EL304, EL305]"
1,cs_3,"[DE263, DE26C, DE265, DE268]"
2,cs_4,"[MK003, MK005, EL533, AL034]"
3,cs_5,"[ES703, ES704, ES705, ES706, ES707, ES708, ES709]"
4,cs_6,"[EL524, EL521, TR100, TR811, TR813, TR823, TR4..."
5,cs_7,[DK032]
6,cs_9,"[ITG2D, ITG2E, ITG2F, ITG2G, ITG2H]"


In [9]:
def cypher_run_cs(tx, statement, casestudies):
    """Run ``statement`` with ``casestudies`` bound to ``$casestudies``.

    Parameters
    ----------
    tx : transaction-like object exposing ``run``
    statement : str
        Cypher statement that reads the ``$casestudies`` parameter.
    casestudies : list
        Value passed to the statement as the ``casestudies`` query parameter.

    Returns
    -------
    Whatever ``data()`` of the query result returns (all returned records).
    """
    query_result = tx.run(statement, parameters={"casestudies": casestudies})
    all_records = query_result.data()
    return all_records

In [10]:
# Import data to the graph
# Cypher: for each case study, match the GeoArea nodes whose EUcode is one of the
# case study's area codes, link them with REFERS_TO_AREA and count the links per case study.
statement_cs_area = """
    UNWIND $casestudies as cs
    MATCH (c:CaseStudy{code:cs.cs_code}), (a:GeoArea)
    WHERE ANY(code IN cs.area_codes WHERE code=a.EUcode)
    MERGE (c)-[r:REFERS_TO_AREA]->(a) 
    RETURN c.code as cs_code, count(r) as areas
"""

# One payload entry per case study: its code plus the list of its area codes.
casestudies = []
for _, grouped_row in df_cs_areas_grouped.iterrows():
    casestudies.append({
        'cs_code': str(grouped_row['code']),
        'area_codes': grouped_row['areas'],
    })

# Number of areas each case study should end up linked to, taken from the payload.
expected_areas = pd.DataFrame(
    [{'cs_code': cs['cs_code'], 'areas': len(cs['area_codes'])} for cs in casestudies]
)

with driver.session(database=database_name) as neo4j_session:
    result = neo4j_session.execute_write(
        cypher_run_cs, statement=statement_cs_area, casestudies=casestudies
    )

# Show expected and actual counts one after the other for a visual comparison.
print('CaseStudies with GeoAreas to be imported: ')
print(expected_areas)
print('CaseStudies with GeoAreas imported: ')
print(pd.DataFrame(result))

CaseStudies with GeoAreas to be imported: 
  cs_code  areas
0    cs_1      5
1    cs_3      4
2    cs_4      4
3    cs_5      7
4    cs_6     13
5    cs_7      1
6    cs_9      5
CaseStudies with GeoAreas imported: 
  cs_code  areas
0    cs_1      5
1    cs_3      4
2    cs_4      4
3    cs_5      7
4    cs_6     13
5    cs_7      1
6    cs_9      5


### CaseStudies - Hazards
Each case study has specified the set of hazards that it aims to tackle.

In [11]:
# Hazards are stored in one sheet per case study, named 'cs1' ... 'cs9'.
# Passing the list of sheet names to a single read_excel call returns a
# {sheet_name: DataFrame} dict, so the workbook is opened and parsed once
# instead of once per sheet.
sheet_names = ['cs' + str(i) for i in range(1, 10)]
sheets_by_name = pd.read_excel('Data/6.Hazards_and_Innovations.xlsx', sheet_name=sheet_names)

# Keep the sheets in cs1..cs9 order and stack them into one table with a fresh index.
dataframes = [sheets_by_name[name] for name in sheet_names]
df_haz_cs = pd.concat(dataframes, ignore_index=True)
df_haz_cs

Unnamed: 0,Case Study,Category,Hazard,Weight
0,cs_1,Meteorological_Hydrological,Heatwave,High
1,cs_1,Environmental,Biodiversity loss,High
2,cs_1,Societal,Financial shock,Medium
3,cs_1,Geohazard,Aquifer recharge (systems failure/ outages),Medium
4,cs_1,Meteorological_Hydrological,Drought,Medium
...,...,...,...,...
172,cs_9,Biological,Locust,High
173,cs_9,Biological,Invasive weeds,High
174,cs_9,Biological,Human-Animal conflict/interaction,Medium
175,cs_9,Biological,Fungal plant disease,High


In [12]:
def cypher_hazards(tx, statement, hazards):
    """Run ``statement`` with ``hazards`` bound to ``$hazards`` and return its ``total``.

    Parameters
    ----------
    tx : transaction-like object exposing ``run``
    statement : str
        Cypher statement that reads ``$hazards`` and returns a ``total`` column.
    hazards : list
        Value passed to the statement as the ``hazards`` query parameter.

    Returns
    -------
    The ``total`` value of the first record returned by the statement.

    Raises
    ------
    IndexError
        If the statement returns no records.
    KeyError
        If the first record has no ``total`` column.
    """
    query_result = tx.run(statement, parameters={"hazards": hazards})
    first_record = query_result.data()[0]
    return first_record['total']

In [25]:
# Import data to the graph
# Cypher: match each hazard row to the Hazard node with the same name
# (case-insensitive) and link the case study to it, storing the weight and
# its lower-cased level on the ASSOCIATED_WITH relationship.
statement_cs_hazards = """
    UNWIND $hazards as hazard
    MATCH (c:CaseStudy{code:hazard.cs_code}), (h:Hazard)
    WHERE tolower(hazard.name) = tolower(h.name)
    MERGE (c)-[a:ASSOCIATED_WITH{weight:hazard.weight,description:tolower(hazard.description)}]->(h)
    RETURN count(a) as total
"""

# Numeric weight for each level found in the 'Weight' column
weigth_hz = {'Low':1,'Medium':2,'High':3}

hazards = []
for _, hazard_row in df_haz_cs.iterrows():
    level = str(hazard_row['Weight'])
    hazards.append({
        'cs_code': str(hazard_row['Case Study']),
        'name': str(hazard_row['Hazard']),
        'weight': weigth_hz[level],  # KeyError for a level missing from weigth_hz
        'description': level.lower(),
    })

with driver.session(database=database_name) as neo4j_session:
    total = neo4j_session.execute_write(cypher_hazards, statement=statement_cs_hazards, hazards=hazards)
    expected_total = len(hazards)
    print('Total number of relationships imported:', total, ', expected:', expected_total)

Total number of relationships imported: 177 , expected: 177


## Stakeholders


Each case study is related to a set of high-interest stakeholders that have either a high or a low influence on it. Stakeholders are classified into sectors that belong to three categories: 'Economic Activity', 'Policy, Public and Civil Society' and 'Research/Academia'. Every stakeholder is classified into one or more sectors and subsectors of the 'Economic Activity' category according to the NACE classification (https://nacev2.com/en). Academic or research institutes are also classified into sectors and subsectors of the 'Research/Academia' category, based on the Field of Science (FOS) classification (https://www.oecd.org/science/inno/38235147.pdf), and individuals are classified into sectors and subsectors of the 'Policy, Public and Civil Society' category, based on the humanitarian decision makers taxonomy (https://www.alnap.org/help-library/the-humanitarian-decision-makers-taxonomy). Each stakeholder operates in GeoAreas identified at NUTS level.

### Constraints 

In [14]:
def create_constraint(tx, statement):
    """Run a single schema statement inside the given transaction.

    NOTE: a function with this name and body is also defined in the
    "Case studies" constraints cell; this later definition shadows it.

    Parameters
    ----------
    tx : transaction-like object exposing ``run(statement)``
    statement : str
        The Cypher statement to execute.

    Returns
    -------
    None
        The result of ``tx.run`` is discarded.
    """
    tx.run(statement)
    return None

# Node key on Stakeholder: name must be present and unique among Stakeholder nodes.
constraint_sh = """
CREATE CONSTRAINT sh_unique IF NOT EXISTS 
FOR (sh:Stakeholder) REQUIRE sh.name IS NODE KEY
"""
# Property type constraint: a Stakeholder's name must be a STRING.
constraint_sh_type_name = """
CREATE CONSTRAINT sh_type_name IF NOT EXISTS
FOR (sh:Stakeholder) REQUIRE sh.name :: STRING
"""
# Node key on Sector: all five properties must be present and their
# combination must be unique among Sector nodes.
constraint_sec = """
CREATE CONSTRAINT sec_unique IF NOT EXISTS 
FOR (sec:Sector) REQUIRE (sec.name, sec.code, sec.parentSectorName, sec.parentSectorCode, sec.category) IS NODE KEY
"""
# Property type constraints: every key property of a Sector must be a STRING.
constraint_sec_type_name = """
CREATE CONSTRAINT sec_type_name IF NOT EXISTS
FOR (sec:Sector)  REQUIRE sec.name :: STRING
"""
constraint_sec_type_code = """
CREATE CONSTRAINT sec_type_code IF NOT EXISTS
FOR (sec:Sector)  REQUIRE sec.code :: STRING
"""
constraint_sec_type_parn = """
CREATE CONSTRAINT sec_type_parn IF NOT EXISTS
FOR (sec:Sector)  REQUIRE sec.parentSectorName :: STRING
"""
constraint_sec_type_parc = """
CREATE CONSTRAINT sec_type_parc IF NOT EXISTS
FOR (sec:Sector)  REQUIRE sec.parentSectorCode :: STRING
"""
constraint_sec_type_cat = """
CREATE CONSTRAINT sec_type_cat IF NOT EXISTS
FOR (sec:Sector)  REQUIRE sec.category :: STRING
"""
statements = [constraint_sh,constraint_sh_type_name,constraint_sec,constraint_sec_type_name,
              constraint_sec_type_code,constraint_sec_type_parn,constraint_sec_type_parc,constraint_sec_type_cat]
# Every statement is submitted through its own execute_write call; the
# IF NOT EXISTS clauses make re-running this cell harmless.
with driver.session(database=database_name) as session:
    for statement in statements:
        session.execute_write(create_constraint, statement)

In [None]:
# Load the stakeholder classification workbook. The 'NACE Codes' column is passed
# through str on read, and the three classification columns are renamed to
# identifier-friendly names so they can be used with attribute access later on.
df_sh = pd.read_excel(
    'Data/7.Stakeholders_classification.xlsx',
    converters={'NACE Codes': str},
).rename(
    columns={
        "NACE Sector": "NACE_Sector",
        "FOS Classification": "FOS_Classification",
        "FOS Sub-classification": "FOS_Subclassification",
    }
)

In [16]:
# Derive separate code/name columns from the combined classification columns.
# n=1 splits on the FIRST separator only: without it, a sector name that itself
# contains '-' or '(' produces more than two columns and the two-column
# assignment below raises "Columns must be same length as key".
df_sh[['NACE_Sector_code','NACE_Sector_name']] = df_sh.NACE_Sector.str.split("-", n=1, expand=True)
df_sh[['FOS_code','FOS_classification']] = df_sh.FOS_Classification.str.split("(", n=1, expand=True)
# The name part still ends with the closing parenthesis of "code (name)"; drop it.
df_sh['FOS_classification'] = df_sh['FOS_classification'].str.rstrip(')')
df_sh[['FOS_sub_code','FOS_subclassification']] = df_sh.FOS_Subclassification.str.split("(", n=1, expand=True)
df_sh['FOS_subclassification'] = df_sh['FOS_subclassification'].str.rstrip(')')
# Trim surrounding whitespace from every string cell; non-string cells are left untouched.
df_sh = df_sh.map(lambda x: x.strip() if isinstance(x, str) else x)
df_sh


Unnamed: 0,id,Stakeholder Name,Operational Geoarea (NUTS Code),NACE_Sector,NACE Subsector,NACE Codes,FOS_Classification,FOS_Subclassification,Decision makers taxonomy Sector,Decision makers taxonomy Subsector,Influence,Interest,NACE_Sector_code,NACE_Sector_name,FOS_code,FOS_classification,FOS_sub_code,FOS_subclassification
0,1,"Region of Attica, General Directorate of Clima...",EL30,O - Public administration and defence; compuls...,Regulation of the activities of providing heal...,84.12,,,Public Sector,Provincal Authority,High,High,O,Public administration and defence; compulsory ...,,,,
1,2,Municipality of Athens and 40 municipalities o...,EL30,O - Public administration and defence; compuls...,General public administration activities,84.11,,,Public Sector,Municipal Authority,High,High,O,Public administration and defence; compulsory ...,,,,
2,3,Municipality of Chalandri (UIA Project),EL30,O - Public administration and defence; compuls...,General public administration activities,84.11,,,Public Sector,Municipal Authority,High,High,O,Public administration and defence; compulsory ...,,,,
3,4,"Directorate of Strategic Planning, City of Athens",EL30,O - Public administration and defence; compuls...,Regulation of the activities of providing heal...,84.12,,,Public Sector,Municipal Authority,High,High,O,Public administration and defence; compulsory ...,,,,
4,5,Directorate of Greening,EL30,O - Public administration and defence; compuls...,Regulation of the activities of providing heal...,84.12,,,Public Sector,Municipal Authority,High,High,O,Public administration and defence; compulsory ...,,,,
5,6,synAthina,EL30,S - Other Service Activities,Activities of membership organizations,94.9,,,Non-Governmental Organizations,Operational Organization,High,High,S,Other Service Activities,,,,
6,7,Urban Dig Project,EL30,"R - Arts, Entertainment and Recreation","Creative, arts and entertainment activities",90.0,,,Individuals,National Population - Volunteer Groups,High,High,R,"Arts, Entertainment and Recreation",,,,
7,8,Athens Climate Lab,EL30,S - Other Service Activities,Activities of membership organizations,94.9,,,Individuals,National Population - Volunteer Groups,High,High,S,Other Service Activities,,,,
8,9,SOMA HELLINON PROSKOPON,EL,S - Other Service Activities,Activities of membership organizations,94.9,,,Individuals,National Population - Volunteer Groups,High,High,S,Other Service Activities,,,,
9,10,WWF,EL,S - Other Service Activities,Activities of membership organizations,94.9,,,Non-Governmental Organizations,Advocacy Organization,High,High,S,Other Service Activities,,,,


In [17]:
def create_sector(name, code, parent, parentCode, category):
    """Build the dictionary describing one sector.

    Parameters
    ----------
    name : value stored under ``'name'``
    code : value stored under ``'code'``
    parent : value stored under ``'parentSector'``
    parentCode : value stored under ``'parentSectorCode'``
    category : value stored under ``'category'``

    Returns
    -------
    dict
        The five values under the keys listed above, in that order.
    """
    return dict(
        name=name,
        code=code,
        parentSector=parent,
        parentSectorCode=parentCode,
        category=category,
    )

# Build one classification payload per stakeholder row of df_sh.
classifications = []
for _, sh_row in df_sh.iterrows():
    # The NACE ('Economic Activity') sector is added for every row.
    sectors = [
        create_sector(sh_row['NACE Subsector'], sh_row['NACE Codes'],
                      sh_row['NACE_Sector_name'], sh_row['NACE_Sector_code'],
                      'Economic Activity')
    ]
    # The FOS sector is added only when the row has a FOS classification.
    if pd.notnull(sh_row['FOS_Classification']):
        sectors.append(
            create_sector(sh_row['FOS_subclassification'], sh_row['FOS_sub_code'],
                          sh_row['FOS_classification'], sh_row['FOS_code'],
                          'Research/Academia')
        )
    # The decision-makers taxonomy has no codes, so 'NA' is used for both code fields.
    if pd.notnull(sh_row['Decision makers taxonomy Subsector']):
        sectors.append(
            create_sector(sh_row['Decision makers taxonomy Subsector'], 'NA',
                          sh_row['Decision makers taxonomy Sector'], 'NA',
                          'Policy, Public and Civil Society')
        )

    # Weights: 3 for 'High', 1 for any other value.
    classifications.append({
        'sh_name': sh_row['Stakeholder Name'],
        'interest': sh_row['Interest'],
        'weightOfInterest': 3 if sh_row['Interest'] == 'High' else 1,
        'influence': sh_row['Influence'],
        'weightOfInfluence': 3 if sh_row['Influence'] == 'High' else 1,
        'area': sh_row['Operational Geoarea (NUTS Code)'],
        'sectors': sectors,
    })


In [18]:
def cypher_sh(tx, statement, params):
    """Run ``statement`` with ``params`` bound to ``$classifications``.

    Parameters
    ----------
    tx : transaction-like object exposing ``run``
    statement : str
        Cypher statement that reads the ``$classifications`` parameter.
    params : list
        Value passed to the statement as the ``classifications`` query parameter.

    Returns
    -------
    The object returned by ``tx.run``, unmodified.

    NOTE(review): the result is returned without being read here; confirm that
    callers do not rely on its records after the transaction has finished.
    """
    query_result = tx.run(statement, parameters={"classifications": params})
    return query_result

# Cypher: every classification is attached to case study 'cs_1' and to the GeoArea(s)
# matching its area; the Stakeholder node, its HAS_INTEREST and REFERS_TO_AREA
# relationships, its Sector nodes and the BELONGS_TO links are merged.
statement_sh = """
        UNWIND $classifications as class 
        MATCH (c:CaseStudy{code:'cs_1'}), (g:GeoArea)
        WHERE class.area in g.code
        MERGE (s:Stakeholder{name:class.sh_name})
        MERGE (s)-[:HAS_INTEREST{interest:class.interest,weightOfInterest:class.weightOfInterest,influence:class.influence,weightOfInfluence:class.weightOfInfluence}]->(c)
        MERGE (s)-[:REFERS_TO_AREA]->(g)
        WITH s, class
        UNWIND class.sectors as sector
        MERGE(sec:Sector{name:sector.name,code:sector.code,parentSectorName:sector.parentSector,parentSectorCode:sector.parentSectorCode, category:sector.category})
        MERGE (s)-[:BELONGS_TO]->(sec)
        """

# The statement has no RETURN clause, so cypher_sh is used here rather than
# cypher_run_params, which would look for a 'total' column.
with driver.session(database=database_name) as neo4j_session:
    total = neo4j_session.execute_write(cypher_sh, statement=statement_sh, params=classifications)

# The figures below are computed from the spreadsheet, not read back from the graph.
distinct_nace_codes = df_sh["NACE Codes"].dropna().unique()
distinct_fos_sub_codes = df_sh["FOS_sub_code"].dropna().unique()
distinct_dm_subsectors = df_sh["Decision makers taxonomy Subsector"].dropna().unique()
sector_node_count = len(distinct_nace_codes) + len(distinct_fos_sub_codes) + len(distinct_dm_subsectors)
print("Total number of Sector nodes: ", sector_node_count)

stakeholder_count = len(df_sh["id"].unique())
print("Total number of Stakeholders: ", stakeholder_count)

Total number of Sector nodes:  24
Total number of Stakeholders:  32
