In [1]:
from neo4j import GraphDatabase, basic_auth
import os
from dotenv import load_dotenv
from pathlib import Path
import requests, pandas as pd
import xml.etree.ElementTree as ET

In [2]:
# Load the environment variables.
# pathlib does not expand "~" by itself, so without expanduser() the path
# would point at a literal "~" directory relative to the working directory
# and the .env file in the home directory would not be read.
dotenv_path = Path('~/.env').expanduser()
load_dotenv(dotenv_path=dotenv_path)  # This line brings all environment variables from .env into os.environ

# Get variables
SUSTAINGRAPH_URI = os.getenv('SUSTAINGRAPH_URI')
SUSTAINGRAPH_USER = os.getenv('SUSTAINGRAPH_USER')
SUSTAINGRAPH_PASSWORD = os.getenv('SUSTAINGRAPH_PASSWORD')
database_name = os.getenv('DATABASE_NAME')

# Connect to database (the driver is kept open: later cells reuse it)
driver = GraphDatabase.driver(SUSTAINGRAPH_URI, auth=(SUSTAINGRAPH_USER, SUSTAINGRAPH_PASSWORD))

# Verify connectivity by running a trivial query against the target database
with driver.session(database=database_name) as session:
    print(session.run("RETURN 'Connected to ' + $db", db=database_name).single()[0])

Connected to neo4j


### Constraints

In [3]:
def create_constraint(tx,statement):
    """Run a single schema statement inside the given transaction.

    Parameters
    ----------
    tx : transaction-like object exposing ``run``
    statement : str
        Cypher text to execute (used here for CREATE CONSTRAINT statements).

    The result of ``tx.run`` is discarded; nothing is returned.
    """
    run_statement = tx.run
    run_statement(statement)

In [4]:
# Innovations
# Node key: an Innovation is identified by the combination of these five properties.
constraint_innov = """
CREATE CONSTRAINT innov_unique IF NOT EXISTS
FOR (i:Innovation) REQUIRE (i.name, i.typeOfSolution, i.trl, i.applicationDomain,i.permalink) IS NODE KEY
"""
# Property type constraints, one per Innovation property.
constraint_innov_type_name = """
CREATE CONSTRAINT innov_type_name IF NOT EXISTS
FOR (i:Innovation) REQUIRE i.name :: STRING
"""
constraint_innov_type_trl = """
CREATE CONSTRAINT innov_type_trl IF NOT EXISTS
FOR (i:Innovation) REQUIRE i.trl :: INTEGER
"""
constraint_innov_type_app = """
CREATE CONSTRAINT innov_type_app IF NOT EXISTS
FOR (i:Innovation) REQUIRE i.applicationDomain :: LIST<STRING NOT NULL>
"""
# Each constraint needs its own name: with IF NOT EXISTS, a statement that
# reuses an existing constraint name is silently skipped, so sharing the
# name "innov_type_app" meant this typeOfSolution constraint was never created.
constraint_innov_type_sol = """
CREATE CONSTRAINT innov_type_sol IF NOT EXISTS
FOR (i:Innovation) REQUIRE i.typeOfSolution :: LIST<STRING NOT NULL>
"""
constraint_innov_type_descr = """
CREATE CONSTRAINT innov_type_descr IF NOT EXISTS
FOR (i:Innovation) REQUIRE i.description :: STRING
"""
constraint_innov_type_trldescr = """
CREATE CONSTRAINT innov_type_trldescr IF NOT EXISTS
FOR (i:Innovation) REQUIRE i.trlDescription :: STRING
"""
constraint_innov_type_permalink = """
CREATE CONSTRAINT innov_type_permalink IF NOT EXISTS
FOR (i:Innovation) REQUIRE i.permalink :: STRING
"""

In [5]:
# Innovation constraints, applied in this order.
statements = [
    constraint_innov,
    constraint_innov_type_name,
    constraint_innov_type_trl,
    constraint_innov_type_app,
    constraint_innov_type_sol,
    constraint_innov_type_trldescr,
    constraint_innov_type_descr,
    constraint_innov_type_permalink,
]

# One session for the whole batch; every statement goes through its own
# execute_write call.
with driver.session(database=database_name) as session:
    for statement in statements:
        session.execute_write(create_constraint, statement)

In [6]:
#Hazards
# Cypher schema statements for Hazard nodes. All use IF NOT EXISTS, and each
# constraint carries a distinct name.

# Node key over the (type, cluster, name) triple of a Hazard.
constraint_hazard = """
CREATE CONSTRAINT hazard_unique IF NOT EXISTS 
FOR (h:Hazard) REQUIRE (h.type,h.cluster,h.name) IS NODE KEY
"""
# Property type constraint: Hazard.type must be a STRING.
constraint_hazard_type = """
CREATE CONSTRAINT hazard_type_type IF NOT EXISTS
FOR (h:Hazard) REQUIRE h.type :: STRING
"""
# Property type constraint: Hazard.cluster must be a STRING.
constraint_hazard_cluster = """
CREATE CONSTRAINT hazard_type_cluster IF NOT EXISTS
FOR (h:Hazard) REQUIRE h.cluster :: STRING
"""
# Property type constraint: Hazard.name must be a STRING.
constraint_hazard_name = """
CREATE CONSTRAINT hazard_type_name IF NOT EXISTS
FOR (h:Hazard) REQUIRE h.name :: STRING
"""

# Hazard constraints, applied in this order.
statements = [
    constraint_hazard,
    constraint_hazard_type,
    constraint_hazard_cluster,
    constraint_hazard_name,
]

# One session for the whole batch; every statement goes through its own
# execute_write call.
with driver.session(database=database_name) as session:
    for statement in statements:
        session.execute_write(create_constraint, statement)

### Hazards (UNDRR)

In [7]:
def cypher_run_params(tx,statement, params):
    """Run a parameterised statement and return its ``total`` column.

    Parameters
    ----------
    tx : transaction-like object exposing ``run``
    statement : str
        Cypher text; ``params`` is passed to it as the query parameter
        named ``parameters``.
    params : object
        Value bound to ``$parameters`` (the callers in this notebook pass a
        list of dicts).

    Returns
    -------
    The ``total`` field of the first record of the result.
    """
    result = tx.run(statement, parameters={"parameters": params})
    first_row = result.data()[0]
    return first_row['total']

In [8]:
# Hazard list, read from the 'Hazards_UNDRR' sheet of the workbook.
df_haz = pd.read_excel('Data/6.Hazards_and_Innovations.xlsx',sheet_name ='Hazards_UNDRR')

# One MERGE per input row; the returned total is the number of distinct
# Hazard nodes touched by the statement.
statement_hazards = """
    UNWIND $parameters as row
    MERGE (hz:Hazard{type:row.type,cluster:row.cluster,name:row.name})
    RETURN COUNT(DISTINCT hz) as total
    """

# Build the query parameters: one dict per sheet row, values title-cased.
params = [
    {
        'type': str(hazard['Hazard type'].title()),
        'cluster': str(hazard['Hazard cluster'].title()),
        'name': str(hazard['Specific hazard'].title()),
    }
    for _, hazard in df_haz.iterrows()
]

with driver.session(database=database_name) as session:
    total = session.execute_write(cypher_run_params, statement = statement_hazards, params=params)
    print('Hazards to be imported: ',len(df_haz))
    print('Total hazards imported: ',total)

Hazards to be imported:  302
Total hazards imported:  302


## Innovations

The Climate Innovation Window (CIW) aims to be a reference portal for innovations on climate change adaptation. The Climate Innovation Window enables users to browse the innovations database by issues-hazards (coastal floods, droughts, heatwaves, heavy precipitation, river floods, pluvial floods, storms, wildfires and multi-hazards, etc.), areas (agriculture, energy, forestry, biodiversity, urban areas, etc.) and solutions (education, governance, models & tools, nature-based solutions, etc.). The maturity of each innovation is assessed through the Technical Readiness Level (TRL), a scale consisting of nine levels where each level characterizes the progress in the development of the innovation, from the initial idea (Level 1) to the full uptake of the product into the marketplace (Level 9).

In [9]:
def cypher_run(tx,statement, params):
    """Execute ``statement`` with ``params`` and return the ``total`` value.

    ``params`` is bound to the query parameter ``$parameters``. The value
    returned is the ``total`` field of the first record produced.
    """
    query_result = tx.run(statement, parameters={"parameters": params})
    rows = query_result.data()
    return rows[0]['total']
def cypher_total (tx, statement):
    """Execute a parameterless ``statement`` and return the ``total`` value.

    The value returned is the ``total`` field of the first record produced.
    """
    query_result = tx.run(statement)
    rows = query_result.data()
    return rows[0]['total']


* SRL level is not included as a property

* The Permalink tag refers to the link to the innovation. If the value starts with `![CDATA[` then the link is not valid and the innovation is a test, so it is not included.

* AuthorFirstname and AuthorLastName refer to the name of the editor who added the innovation to the Climate Innovation Window, so they are not included.


In this XML, the tag "Issues" refers to the hazards. If the value is "Multi-hazards" and it does not specify the hazards, then the innovation is not connected to any hazards in the SustainGraph. Below is the mapping of issues to the names of the Hazards (UNDRR) hosted in the SustainGraph:

In [10]:
# Maps a value of the XML "Issues" field (key) to the name of the Hazard node
# it should be linked to (value). Issues that have no entry here are looked
# up under their own name when the import records are built.
hazards_map = {
    'Coastal floods': 'Coastal Flood',
    'Storms': 'Thunderstorm',
    'Droughts': 'Drought',
    'Heatwaves':'Heatwave',
    'Wildfires':'Wildfires',
    'Sea Level rise': 'Sea-Level Rise',
    'Pluvial Flood': 'Surface Water Flooding',
    'Frost': 'Frost (Hoar Frost)',
    'River floods': 'Fluvial (Riverine Flood)',
    # NOTE(review): heavy precipitation is mapped to 'Hail' - confirm this is the intended hazard
    'Heavy Precipitation': 'Hail'
}

In [11]:
url = 'https://www.climateinnovationwindow.eu/wp-content/uploads/wpallexport/exports/26662675dc14f45dcf14b72f68230a03/current-Innovations-Export-2024-May-02-1310.xml'
# A timeout keeps the cell from hanging indefinitely on an unresponsive server.
response = requests.get(url, timeout=30)
# Stop here on an HTTP error. Only printing a message would leave `root`
# undefined (or stale from an earlier run) and make the following cells fail
# or silently import old data.
response.raise_for_status()
# Parse the XML content
root = ET.fromstring(response.content)

In [12]:
def xml_to_dict(element):
    """Convert the children of an XML element into a nested dictionary.

    Each child contributes one entry keyed by its tag: a child that has
    children of its own is converted recursively, a leaf child contributes
    its text (``None`` when it has no text). When several children share a
    tag, the last one wins.
    """
    return {
        node.tag: (xml_to_dict(node) if len(node) else node.text)
        for node in element
    }

In [13]:
# Title prefixes that mark test / placeholder registrations, which are skipped.
# The "What's the name ..." placeholder is listed in two spellings: the
# byte sequence used by the original filter and the same text with a proper
# right single quotation mark, so the entry is caught however the export
# encodes the apostrophe.
TEST_TITLE_PREFIXES = (
    'Gretchen Whitmer:',
    'Whatâ€™s the name of your innovation?',
    'What’s the name of your innovation?',
    'GM4W - ',
    'Deserunt voluptatem',
    'EVACOLD-CO2',
    'EZSal',
    'Fuel-less Torque Generator(Power)',
    'Test Innovation',
)

data_to_import = []
for child in root:
    # Every innovation registration is stored into a data dict
    data = xml_to_dict(child)

    # Test innovations are not included in the list to be imported
    if data['Title'].startswith(TEST_TITLE_PREFIXES):
        continue

    # An empty <Issues> element parses to None; treat it like 'Multi-hazards'
    # (no hazard links) instead of failing on .split(), in line with how
    # Solutions and Areas are handled below.
    issues = data['Issues']
    if not issues or issues == 'Multi-hazards':
        hazards = []
    else:
        # Issues without an entry in hazards_map are kept under their own name.
        hazards = [hazards_map.get(issue, issue) for issue in issues.split('|')]

    data_to_import.append({
        'name': data['Title'],
        'description': '' if not data['intro-text'] else data['intro-text'].replace('<p>', '').replace('</p>', '').replace('\n', ''),
        'typeOfSolution': [] if not data['Solutions'] else data['Solutions'].split('|'),
        'applicationDomain': [] if not data['Areas'] else data['Areas'].split('|'),
        # TRLLevel is sliced positionally: the 4th character is the level
        # digit and everything from the 7th character on is the description.
        'trl': int(data['TRLLevel'][3:4]),
        'trlDescription': data['TRLLevel'][6:],
        'permalink': data['Permalink'],
        'hazards': hazards,
    })

In [14]:
# MERGE only on the properties that form the Innovation node key and SET the
# remaining ones. Merging on every property would try to create a second node
# (and violate the node key) whenever a description changes between runs.
# Innovations with an empty hazards list are still merged; they simply drop
# out after the UNWIND, so `total` counts only innovations that were linked
# to at least one existing Hazard.
statement_innovations = """
UNWIND $parameters as row
MERGE (i:Innovation{name:row.name, typeOfSolution:row.typeOfSolution, applicationDomain: row.applicationDomain,
trl: row.trl, permalink: row.permalink})
SET i.description = row.description, i.trlDescription = row.trlDescription
WITH i, row
UNWIND row.hazards as hazard
MATCH (hz:Hazard{name:hazard})
MERGE (i)-[:TARGETED_TO]->(hz)
RETURN COUNT(DISTINCT i) AS total
"""
# Counts every Innovation node present in the database.
statement_total_innovations = """
MATCH (n:Innovation)
RETURN COUNT(n) AS total
"""
with driver.session(database=database_name) as session:
    print('Innovations to be imported: ',len(data_to_import))
    total = session.execute_write(cypher_run, statement = statement_innovations, params=data_to_import)
    # Pure read, so a read transaction is sufficient.
    total_innov = session.execute_read(cypher_total, statement = statement_total_innovations)
    print('Total innovations imported: ',total_innov)
    print('Total innovations with relartionship to hazards imported: ',total)

Innovations to be imported:  117
Total innovations imported:  117
Total innovations with relartionship to hazards imported:  83
