# Store Hydrobasin related files on PostgreSQL RDS database

* Purpose of script: Process the hydrobasin-related data into multiple database tables, following the database ERD
* Author: Rutger Hofste
* Kernel used: python35
* Date created: 20171122 

The script requires a file named `.password` in the current working directory. The first line of that file must contain the password to the database.

In [1]:
import time
import datetime
import sys

# Stamp the run with the current date and time. The time is printed with a
# "UTC" label, so format an explicit UTC time tuple: time.strftime() without a
# tuple formats *local* time, which is wrong on any machine whose clock is not
# set to UTC.
utcNow = time.gmtime()
dateString = time.strftime("Y%YM%mD%d", utcNow)
timeString = time.strftime("UTC %H:%M", utcNow)
# Start time of the run (naive local datetime).
start = datetime.datetime.now()
print(dateString,timeString)
# Last expression of the cell: the notebook displays the interpreter version.
sys.version

Y2017M11D22 UTC 11:21


'3.5.4 |Continuum Analytics, Inc.| (default, Aug 14 2017, 13:26:58) \n[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]'

In [14]:
# Name of this script; reused for the EC2 folder names and the table name.
SCRIPT_NAME = "Y2017M11D22_RH_To_Database_V01"

# Version number of the merged hydrobasin shapefile (zero-padded to 2 digits
# in the file name).
INPUT_VERSION = 1
INPUT_FILE_NAME = "hydrobasins_fao_fiona_merged_v%0.2d" % (INPUT_VERSION,)

# Working folders on the EC2 instance, one pair per script.
EC2_INPUT_PATH = "/volumes/data/%s/input/" % (SCRIPT_NAME,)
EC2_OUTPUT_PATH = "/volumes/data/%s/output/" % (SCRIPT_NAME,)

# S3 prefix holding the output of the script that produced the shapefile.
S3_INPUT_PATH = "s3://wri-projects/Aqueduct30/processData/Y2017M08D23_RH_Merge_FAONames_V01/output/"

# Database settings
DATABASE_IDENTIFIER = "aqueduct30v01"
DATABASE_NAME = "database01"
# Table name is the lower-cased script name.
TABLE_NAME = SCRIPT_NAME.lower()

In [3]:
!rm -r {EC2_INPUT_PATH}
!rm -r {EC2_OUTPUT_PATH}

!mkdir -p {EC2_INPUT_PATH}
!mkdir -p {EC2_OUTPUT_PATH}

In [4]:
# Copy everything under the S3 input prefix into the EC2 input folder.
!aws s3 cp {S3_INPUT_PATH} {EC2_INPUT_PATH} --recursive

download: s3://wri-projects/Aqueduct30/processData/Y2017M08D23_RH_Merge_FAONames_V01/output/hydrobasins_fao_fiona_merged_v01.cpg to ../../../../data/Y2017M11D22_RH_To_Database_V01/input/hydrobasins_fao_fiona_merged_v01.cpg
download: s3://wri-projects/Aqueduct30/processData/Y2017M08D23_RH_Merge_FAONames_V01/output/hydrobasins_fao_fiona_merged_v01.prj to ../../../../data/Y2017M11D22_RH_To_Database_V01/input/hydrobasins_fao_fiona_merged_v01.prj
download: s3://wri-projects/Aqueduct30/processData/Y2017M08D23_RH_Merge_FAONames_V01/output/hydrobasins_fao_fiona_merged_v01.shx to ../../../../data/Y2017M11D22_RH_To_Database_V01/input/hydrobasins_fao_fiona_merged_v01.shx
download: s3://wri-projects/Aqueduct30/processData/Y2017M08D23_RH_Merge_FAONames_V01/output/hydrobasins_fao_fiona_merged_v01.dbf to ../../../../data/Y2017M11D22_RH_To_Database_V01/input/hydrobasins_fao_fiona_merged_v01.dbf
download: s3://wri-projects/Aqueduct30/processData/Y2017M08D23_RH_Merge_FAONames_V01/output/hydrobasins_fao_

In [12]:
import os
import pandas as pd
import geopandas as gpd
from ast import literal_eval
import boto3
import botocore
from sqlalchemy import *

In [15]:
# RDS Connection
def rdsConnect(database_identifier,database_name):
    """Connect to a PostgreSQL database hosted on an AWS RDS instance.

    Reads the database password from the first line of the file ".password"
    in the current working directory, looks up the instance status and
    endpoint address with boto3 (both are printed), and opens a SQLAlchemy
    connection as user "rutgerhofste" on port 5432.

    Args:
        database_identifier: RDS DB instance identifier to look up.
        database_name: Name of the database on that instance.

    Returns:
        Tuple (engine, connection): the SQLAlchemy engine and an open
        connection created from it. Neither is closed here.

    Raises:
        FileNotFoundError: If ".password" does not exist.
        IndexError: If ".password" is empty.
    """
    from urllib.parse import quote

    rds = boto3.client('rds')
    # Context manager closes the file even when reading it fails.
    with open(".password","r") as password_file:
        password = password_file.read().splitlines()[0]
    response = rds.describe_db_instances(DBInstanceIdentifier="%s"%(database_identifier))
    # Only the first instance in the response is inspected.
    instance = response["DBInstances"][0]
    status = instance["DBInstanceStatus"]
    print("Status:",status)
    endpoint = instance["Endpoint"]["Address"]
    print("Endpoint:",endpoint)
    # Percent-encode the password: characters such as "@", "/" or ":" would
    # otherwise be read as URL delimiters and corrupt the connection string.
    # Passwords made of unreserved characters are left unchanged.
    engine = create_engine('postgresql://rutgerhofste:%s@%s:5432/%s' %(quote(password, safe=""),endpoint,database_name))
    connection = engine.connect()
    return engine, connection


In [16]:
# Open the engine and connection for the RDS instance and database set in the
# configuration cell; the call prints the instance status and endpoint.
engine, connection = rdsConnect(DATABASE_IDENTIFIER,DATABASE_NAME)

Status: available
Endpoint: aqueduct30v01.cgpnumwmfcqc.eu-central-1.rds.amazonaws.com


In [10]:
# Read the merged hydrobasin shapefile from the EC2 input folder with geopandas.
gdf = gpd.read_file(os.path.join(EC2_INPUT_PATH,INPUT_FILE_NAME+".shp"))

In [11]:
# Preview the first rows to inspect the attribute columns and the geometry.
gdf.head()

Unnamed: 0,SUB_BAS,TO_BAS,MAJ_BAS,SUB_NAME,MAJ_NAME,SUB_AREA,MAJ_AREA,LEGEND,geometry
0,1001,-999,6001,Bursa / Balikesir,"Black Sea, South Coast",24573,318639,1,"(POLYGON ((27.79166666666602 40.370833333333, ..."
1,1002,-999,6001,Kocaeli,"Black Sea, South Coast",7803,318639,1,(POLYGON ((29.11666666666605 40.81666666666631...
2,1003,-999,6001,Sakarya River,"Black Sea, South Coast",63081,318639,1,"POLYGON ((30.26666666676471 41.22083333338378,..."
3,1004,-999,6001,Duzce / Bolu / Zonguldak / Karabuk,"Black Sea, South Coast",29866,318639,1,"POLYGON ((34.97083333333272 42.09999999999961,..."
4,1005,-999,6001,Kizilirmak River,"Black Sea, South Coast",77771,318639,1,"POLYGON ((34.09583333333271 41.76666666666631,..."


The idea is to store the data in two tables, one for major basins and one for minor basins, together with the geometry. The minor basins have no unique identifier of their own, so we build a composite key from MAJ_BAS and SUB_BAS.
    

In [22]:
def compositeKey(MAJ_BAS,SUB_BAS):
    """Build the composite key string for a sub-basin.

    Args:
        MAJ_BAS: Major basin number, zero-padded to at least 4 digits.
        SUB_BAS: Sub-basin number, zero-padded to at least 7 digits.

    Returns:
        String of the form 'MAJ_BAS_0001_SUB_BAS_0000002'.
    """
    basin_ids = (MAJ_BAS, SUB_BAS)
    return 'MAJ_BAS_%0.4d_SUB_BAS_%0.7d' % basin_ids

'MAJ_BAS_0001_SUB_BAS_0000002'

In [20]:
# Preview the first rows again.
# NOTE(review): the output of this cell shows an empty FAOid column that no
# visible cell creates; presumably it was added by a cell that was run out of
# order or deleted -- confirm before relying on it.
gdf.head()

Unnamed: 0,SUB_BAS,TO_BAS,MAJ_BAS,SUB_NAME,MAJ_NAME,SUB_AREA,MAJ_AREA,LEGEND,geometry,FAOid
0,1001,-999,6001,Bursa / Balikesir,"Black Sea, South Coast",24573,318639,1,"(POLYGON ((27.79166666666602 40.370833333333, ...",
1,1002,-999,6001,Kocaeli,"Black Sea, South Coast",7803,318639,1,(POLYGON ((29.11666666666605 40.81666666666631...,
2,1003,-999,6001,Sakarya River,"Black Sea, South Coast",63081,318639,1,"POLYGON ((30.26666666676471 41.22083333338378,...",
3,1004,-999,6001,Duzce / Bolu / Zonguldak / Karabuk,"Black Sea, South Coast",29866,318639,1,"POLYGON ((34.97083333333272 42.09999999999961,...",
4,1005,-999,6001,Kizilirmak River,"Black Sea, South Coast",77771,318639,1,"POLYGON ((34.09583333333271 41.76666666666631,...",


In [26]:
# Smoke test of compositeKey: it is called row-wise with the fixed arguments
# (42, 43), so every row receives the same key.
gdf["test"] = gdf.apply(lambda row: compositeKey(42, 43), axis=1)

In [27]:
# Preview the first rows to check the new "test" column.
gdf.head()

Unnamed: 0,SUB_BAS,TO_BAS,MAJ_BAS,SUB_NAME,MAJ_NAME,SUB_AREA,MAJ_AREA,LEGEND,geometry,FAOid,test
0,1001,-999,6001,Bursa / Balikesir,"Black Sea, South Coast",24573,318639,1,"(POLYGON ((27.79166666666602 40.370833333333, ...",,MAJ_BAS_0042_SUB_BAS_0000043
1,1002,-999,6001,Kocaeli,"Black Sea, South Coast",7803,318639,1,(POLYGON ((29.11666666666605 40.81666666666631...,,MAJ_BAS_0042_SUB_BAS_0000043
2,1003,-999,6001,Sakarya River,"Black Sea, South Coast",63081,318639,1,"POLYGON ((30.26666666676471 41.22083333338378,...",,MAJ_BAS_0042_SUB_BAS_0000043
3,1004,-999,6001,Duzce / Bolu / Zonguldak / Karabuk,"Black Sea, South Coast",29866,318639,1,"POLYGON ((34.97083333333272 42.09999999999961,...",,MAJ_BAS_0042_SUB_BAS_0000043
4,1005,-999,6001,Kizilirmak River,"Black Sea, South Coast",77771,318639,1,"POLYGON ((34.09583333333271 41.76666666666631,...",,MAJ_BAS_0042_SUB_BAS_0000043
