In [1]:
""" Store ICEP data in PostGIS Database.
-------------------------------------------------------------------------------

Copies the ICEP results from S3 to the local volume and defines the names
(paths, output table, database) used by the rest of the notebook.

Author: Rutger Hofste
Date: 20181001
Kernel: python35
Docker: rutgerhofste/gisdocker:ubuntu16.04

Args:
    OVERWRITE_OUTPUT (boolean) : When truthy, an existing output table is
        dropped before new results are written.
    SCRIPT_NAME (string) : Script name.
    OUTPUT_VERSION (integer) : Output version.
    S3_INPUT_PATH (string) : AWS S3 input path of the ICEP results.
    DATABASE_ENDPOINT (string) : RDS or PostgreSQL endpoint.
    DATABASE_NAME (string) : Database name.

"""
# Run identification.
# NOTE(review): "Y20118" looks like a typo for "Y2018"; the output table name
# and the local paths are derived from it, so confirm before renaming.
SCRIPT_NAME = "Y20118M10D01_RH_ICEP_Basins_PostGIS_V01"
OUTPUT_VERSION = 1
OVERWRITE_OUTPUT = 1

# Input location on S3.
S3_INPUT_PATH = "s3://wri-projects/Aqueduct30/finalData/ICEP"

# Database settings
DATABASE_ENDPOINT = "aqueduct30v05.cgpnumwmfcqc.eu-central-1.rds.amazonaws.com"
DATABASE_NAME = "database01"

# Names derived from the script name and the zero-padded output version.
OUTPUT_TABLE_NAME = "{}_v{:02.0f}".format(SCRIPT_NAME, OUTPUT_VERSION).lower()
ec2_input_path = "/volumes/data/{}/input_V{:02.0f}".format(SCRIPT_NAME, OUTPUT_VERSION)
ec2_output_path = "/volumes/data/{}/output_V{:02.0f}".format(SCRIPT_NAME, OUTPUT_VERSION)

# Echo the resolved locations so they are visible in the cell output.
summary_lines = (
    "\nInput ec2: " + ec2_input_path,
    "\nInput s3 : " + S3_INPUT_PATH,
    "\nOutput postGIS table : " + OUTPUT_TABLE_NAME,
)
print(*summary_lines)



Input ec2: /volumes/data/Y20118M10D01_RH_ICEP_Basins_PostGIS_V01/input_V01 
Input s3 : s3://wri-projects/Aqueduct30/finalData/ICEP 
Output postGIS table : y20118m10d01_rh_icep_basins_postgis_v01_v01


In [2]:
import time, datetime, sys

# Stamp the run in UTC. time.strftime() without a time tuple formats the
# *local* time, so the same gmtime() value is passed to both calls to make the
# "UTC" label true regardless of the host's timezone.
run_time_utc = time.gmtime()
dateString = time.strftime("Y%YM%mD%d", run_time_utc)
timeString = time.strftime("UTC %H:%M", run_time_utc)

# Wall-clock start; only differences of this value are meaningful.
start = datetime.datetime.now()
print(dateString,timeString)

# Last expression of the cell: shows the interpreter version in the output.
sys.version

Y2018M10D01 UTC 14:53


'3.5.4 |Anaconda, Inc.| (default, Nov 20 2017, 18:44:38) \n[GCC 7.2.0]'

In [3]:
!rm -r {ec2_input_path}
!mkdir -p {ec2_input_path}
!aws s3 cp {S3_INPUT_PATH} {ec2_input_path} --recursive
    

download: s3://wri-projects/Aqueduct30/finalData/ICEP/icep_results.cpg to ../../../../data/Y20118M10D01_RH_ICEP_Basins_PostGIS_V01/input_V01/icep_results.cpg
download: s3://wri-projects/Aqueduct30/finalData/ICEP/README.txt to ../../../../data/Y20118M10D01_RH_ICEP_Basins_PostGIS_V01/input_V01/README.txt
download: s3://wri-projects/Aqueduct30/finalData/ICEP/wri_aqueduct3_ICEP.py to ../../../../data/Y20118M10D01_RH_ICEP_Basins_PostGIS_V01/input_V01/wri_aqueduct3_ICEP.py
download: s3://wri-projects/Aqueduct30/finalData/ICEP/icep_results.sbx to ../../../../data/Y20118M10D01_RH_ICEP_Basins_PostGIS_V01/input_V01/icep_results.sbx
download: s3://wri-projects/Aqueduct30/finalData/ICEP/icep_results.prj to ../../../../data/Y20118M10D01_RH_ICEP_Basins_PostGIS_V01/input_V01/icep_results.prj
download: s3://wri-projects/Aqueduct30/finalData/ICEP/icep_results.shp.xml to ../../../../data/Y20118M10D01_RH_ICEP_Basins_PostGIS_V01/input_V01/icep_results.shp.xml
download: s3://wri-projects/Aqueduct30/finalDa

In [4]:
import numpy as np
import pandas as pd
import geopandas as gpd
# NOTE(review): wildcard import; the cells below take create_engine and text
# from it. Prefer explicit names once it is confirmed nothing else is needed.
from sqlalchemy import *
from geoalchemy2 import Geometry, WKTElement
from shapely.geometry.multipolygon import MultiPolygon
# Allow up to 500 columns to be shown when a data frame is displayed.
pd.set_option('display.max_columns', 500)

In [5]:
# Read the database password from the first line of the local secrets file.
# The context manager closes the file even if reading or indexing raises.
with open("/.password","r") as F:
    password = F.read().splitlines()[0]

In [6]:
import urllib.parse

# Percent-encode the password before placing it in the connection URL so that
# characters such as "@", ":", "/" or "%" are not read as URL delimiters.
engine = create_engine("postgresql://rutgerhofste:{}@{}:5432/{}".format(urllib.parse.quote(password, safe=""),DATABASE_ENDPOINT,DATABASE_NAME))
connection = engine.connect()

if OVERWRITE_OUTPUT:
    # Remove a previous version of the output table before it is written again.
    sql = text("DROP TABLE IF EXISTS {};".format(OUTPUT_TABLE_NAME))
    result = engine.execute(sql)

In [7]:
# Location of the ICEP shapefile inside the local input folder.
shapefile_template = "{}/icep_results.shp"
input_file_path = shapefile_template.format(ec2_input_path)

In [16]:
# Load the ICEP results shapefile into a GeoDataFrame.
gdf = gpd.read_file(input_file_path)

In [17]:
def score_to_category(score):
    """Convert a score to an integer category.

    The category is the score rounded down to the nearest integer, except
    that a score of exactly 5 is placed in category 4.

    Args:
        score (number) : Score to convert.

    Returns:
        int : Category belonging to the score.
    """
    if score == 5:
        # A score of exactly 5 shares the category of the scores just below it.
        return 4
    return int(np.floor(score))

In [18]:
# Map the shapefile attribute names onto the column names used from here on.
column_names = {
    "BASINID": "icepbasinid",
    "ICEP_raw": "icep_dimensionless",
    "ICEP_s": "icep_score",
    "ICEP_cat": "icep_label",
}
gdf = gdf.rename(columns=column_names)

In [19]:
# Derive the integer category from the score, one value at a time.
gdf["icep_cat"] = gdf["icep_score"].map(score_to_category)

In [20]:
def _as_multipolygon(geometry):
    """Return the geometry as a MultiPolygon.

    A geometry that already is a MultiPolygon is returned unchanged; anything
    else is wrapped as the single member of a new MultiPolygon. The guard
    keeps rows that are already multi-part intact and makes this cell safe to
    run more than once.
    """
    if isinstance(geometry, MultiPolygon):
        return geometry
    return MultiPolygon([geometry])


# Give every row the same geometry type.
gdf["geometry"] = gdf["geometry"].apply(_as_multipolygon)

In [21]:
def geometry_to_wkt_element(geometry):
    """Wrap the geometry's WKT text in a WKTElement tagged with SRID 4326."""
    return WKTElement(geometry.wkt, srid=4326)


# Store the database-ready geometry in its own column.
gdf['geom'] = gdf['geometry'].apply(geometry_to_wkt_element)

In [22]:
# Remove the "geometry" column; the "geom" column of WKT elements was derived
# from it. The drop is in place, so re-running this cell without recreating the
# column raises a KeyError.
gdf.drop("geometry",axis=1,inplace=True)

In [23]:
# Preview the first rows of the frame as a visual check of the columns.
gdf.head()

Unnamed: 0,icepbasinid,BASINID_1,icep_dimensionless,icep_score,icep_label,icep_cat,geom
0,1,1,-10.778452,0.976334,Low (< -5),0,MULTIPOLYGON (((-59.99999999999977 5.500000000...
1,2,2,-0.094111,1.226472,Low to medium (-5 to 0),1,"MULTIPOLYGON (((32.00000000000028 -3, 32.50000..."
2,3,3,-1.123661,0.969085,Low (< -5),0,"MULTIPOLYGON (((32.00000000000028 -3, 32.00000..."
3,4,4,0.934831,2.934831,Medium to high (0 to +1),2,MULTIPOLYGON (((-78.49999999999989 42.00000000...
4,5,5,-0.592593,1.101852,Low to medium (-5 to 0),1,MULTIPOLYGON (((69.50000000000028 66.000000000...


In [24]:
# Write the frame to the database table, storing "geom" as a geometry column.
# - if_exists follows OVERWRITE_OUTPUT: an existing table is replaced only when
#   overwriting is enabled; otherwise to_sql raises instead of silently
#   replacing earlier results.
# - The geometry type name is passed without the stray trailing space.
gdf.to_sql(name=OUTPUT_TABLE_NAME,
           con=engine,
           if_exists="replace" if OVERWRITE_OUTPUT else "fail",
           dtype={'geom': Geometry("MULTIPOLYGON", srid=4326)})

In [None]:
# Release the database connection obtained from engine.connect().
connection.close()

In [None]:
# Report the wall-clock time between the `start` timestamp and now.
end = datetime.datetime.now()
elapsed = end - start
print(elapsed)

In [None]:
Previous Runs:  
