In [1]:
""" Store gdbd and hybas deltas in postgis in lookup table.
-------------------------------------------------------------------------------

Author: Rutger Hofste
Date: 20180725
Kernel: python35
Docker: rutgerhofste/gisdocker:ubuntu16.04

Args:
    TESTING (Boolean) : Toggle testing case.
    SCRIPT_NAME (string) : Script name.
    OVERWRITE_INPUT (Boolean) : Delete and recreate the local input folder
        before downloading.
    OUTPUT_VERSION (integer) : output version.
    S3_INPUT_PATH (string) : AWS S3 input path (folder) for the hybas
        deltas file.
    INPUT_FILE_NAME (string) : File name of the hybas deltas csv.
    DATABASE_ENDPOINT (string) : RDS or PostgreSQL endpoint.
    DATABASE_NAME (string) : Database name.

"""

# --- Run switches ------------------------------------------------------------
TESTING = 0
OVERWRITE_INPUT = 1

# --- Script identity ---------------------------------------------------------
SCRIPT_NAME = "Y2018M07D25_RH_Delta_Lookup_Table_PostGIS_V01"
OUTPUT_VERSION = 1

# --- Input on S3 -------------------------------------------------------------
INPUT_FILE_NAME = "hybas_deltas.csv"
S3_INPUT_PATH = "s3://wri-projects/Aqueduct30/processData/Y2018M07D25_RH_Basin_Manual_Step_V01/hybas_deltas/"

# --- Target database ---------------------------------------------------------
DATABASE_NAME = "database01"
DATABASE_ENDPOINT = "aqueduct30v05.cgpnumwmfcqc.eu-central-1.rds.amazonaws.com"

# --- Derived names -----------------------------------------------------------
# Local working folder that receives the downloaded input file.
ec2_input_path = "/volumes/data/{}/input_V{:02.0f}".format(SCRIPT_NAME,OUTPUT_VERSION)

# Table name is lowercased. SCRIPT_NAME already ends in "_V01", so the version
# suffix shows up twice in the result (e.g. "..._v01_v01").
OUTPUT_TABLE_NAME = "{}_v{:02.0f}".format(SCRIPT_NAME,OUTPUT_VERSION).lower()

print(
    "\nInput ec2: " + ec2_input_path,
    "\nOutput Table Name: " + OUTPUT_TABLE_NAME,
)



Input ec2: /volumes/data/Y2018M07D25_RH_Delta_Lookup_Table_PostGIS_V01/input_V01 
Output Table Name: y2018m07d25_rh_delta_lookup_table_postgis_v01_v01


In [2]:
import time, datetime, sys

# time.strftime() without an explicit time tuple formats *local* time, so the
# "UTC" label was only correct on hosts whose clock runs on UTC. Take a single
# gmtime() snapshot and format both strings from it: they are then always UTC
# and cannot disagree with each other around midnight.
utc_now = time.gmtime()
dateString = time.strftime("Y%YM%mD%d", utc_now)
timeString = time.strftime("UTC %H:%M", utc_now)

# Wall-clock start of the run; used later to report the elapsed time.
start = datetime.datetime.now()
print(dateString,timeString)
sys.version

Y2018M07D25 UTC 15:27


'3.5.4 |Anaconda, Inc.| (default, Nov 20 2017, 18:44:38) \n[GCC 7.2.0]'

In [3]:
# Local imports: this cell runs before the notebook's main import cell.
import os
import shutil

# Pure-Python replacement for `!rm -r` / `!mkdir -p`, so failures raise instead
# of only printing a shell error, and a not-yet-existing folder is not an error.
if OVERWRITE_INPUT:
    if os.path.isdir(ec2_input_path):
        shutil.rmtree(ec2_input_path)
    elif os.path.exists(ec2_input_path):
        # `rm -r` also removes a plain file sitting at this path.
        os.remove(ec2_input_path)

# Create the folder unconditionally: with OVERWRITE_INPUT switched off the
# original never created it, leaving the download without a directory to land in.
os.makedirs(ec2_input_path, exist_ok=True)

In [4]:
# Copy the input csv from S3 into the local input folder (AWS CLI via shell).
!aws s3 cp {S3_INPUT_PATH}{INPUT_FILE_NAME} {ec2_input_path}

download: s3://wri-projects/Aqueduct30/processData/Y2018M07D25_RH_Basin_Manual_Step_V01/hybas_deltas/hybas_deltas.csv to ../../../../data/Y2018M07D25_RH_Delta_Lookup_Table_PostGIS_V01/input_V01/hybas_deltas.csv


In [5]:
import os
import sqlalchemy
import pandas as pd

In [6]:
# Read the database password from the first line of /.password. The context
# manager closes the file even when reading raises.
with open("/.password","r") as password_file:
    password_lines = password_file.read().splitlines()
if not password_lines:
    raise ValueError("/.password is empty; expected the database password on its first line")
password = password_lines[0]

# NOTE(review): the password is interpolated into the URL verbatim. Characters
# such as "@", "/" or ":" would need URL-encoding first — confirm the stored
# password does not contain any.
engine = sqlalchemy.create_engine("postgresql://rutgerhofste:{}@{}:5432/{}".format(password,DATABASE_ENDPOINT,DATABASE_NAME))

In [7]:
# Local path of the downloaded csv: <input folder>/<input file name>.
file_path = "{}/{}".format(ec2_input_path,INPUT_FILE_NAME)

In [8]:
# Parse the downloaded csv with pandas' default options.
# NOTE(review): the preview of this frame shows an "Unnamed: 0" column —
# presumably a row index that was saved into the csv (TODO confirm).
# process_df does not select it.
df = pd.read_csv(file_path)

In [9]:
def process_df(df):
    """Reduce the basin table to a pfaf_id / delta_id lookup of delta basins.

    Args:
        df (pandas.DataFrame) : Table with at least the columns "PFAF_ID"
            and "delta_id".

    Returns:
        pandas.DataFrame : New frame with the columns "pfaf_id" and
            "delta_id", holding only the rows whose delta_id is greater
            than zero (rows with a missing delta_id are dropped by that
            comparison), sorted ascending by delta_id. The original row
            index is kept; the input frame is not modified.
    """
    is_delta = df["delta_id"] > 0
    lookup = (
        df.loc[is_delta, ["PFAF_ID", "delta_id"]]
        .rename(columns={"PFAF_ID": "pfaf_id"})
        .sort_values(by=["delta_id"])
    )
    return lookup

In [10]:
# Preview the first rows of the raw table as loaded from the csv.
df.head()

Unnamed: 0,HYBAS_ID,NEXT_DOWN,NEXT_SINK,MAIN_BAS,DIST_SINK,DIST_MAIN,SUB_AREA,UP_AREA,PFAF_ID,ENDO,COAST,ORDER,SORT,delta_id
0,6060000010,0,6060000010,6060000010,0.0,0.0,4317.4,4317.4,611001,0,1,0,1,
1,6060000200,0,6060000200,6060000200,0.0,0.0,35995.5,35996.7,611002,0,0,1,2,
2,6060000210,0,6060000210,6060000210,0.0,0.0,443.9,443.9,611003,0,1,0,3,
3,6060000240,0,6060000240,6060000240,0.0,0.0,2186.3,2186.3,611004,0,0,1,4,
4,6060000250,0,6060000250,6060000250,0.0,0.0,6533.8,6533.8,611005,0,1,0,5,


In [11]:
# Reduce the raw table to the pfaf_id / delta_id lookup (rows with delta_id > 0).
df_simple = process_df(df)

In [12]:
# (rows, columns) of the lookup that is about to be written to the database.
df_simple.shape

(196, 2)

In [13]:
# Persist the lookup as OUTPUT_TABLE_NAME. An existing table with that name is
# dropped and recreated ("replace"); the DataFrame index is written as a column
# too, which is the pandas default, spelled out here for clarity.
df_simple.to_sql(
    name=OUTPUT_TABLE_NAME,
    con=engine,
    if_exists="replace",
    index=True,
)

In [14]:
# Release the engine's pooled database connections now that the write is done.
engine.dispose()

In [15]:
# Report the total wall-clock run time, measured from `start` (set in the
# timing cell near the top of the notebook).
end = datetime.datetime.now()
elapsed = end - start
print(elapsed)

0:00:04.853318
