In [1]:
""" Create shapefile and geodataframe from list of pfaf_ids for testing
-------------------------------------------------------------------------------


Author: Rutger Hofste
Date: 20180327
Kernel: python35
Docker: rutgerhofste/gisdocker:ubuntu16.04

"""

# Identifiers used to build the versioned working folders further down.
SCRIPT_NAME = "Y2018M05D25_RH_Create_Test_Shapefile_Basins_V01"
OUTPUT_VERSION = 1

# Test basins keyed by pfaf_id; each value is a note describing the basin.
DICTJE = {
    172265: "Large stream in only one cell. perpendicular contributing areas",
    172263: "Large stream in a few cells, perpendicular contributing areas",
    172261: "Tiny basin smaller than one 5min cell",
    172250: "Large basin with main stream",
    172306: "Large basin with main stream and other stream in most downstream cell",
    172521: "Small basin with a confluence within basin. Stream_order increases in most downstream cell but is part of basin",
    172144: "Basin with an endorheic basin in one of its upstream cells",
}

# Database connection settings.
DATABASE_ENDPOINT = "aqueduct30v05.cgpnumwmfcqc.eu-central-1.rds.amazonaws.com"
DATABASE_NAME = "database01"

# Table to read from and base name of the file to write.
INPUT_TABLE_NAME = "hybas06_v04"
OUTPUT_FILE_NAME = "hybas06_v04_Selection"

# Working folders on the EC2 instance and the S3 destination. The version is
# zero-padded to two digits by the {:02.0f} format spec.
ec2_input_path = "/volumes/data/{}/input_V{:02.0f}".format(
    SCRIPT_NAME, OUTPUT_VERSION
)
ec2_output_path = "/volumes/data/{}/output_V{:02.0f}".format(
    SCRIPT_NAME, OUTPUT_VERSION
)
s3_output_path = "s3://wri-projects/Aqueduct30/processData/{}/output_V{:02.0f}/".format(
    SCRIPT_NAME, OUTPUT_VERSION
)

# Echo the resolved locations so they are recorded in the notebook output.
print(
    "Input ec2: " + ec2_input_path,
    "\nOutput ec2: " + ec2_output_path,
    "\nOutput s3: " + s3_output_path
)

Input ec2: /volumes/data/Y2018M05D25_RH_Create_Test_Shapefile_Basins_V01/input_V01 
Output ec2: /volumes/data/Y2018M05D25_RH_Create_Test_Shapefile_Basins_V01/output_V01 
Output s3: s3://wri-projects/Aqueduct30/processData/Y2018M05D25_RH_Create_Test_Shapefile_Basins_V01/output_V01/


In [2]:
import time, datetime, sys

# time.strftime formats *local* time unless a time tuple is passed. Take one
# UTC snapshot so both strings agree with each other and with the "UTC" label.
utc_now = time.gmtime()
dateString = time.strftime("Y%YM%mD%d", utc_now)
timeString = time.strftime("UTC %H:%M", utc_now)
start = datetime.datetime.now()  # naive local timestamp taken when this cell runs
print(dateString,timeString)
sys.version

Y2018M05D25 UTC 08:30


'3.5.4 |Anaconda, Inc.| (default, Nov 20 2017, 18:44:38) \n[GCC 7.2.0]'

In [3]:
!rm -r {ec2_input_path}
!rm -r {ec2_output_path}
!mkdir -p {ec2_input_path}
!mkdir -p {ec2_output_path}

In [4]:
# imports
import re
import os
import numpy as np
import pandas as pd
import geopandas as gpd
import aqueduct3
from datetime import timedelta
from sqlalchemy import *
# Raise the pandas display limit to 500 columns for DataFrame output.
pd.set_option('display.max_columns', 500)

In [5]:
def postGisToGdf(connection,tableName):
    """Read a complete PostGIS table into a GeoDataFrame.

    Args:
        connection (sqlalchemy.engine.base.Connection): connection to a
            PostGIS-enabled database.
        tableName (string): name of the table to read. It is interpolated
            directly into the SQL text, so only pass trusted names.

    Returns:
        geopandas.GeoDataFrame: result of ``select *`` on the table, with the
            'geom' column as geometry and the CRS attribute set to EPSG:4326.

    Todo:
        allow for SQL filtering
    """
    query = "select * from %s" % tableName
    table_gdf = gpd.GeoDataFrame.from_postgis(query, connection, geom_col="geom")
    # Only the CRS attribute is set here; no coordinates are transformed.
    # NOTE(review): assumes the stored geometries are already EPSG:4326 - confirm.
    table_gdf.crs = {"init": "epsg:4326"}
    return table_gdf

In [6]:
# Read the database password from a local file; only the first line is used.
# The with-block closes the file even if reading or indexing raises.
with open("/.password","r") as password_file:
    password = password_file.read().splitlines()[0]

# NOTE(review): the password is inserted into the URL as-is; characters such
# as "@" or "/" would need URL-encoding - confirm the password has none.
engine = create_engine("postgresql://rutgerhofste:{}@{}:5432/{}".format(password,DATABASE_ENDPOINT,DATABASE_NAME))
connection = engine.connect()

In [7]:
# Load the complete input table from the database.
gdf = postGisToGdf(connection=connection, tableName=INPUT_TABLE_NAME)

In [8]:
# Keep only the rows whose pfaf_id is one of the annotated test basins.
gdf_selection = gdf.loc[gdf["pfaf_id"].isin(list(DICTJE))]

In [9]:
# One row per test basin: the index and the "pfaf_id" column both hold the
# basin id, and "comment" holds its description. from_dict(orient="index")
# yields a single column labelled 0, which is renamed here.
df = pd.DataFrame.from_dict(DICTJE, orient="index").rename(columns={0: "comment"})
df = df.assign(pfaf_id=df.index)

In [10]:
# Attach the comment to each selected basin (inner join on pfaf_id).
gdf_selection_merged = gdf_selection.merge(df, how="inner", on="pfaf_id")

In [11]:
# Display the merged selection for visual inspection.
gdf_selection_merged

Unnamed: 0,hybas_id,next_down,next_sink,main_bas,dist_sink,dist_main,sub_area,up_area,endo,coast,order,sort,pfaf_id,geom,comment
0,1060535340,1060525270,1060034260,1060034260,2371.4,2371.4,25458.3,25458.3,0,0,3,2026,172144,(POLYGON ((27.14583333333336 16.18750000000002...,Basin with an endorheic basin in one of its up...
1,1060538700,1060531620,1060034260,1060034260,2919.4,2919.4,20388.9,117731.3,0,0,2,2061,172250,"(POLYGON ((34.38750000000002 17.3541666666667,...",Large basin with main stream
2,1060648450,1060538700,1060034260,1060034260,3392.4,3392.4,12.4,30208.5,0,0,3,2080,172261,"(POLYGON ((35.8541666666667 14.29166666666669,...",Tiny basin smaller than one 5min cell
3,1060658920,1060649730,1060034260,1060034260,3439.2,3439.2,798.6,25727.8,0,0,3,2089,172265,"(POLYGON ((36.01250000000002 13.8041666666667,...",Large stream in only one cell. perpendicular c...
4,1060554260,1060542340,1060034260,1060034260,2950.2,2950.2,7665.8,7665.8,0,0,2,2065,172306,(POLYGON ((33.76666666666669 16.64166666666669...,Large basin with main stream and other stream ...
5,1060645140,1060601190,1060034260,1060034260,3346.6,3346.6,362.0,34940.0,0,0,2,2076,172521,(POLYGON ((32.06666666666668 14.52916666666669...,Small basin with a confluence within basin. St...
6,1060649730,1060648450,1060034260,1060034260,3397.2,3397.2,1755.1,28504.2,0,0,3,2082,172263,(POLYGON ((35.94166666666668 13.93750000000002...,"Large stream in a few cells, perpendicular con..."


In [12]:
# Target location inside the EC2 output folder.
output_file_path = "{}/{}".format(
    ec2_output_path,
    OUTPUT_FILE_NAME
)

In [13]:
# Write the annotated selection to disk using the ESRI Shapefile driver.
gdf_selection_merged.to_file(
    filename=output_file_path,
    driver="ESRI Shapefile"
)

In [14]:
# Recursively copy the contents of the EC2 output folder to the S3 output location.
!aws s3 cp {ec2_output_path} {s3_output_path} --recursive

upload: ../../../../data/Y2018M05D25_RH_Create_Test_Shapefile_Basins_V01/output_V01/hybas06_v04_Selection/hybas06_v04_Selection.cpg to s3://wri-projects/Aqueduct30/processData/Y2018M05D25_RH_Create_Test_Shapefile_Basins_V01/output_V01/hybas06_v04_Selection/hybas06_v04_Selection.cpg
upload: ../../../../data/Y2018M05D25_RH_Create_Test_Shapefile_Basins_V01/output_V01/hybas06_v04_Selection/hybas06_v04_Selection.dbf to s3://wri-projects/Aqueduct30/processData/Y2018M05D25_RH_Create_Test_Shapefile_Basins_V01/output_V01/hybas06_v04_Selection/hybas06_v04_Selection.dbf
upload: ../../../../data/Y2018M05D25_RH_Create_Test_Shapefile_Basins_V01/output_V01/hybas06_v04_Selection/hybas06_v04_Selection.prj to s3://wri-projects/Aqueduct30/processData/Y2018M05D25_RH_Create_Test_Shapefile_Basins_V01/output_V01/hybas06_v04_Selection/hybas06_v04_Selection.prj
upload: ../../../../data/Y2018M05D25_RH_Create_Test_Shapefile_Basins_V01/output_V01/hybas06_v04_Selection/hybas06_v04_Selection.shx to s3://wri-project

The shapefile was manually zipped and ingested into Earth Engine.