In [1]:
""" Create horizontal table for readability. 
-------------------------------------------------------------------------------

Data is strored vertically in bigquery which means each indicator has its 
own row. This script puts the verious indicators as columns in a new,
horizontal table. Additional useful attributes are added. 

gadm metadata:
https://gadm.org/metadata.html
 

Author: Rutger Hofste
Date: 20181214
Kernel: python35
Docker: rutgerhofste/gisdocker:ubuntu16.04

"""

SCRIPT_NAME = 'Y2018M12D14_RH_Master_Horizontal_GPD_V01'
OUTPUT_VERSION = 10


# AWS RDS PostGIS
DATABASE_ENDPOINT = "aqueduct30v05.cgpnumwmfcqc.eu-central-1.rds.amazonaws.com"
DATABASE_NAME = "database01"

POSTGIS_INPUT_TABLE_NAME = "y2018m12d06_rh_master_shape_v01_v02"

# BigQuery 
BQ_IN = {}
# gadm
BQ_IN["GADM36L01"] = "y2018m11d12_rh_gadm36_level1_rds_to_bq_v01_v01"
# Area 
BQ_IN["area"] = 'y2018m12d07_rh_process_area_bq_v01_v01'

# too slow, using s3 instead
BQ_IN["indicators"] = 'y2018m12d11_rh_master_weights_gpd_v02_v09'
S3_INPUT_PATH = "s3://wri-projects/Aqueduct30/processData/Y2018M12D11_RH_Master_Weights_GPD_V02/output_V10"

BQ_PROJECT_ID = "aqueduct30"
BQ_OUTPUT_DATASET_NAME = "aqueduct30v01"
BQ_OUTPUT_TABLE_NAME = "{}_v{:02.0f}".format(SCRIPT_NAME,OUTPUT_VERSION).lower()

ec2_input_path = "/volumes/data/{}/input_V{:02.0f}".format(SCRIPT_NAME,OUTPUT_VERSION) 
ec2_output_path = "/volumes/data/{}/output_V{:02.0f}".format(SCRIPT_NAME,OUTPUT_VERSION) 
s3_output_path = "s3://wri-projects/Aqueduct30/processData/{}/output_V{:02.0f}/".format(SCRIPT_NAME,OUTPUT_VERSION)

print("PostGIS table name: ", POSTGIS_INPUT_TABLE_NAME,
      "\nBQ_OUTPUT_DATASET_NAME: ", BQ_OUTPUT_DATASET_NAME,
      "\nBQ_OUTPUT_TABLE_NAME: ", BQ_OUTPUT_TABLE_NAME,
      "\ns3_output_path: ", s3_output_path,
      "\nec2_output_path:" , ec2_output_path)

PostGIS table name:  y2018m12d06_rh_master_shape_v01_v02 
BQ_OUTPUT_DATASET_NAME:  aqueduct30v01 
BQ_OUTPUT_TABLE_NAME:  y2018m12d14_rh_master_horizontal_gpd_v01_v10 
s3_output_path:  s3://wri-projects/Aqueduct30/processData/Y2018M12D14_RH_Master_Horizontal_GPD_V01/output_V10/ 
ec2_output_path: /volumes/data/Y2018M12D14_RH_Master_Horizontal_GPD_V01/output_V10


In [2]:
import time, datetime, sys
dateString = time.strftime("Y%YM%mD%d")
timeString = time.strftime("UTC %H:%M")
start = datetime.datetime.now()
print(dateString,timeString)
sys.version

Y2019M04D10 UTC 09:17


'3.5.4 |Anaconda, Inc.| (default, Nov 20 2017, 18:44:38) \n[GCC 7.2.0]'

In [3]:
!rm -r {ec2_output_path}
!mkdir -p {ec2_output_path}

rm: cannot remove '/volumes/data/Y2018M12D14_RH_Master_Horizontal_GPD_V01/output_V10': No such file or directory


In [4]:
!aws s3 cp {S3_INPUT_PATH} {ec2_input_path} --recursive

download: s3://wri-projects/Aqueduct30/processData/Y2018M12D11_RH_Master_Weights_GPD_V02/output_V10/Y2018M12D11_RH_Master_Weights_GPD_V02.pkl to ../../../../data/Y2018M12D14_RH_Master_Horizontal_GPD_V01/input_V10/Y2018M12D11_RH_Master_Weights_GPD_V02.pkl
download: s3://wri-projects/Aqueduct30/processData/Y2018M12D11_RH_Master_Weights_GPD_V02/output_V10/Y2018M12D11_RH_Master_Weights_GPD_V02.csv to ../../../../data/Y2018M12D14_RH_Master_Horizontal_GPD_V01/input_V10/Y2018M12D11_RH_Master_Weights_GPD_V02.csv


In [5]:
import os
import pandas as pd
import geopandas as gpd
import numpy as np
import sqlalchemy
from google.cloud import bigquery
from shapely.geometry.multipolygon import MultiPolygon
from geoalchemy2 import Geometry, WKTElement

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/.google.json"
os.environ["GOOGLE_CLOUD_PROJECT"] = "aqueduct30"
client = bigquery.Client(project=BQ_PROJECT_ID)

%matplotlib inline
pd.set_option('display.max_columns', 500)


In [6]:
F = open("/.password","r")
password = F.read().splitlines()[0]
F.close()

engine = sqlalchemy.create_engine("postgresql://rutgerhofste:{}@{}:5432/{}".format(password,DATABASE_ENDPOINT,DATABASE_NAME))
connection = engine.connect()

In [7]:
def uploadGDFtoPostGIS(gdf,tableName,saveIndex):
    # this function uploads a polygon shapefile to table in AWS RDS. 
    # It handles combined polygon/multipolygon geometry and stores it in valid multipolygon in epsg 4326.
    
    # gdf = input geoDataframe
    # tableName = postGIS table name (string)
    # saveIndex = save index column in separate column in postgresql, otherwise discarded. (Boolean)
    
    
    gdf["type"] = gdf.geometry.geom_type    
    geomTypes = ["Polygon","MultiPolygon"]
    
    for geomType in geomTypes:
        gdfType = gdf.loc[gdf["type"]== geomType]
        geomTypeLower = str.lower(geomType)
        gdfType['geom'] = gdfType['geometry'].apply(lambda x: WKTElement(x.wkt, srid=4326))
        gdfType.drop(["geometry","type"],1, inplace=True)      
        print("Create table temp%s" %(geomTypeLower)) 
        gdfType.to_sql(
            name = "temp%s" %(geomTypeLower),
            con = engine,
            if_exists='replace',
            index= saveIndex, 
            dtype={'geom': Geometry(str.upper(geomType), srid= 4326)}
        )
        
    # Merge both tables and make valid
    sql = []
    sql.append("DROP TABLE IF EXISTS %s"  %(tableName))
    sql.append("ALTER TABLE temppolygon ALTER COLUMN geom type geometry(MultiPolygon, 4326) using ST_Multi(geom);")
    sql.append("CREATE TABLE %s AS (SELECT * FROM temppolygon UNION SELECT * FROM tempmultipolygon);" %(tableName))
    sql.append("UPDATE %s SET geom = st_makevalid(geom);" %(tableName))
    sql.append("DROP TABLE temppolygon,tempmultipolygon")

    for statement in sql:
        print(statement)
        result = connection.execute(statement)    
    gdfFromSQL =gpd.GeoDataFrame.from_postgis("select * from %s" %(tableName),connection,geom_col='geom' )
    return gdfFromSQL

In [8]:
sql = """
SELECT
    aq30_id,
    string_id,
    pfaf_id,
    gid_1, 
    aqid,
    geom
FROM {}
""".format(POSTGIS_INPUT_TABLE_NAME)

In [9]:
gdf =gpd.GeoDataFrame.from_postgis(sql,engine,geom_col='geom')

In [10]:
gdf.head()

Unnamed: 0,aq30_id,string_id,pfaf_id,gid_1,aqid,geom
0,0,111011-EGY.11_1-3365,111011,EGY.11_1,3365,(POLYGON ((31.90590570688292 29.85788703615783...
1,3,111011-None-3365,111011,-9999,3365,(POLYGON ((32.46194054146073 29.89250514754305...
2,5,111012-EGY.11_1-3365,111012,EGY.11_1,3365,(POLYGON ((31.88750000022489 29.85833333371637...
3,7,111012-EGY.15_1-None,111012,EGY.15_1,-9999,(POLYGON ((32.36609158210641 29.63378296420298...
4,8,111012-EGY.8_1-3365,111012,EGY.8_1,3365,(POLYGON ((31.89332770960627 29.73370743035362...


In [11]:
gdf.shape

(68511, 6)

In [12]:
gdf_master = gdf

In [13]:
sql_gadm = """
SELECT
  gid_1,
  name_1,
  gid_0,
  name_0,
  varname_1,
  nl_name_1,
  type_1,
  engtype_1,
  cc_1,
  hasc_1
FROM
  `{}.{}.{}`
""".format(BQ_PROJECT_ID,BQ_OUTPUT_DATASET_NAME,BQ_IN["GADM36L01"])

In [14]:
df_gadm = pd.read_gbq(query=sql_gadm,dialect="standard")

In [15]:
gdf_master = pd.merge(left=gdf_master,
                      right=df_gadm,
                      left_on ="gid_1",
                      right_on = "gid_1",
                      how = "left")

In [16]:
sql_area = """
SELECT
  string_id,
  area_km2
FROM
  `{}.{}.{}`
""".format(BQ_PROJECT_ID,BQ_OUTPUT_DATASET_NAME,BQ_IN["area"])
df_area = pd.read_gbq(query=sql_area,dialect="standard")

In [17]:
gdf_master = pd.merge(left=gdf_master,
                      right=df_area,
                      left_on ="string_id",
                      right_on = "string_id",
                      how = "left")

In [18]:
sql_in = """
SELECT
  string_id,
  indicator,
  group_short,
  industry_short,
  raw,
  score,
  cat,
  label,
  weight_fraction,
  weighted_score
FROM
  `{}.{}.{}`
""".format(BQ_PROJECT_ID,BQ_OUTPUT_DATASET_NAME,BQ_IN["indicators"])
#df_in = pd.read_gbq(query=sql_in,dialect="standard") # Takes too long, reverting to pickled file instead

In [19]:
source_path = "{}/Y2018M12D11_RH_Master_Weights_GPD_V02.pkl".format(ec2_input_path)

In [20]:
df_in = pd.read_pickle(source_path)

In [21]:
df_in.head()

Unnamed: 0,cat,group_short,indicator,industry_short,label,raw,score,string_id,weight_fraction,weighted_score
0,2.0,rrr,rri,fnb,Medium - High (50-60%),58.0,2.8,111011-EGY.11_1-3365,0.081633,0.228571
1,2.0,rrr,rri,che,Medium - High (50-60%),58.0,2.8,111011-EGY.11_1-3365,0.07619,0.213333
2,2.0,rrr,rri,smc,Medium - High (50-60%),58.0,2.8,111011-EGY.11_1-3365,0.095238,0.266667
3,2.0,rrr,rri,ong,Medium - High (50-60%),58.0,2.8,111011-EGY.11_1-3365,0.183908,0.514943
4,2.0,rrr,rri,min,Medium - High (50-60%),58.0,2.8,111011-EGY.11_1-3365,0.122137,0.341985


# Append (horizontally) all indicators

In [22]:
indicators = list(df_in["indicator"].unique())
indicators.remove('awr')
for indicator in indicators:
    print(indicator)
    df_sel = df_in.loc[(df_in["industry_short"] == "def") &(df_in["indicator"] == indicator)]
    df_out = df_sel[["string_id","raw","score","cat","label"]]
    df_out.columns = ["string_id",
                      indicator + "_raw",
                      indicator +"_score",
                      indicator +"_cat",
                      indicator +"_label"]
    gdf_master = pd.merge(left=gdf_master,
                          right=df_out,
                          left_on ="string_id",
                          right_on = "string_id",
                          how="left")

rri
gtd
bws
udw
ucw
cep
rfr
usa
sev
drr
cfr
iav
bwd


In [23]:
gdf_master.loc[gdf_master["string_id"] == "253001-SJM.2_1-89"]

Unnamed: 0,aq30_id,string_id,pfaf_id,gid_1,aqid,geom,name_1,gid_0,name_0,varname_1,nl_name_1,type_1,engtype_1,cc_1,hasc_1,area_km2,rri_raw,rri_score,rri_cat,rri_label,gtd_raw,gtd_score,gtd_cat,gtd_label,bws_raw,bws_score,bws_cat,bws_label,udw_raw,udw_score,udw_cat,udw_label,ucw_raw,ucw_score,ucw_cat,ucw_label,cep_raw,cep_score,cep_cat,cep_label,rfr_raw,rfr_score,rfr_cat,rfr_label,usa_raw,usa_score,usa_cat,usa_label,sev_raw,sev_score,sev_cat,sev_label,drr_raw,drr_score,drr_cat,drr_label,cfr_raw,cfr_score,cfr_cat,cfr_label,iav_raw,iav_score,iav_cat,iav_label,bwd_raw,bwd_score,bwd_cat,bwd_label
7731,20607,253001-SJM.2_1-89,253001,SJM.2_1,89,(POLYGON ((16.34196631819108 80.05218058366709...,Svalbard,SJM,Svalbard and Jan Mayen,,,Territory,Territory,,SJ.SV,35921.638036,,,,,0.01858,,,Insignificant Trend,,,,,0.0,0.0,0.0,Low (<2.5%),,,,,-1.438554,1.712289,1.0,Low - Medium (-5 to 0),0.0,0.0,0.0,"Low (0 to 1 in 1,000)",0.0,0.0,0.0,Low (<2.5%),0.925506,2.776519,2.0,Medium - High (0.66-1.00),,,,,0.0,0.0,0.0,"Low (0 to 9 in 1,000,000)",0.380097,1.520388,1.0,Low - Medium (0.25-0.50),,,,


# Append (horizontally) all aggregated water risk scores

In [24]:
indicator = "awr"
industries = list(df_in["industry_short"].unique())
groups = list(df_in["group_short"].unique())

for industry in industries:
    for group in groups:
        print(industry,group)
        df_sel = df_in.loc[(df_in["industry_short"] == industry) &(df_in["group_short"] == group) &(df_in["indicator"] == indicator)]
        df_out = df_sel[["string_id","raw","score","cat","label","weight_fraction"]]

        df_out.columns = ["string_id",
                          "w_{}_{}_{}_raw".format(indicator,industry,group),
                          "w_{}_{}_{}_score".format(indicator,industry,group),
                          "w_{}_{}_{}_cat".format(indicator,industry,group),
                          "w_{}_{}_{}_label".format(indicator,industry,group),
                          "w_{}_{}_{}_weight_fraction".format(indicator,industry,group)]
        gdf_master = pd.merge(left=gdf_master,
                              right=df_out,
                              left_on ="string_id",
                              right_on = "string_id",
                              how="left")

fnb rrr
fnb qan
fnb qal
fnb tot
che rrr
che qan
che qal
che tot
smc rrr
smc qan
smc qal
smc tot
ong rrr
ong qan
ong qal
ong tot
min rrr
min qan
min qal
min tot
tex rrr
tex qan
tex qal
tex tot
agr rrr
agr qan
agr qal
agr tot
elp rrr
elp qan
elp qal
elp tot
def rrr
def qan
def qal
def tot
con rrr
con qan
con qal
con tot


In [25]:
gdf_master.head()

Unnamed: 0,aq30_id,string_id,pfaf_id,gid_1,aqid,geom,name_1,gid_0,name_0,varname_1,nl_name_1,type_1,engtype_1,cc_1,hasc_1,area_km2,rri_raw,rri_score,rri_cat,rri_label,gtd_raw,gtd_score,gtd_cat,gtd_label,bws_raw,bws_score,bws_cat,bws_label,udw_raw,udw_score,udw_cat,udw_label,ucw_raw,ucw_score,ucw_cat,ucw_label,cep_raw,cep_score,cep_cat,cep_label,rfr_raw,rfr_score,rfr_cat,rfr_label,usa_raw,usa_score,usa_cat,usa_label,sev_raw,sev_score,sev_cat,sev_label,drr_raw,drr_score,drr_cat,drr_label,cfr_raw,cfr_score,cfr_cat,cfr_label,iav_raw,iav_score,iav_cat,iav_label,bwd_raw,bwd_score,bwd_cat,bwd_label,w_awr_fnb_rrr_raw,w_awr_fnb_rrr_score,w_awr_fnb_rrr_cat,w_awr_fnb_rrr_label,w_awr_fnb_rrr_weight_fraction,w_awr_fnb_qan_raw,w_awr_fnb_qan_score,w_awr_fnb_qan_cat,w_awr_fnb_qan_label,w_awr_fnb_qan_weight_fraction,w_awr_fnb_qal_raw,w_awr_fnb_qal_score,w_awr_fnb_qal_cat,w_awr_fnb_qal_label,w_awr_fnb_qal_weight_fraction,w_awr_fnb_tot_raw,w_awr_fnb_tot_score,w_awr_fnb_tot_cat,w_awr_fnb_tot_label,w_awr_fnb_tot_weight_fraction,w_awr_che_rrr_raw,w_awr_che_rrr_score,w_awr_che_rrr_cat,w_awr_che_rrr_label,w_awr_che_rrr_weight_fraction,w_awr_che_qan_raw,w_awr_che_qan_score,w_awr_che_qan_cat,w_awr_che_qan_label,w_awr_che_qan_weight_fraction,w_awr_che_qal_raw,w_awr_che_qal_score,w_awr_che_qal_cat,w_awr_che_qal_label,w_awr_che_qal_weight_fraction,w_awr_che_tot_raw,w_awr_che_tot_score,w_awr_che_tot_cat,w_awr_che_tot_label,w_awr_che_tot_weight_fraction,w_awr_smc_rrr_raw,w_awr_smc_rrr_score,w_awr_smc_rrr_cat,w_awr_smc_rrr_label,w_awr_smc_rrr_weight_fraction,w_awr_smc_qan_raw,w_awr_smc_qan_score,w_awr_smc_qan_cat,w_awr_smc_qan_label,w_awr_smc_qan_weight_fraction,w_awr_smc_qal_raw,w_awr_smc_qal_score,w_awr_smc_qal_cat,w_awr_smc_qal_label,w_awr_smc_qal_weight_fraction,w_awr_smc_tot_raw,w_awr_smc_tot_score,w_awr_smc_tot_cat,w_awr_smc_tot_label,w_awr_smc_tot_weight_fraction,w_awr_ong_rrr_raw,w_awr_ong_rrr_score,w_awr_ong_rrr_cat,w_awr_ong_rrr_label,w_awr_ong_rrr_weight_fraction,w_awr_ong_qan_raw,w_awr_ong_qan_score,w_awr_ong_qan_cat,w_awr_ong_qan_label,w_awr_ong_qan_weight_fraction,w_awr_ong_qal_raw,w_awr_ong_qal_score,w_awr_ong_qal_cat,w_awr_ong_qal_label,w_awr_ong_qal_weight_fraction,w_awr_ong_tot_raw,w_awr_ong_tot_score,w_awr_ong_tot_cat,w_awr_ong_tot_label,w_awr_ong_tot_weight_fraction,w_awr_min_rrr_raw,w_awr_min_rrr_score,w_awr_min_rrr_cat,w_awr_min_rrr_label,w_awr_min_rrr_weight_fraction,w_awr_min_qan_raw,w_awr_min_qan_score,w_awr_min_qan_cat,w_awr_min_qan_label,w_awr_min_qan_weight_fraction,w_awr_min_qal_raw,w_awr_min_qal_score,w_awr_min_qal_cat,w_awr_min_qal_label,w_awr_min_qal_weight_fraction,w_awr_min_tot_raw,w_awr_min_tot_score,w_awr_min_tot_cat,w_awr_min_tot_label,w_awr_min_tot_weight_fraction,w_awr_tex_rrr_raw,w_awr_tex_rrr_score,w_awr_tex_rrr_cat,w_awr_tex_rrr_label,w_awr_tex_rrr_weight_fraction,w_awr_tex_qan_raw,w_awr_tex_qan_score,w_awr_tex_qan_cat,w_awr_tex_qan_label,w_awr_tex_qan_weight_fraction,w_awr_tex_qal_raw,w_awr_tex_qal_score,w_awr_tex_qal_cat,w_awr_tex_qal_label,w_awr_tex_qal_weight_fraction,w_awr_tex_tot_raw,w_awr_tex_tot_score,w_awr_tex_tot_cat,w_awr_tex_tot_label,w_awr_tex_tot_weight_fraction,w_awr_agr_rrr_raw,w_awr_agr_rrr_score,w_awr_agr_rrr_cat,w_awr_agr_rrr_label,w_awr_agr_rrr_weight_fraction,w_awr_agr_qan_raw,w_awr_agr_qan_score,w_awr_agr_qan_cat,w_awr_agr_qan_label,w_awr_agr_qan_weight_fraction,w_awr_agr_qal_raw,w_awr_agr_qal_score,w_awr_agr_qal_cat,w_awr_agr_qal_label,w_awr_agr_qal_weight_fraction,w_awr_agr_tot_raw,w_awr_agr_tot_score,w_awr_agr_tot_cat,w_awr_agr_tot_label,w_awr_agr_tot_weight_fraction,w_awr_elp_rrr_raw,w_awr_elp_rrr_score,w_awr_elp_rrr_cat,w_awr_elp_rrr_label,w_awr_elp_rrr_weight_fraction,w_awr_elp_qan_raw,w_awr_elp_qan_score,w_awr_elp_qan_cat,w_awr_elp_qan_label,w_awr_elp_qan_weight_fraction,w_awr_elp_qal_raw,w_awr_elp_qal_score,w_awr_elp_qal_cat,w_awr_elp_qal_label,w_awr_elp_qal_weight_fraction,w_awr_elp_tot_raw,w_awr_elp_tot_score,w_awr_elp_tot_cat,w_awr_elp_tot_label,w_awr_elp_tot_weight_fraction,w_awr_def_rrr_raw,w_awr_def_rrr_score,w_awr_def_rrr_cat,w_awr_def_rrr_label,w_awr_def_rrr_weight_fraction,w_awr_def_qan_raw,w_awr_def_qan_score,w_awr_def_qan_cat,w_awr_def_qan_label,w_awr_def_qan_weight_fraction,w_awr_def_qal_raw,w_awr_def_qal_score,w_awr_def_qal_cat,w_awr_def_qal_label,w_awr_def_qal_weight_fraction,w_awr_def_tot_raw,w_awr_def_tot_score,w_awr_def_tot_cat,w_awr_def_tot_label,w_awr_def_tot_weight_fraction,w_awr_con_rrr_raw,w_awr_con_rrr_score,w_awr_con_rrr_cat,w_awr_con_rrr_label,w_awr_con_rrr_weight_fraction,w_awr_con_qan_raw,w_awr_con_qan_score,w_awr_con_qan_cat,w_awr_con_qan_label,w_awr_con_qan_weight_fraction,w_awr_con_qal_raw,w_awr_con_qal_score,w_awr_con_qal_cat,w_awr_con_qal_label,w_awr_con_qal_weight_fraction,w_awr_con_tot_raw,w_awr_con_tot_score,w_awr_con_tot_cat,w_awr_con_tot_label,w_awr_con_tot_weight_fraction
0,0,111011-EGY.11_1-3365,111011,EGY.11_1,3365,(POLYGON ((31.90590570688292 29.85788703615783...,Al Qahirah,EGY,Egypt,Cairo|El Cairo|El Qahira|Le Caire,,Muhafazah,Governorate,,EG.QH,4.223754,58.0,2.8,2.0,Medium - High (50-60%),0.843958,,,Insignificant Trend,2.318042,5.0,4.0,Extremely High (>80%),0.01057,0.0,0.0,Low (<2.5%),0.6139,2.046333,2.0,Medium - High (60-90%),0.0,2.0,2.0,Medium - High (0 to 1),0.074383,4.180674,4.0,Extremely High (more than 1 in 100),0.023176,0.890711,0.0,Low (<2.5%),0.962396,2.887187,2.0,Medium - High (0.66-1.00),,,,,0.0,0.0,0.0,"Low (0 to 9 in 1,000,000)",1.035414,4.141657,4.0,Extremely High (>1.00),0.987061,4.948243,4.0,Extremely High (>75%),1.622678,2.165272,2.0,Medium - High (2-3),0.163265,4.520815,4.819672,4.0,Extremely High (4-5),0.428571,2.023167,1.768397,1.0,Low - Medium (1-2),0.163265,3.354159,4.290963,4.0,Extremely High (4-5),0.755102,1.230237,1.856046,1.0,Low - Medium (1-2),0.228571,3.117716,4.291653,4.0,Extremely High (4-5),0.533333,2.041185,1.792491,1.0,Low - Medium (1-2),0.085714,2.49987,3.688866,3.0,High (3-4),0.847619,1.622678,2.165272,2.0,Medium - High (2-3),0.190476,3.88825,4.581623,4.0,Extremely High (4-5),0.380952,2.030889,1.778723,1.0,Low - Medium (1-2),0.285714,2.765669,4.037438,4.0,Extremely High (4-5),0.857143,1.230237,1.856046,1.0,Low - Medium (1-2),0.551724,2.205417,3.814044,3.0,High (3-4),0.367816,2.046333,1.799374,1.0,Low - Medium (1-2),0.011494,1.625569,2.030983,2.0,Medium - High (2-3),0.931034,1.230237,1.856046,1.0,Low - Medium (1-2),0.366412,3.117716,4.291653,4.0,Extremely High (4-5),0.427481,2.030889,1.778723,1.0,Low - Medium (1-2),0.022901,2.240525,3.237816,3.0,High (3-4),0.816794,1.622678,2.165272,2.0,Medium - High (2-3),0.326531,3.489695,4.431637,4.0,Extremely High (4-5),0.346939,2.037067,1.786984,1.0,Low - Medium (1-2),0.204082,2.457171,3.614603,3.0,High (3-4),0.877551,0.583864,1.261225,1.0,Low - Medium (1-2),0.142857,4.296044,4.735085,4.0,Extremely High (4-5),0.420168,2.009267,1.749811,1.0,Low - Medium (1-2),0.168067,3.04498,4.157766,4.0,Extremely High (4-5),0.731092,1.230237,1.856046,1.0,Low - Medium (1-2),0.031915,3.415903,4.403867,4.0,Extremely High (4-5),0.723404,2.009267,1.749811,1.0,Low - Medium (1-2),0.053191,3.237085,4.240526,4.0,Extremely High (4-5),0.808511,0.706983,1.374524,1.0,Low - Medium (1-2),0.183673,4.317097,4.743008,4.0,Extremely High (4-5),0.44898,2.030889,1.778723,1.0,Low - Medium (1-2),0.122449,3.068225,4.167781,4.0,Extremely High (4-5),0.755102,0.916285,1.567133,1.0,Low - Medium (1-2),0.178571,3.941655,4.60172,4.0,Extremely High (4-5),0.5,2.030889,1.778723,1.0,Low - Medium (1-2),0.107143,2.993511,4.135594,4.0,Extremely High (4-5),0.785714
1,3,111011-None-3365,111011,-9999,3365,(POLYGON ((32.46194054146073 29.89250514754305...,,,,,,,,,,0.742712,,,,,0.843958,,,Insignificant Trend,2.318042,5.0,4.0,Extremely High (>80%),0.01057,0.0,0.0,Low (<2.5%),,,,,0.0,2.0,2.0,Medium - High (0 to 1),0.074383,4.180674,4.0,Extremely High (more than 1 in 100),0.023176,0.890711,0.0,Low (<2.5%),0.962396,2.887187,2.0,Medium - High (0.66-1.00),,,,,0.0,0.0,0.0,"Low (0 to 9 in 1,000,000)",1.035414,4.141657,4.0,Extremely High (>1.00),0.987061,4.948243,4.0,Extremely High (>75%),0.445356,1.133763,1.0,Low - Medium (1-2),0.081633,4.520815,4.819672,4.0,Extremely High (4-5),0.428571,2.0,1.73742,1.0,Low - Medium (1-2),0.081633,3.610984,4.401604,4.0,Extremely High (4-5),0.591837,0.445356,1.133763,1.0,Low - Medium (1-2),0.152381,3.117716,4.291653,4.0,Extremely High (4-5),0.533333,2.0,1.73742,1.0,Low - Medium (1-2),0.009524,2.516682,3.718105,3.0,High (3-4),0.695238,0.445356,1.133763,1.0,Low - Medium (1-2),0.095238,3.88825,4.581623,4.0,Extremely High (4-5),0.380952,2.0,1.73742,1.0,Low - Medium (1-2),0.095238,2.999726,4.138271,4.0,Extremely High (4-5),0.571429,0.445356,1.133763,1.0,Low - Medium (1-2),0.367816,2.205417,3.814044,3.0,High (3-4),0.367816,,,,NoData,0.0,1.325386,1.52418,1.0,Low - Medium (1-2),0.735632,0.445356,1.133763,1.0,Low - Medium (1-2),0.244275,3.117716,4.291653,4.0,Extremely High (4-5),0.427481,2.0,1.73742,1.0,Low - Medium (1-2),0.007634,2.144309,3.070479,3.0,High (3-4),0.679389,0.445356,1.133763,1.0,Low - Medium (1-2),0.163265,3.489695,4.431637,4.0,Extremely High (4-5),0.346939,2.0,1.73742,1.0,Low - Medium (1-2),0.040816,2.477321,3.649648,3.0,High (3-4),0.55102,0.445356,1.133763,1.0,Low - Medium (1-2),0.134454,4.296044,4.735085,4.0,Extremely High (4-5),0.420168,2.0,1.73742,1.0,Low - Medium (1-2),0.134454,3.096682,4.18004,4.0,Extremely High (4-5),0.689076,0.445356,1.133763,1.0,Low - Medium (1-2),0.021277,3.415903,4.403867,4.0,Extremely High (4-5),0.723404,2.0,1.73742,1.0,Low - Medium (1-2),0.042553,3.259083,4.250003,4.0,Extremely High (4-5),0.787234,0.445356,1.133763,1.0,Low - Medium (1-2),0.163265,4.317097,4.743008,4.0,Extremely High (4-5),0.44898,2.0,1.73742,1.0,Low - Medium (1-2),0.040816,3.204343,4.226421,4.0,Extremely High (4-5),0.653061,0.445356,1.133763,1.0,Low - Medium (1-2),0.142857,3.941655,4.60172,4.0,Extremely High (4-5),0.5,2.0,1.73742,1.0,Low - Medium (1-2),0.035714,3.103399,4.182934,4.0,Extremely High (4-5),0.678571
2,5,111012-EGY.11_1-3365,111012,EGY.11_1,3365,(POLYGON ((31.88750000022489 29.85833333371637...,Al Qahirah,EGY,Egypt,Cairo|El Cairo|El Qahira|Le Caire,,Muhafazah,Governorate,,EG.QH,258.364251,58.0,2.8,2.0,Medium - High (50-60%),0.843958,,,Insignificant Trend,1.0,5.0,-1.0,Arid and Low Water Use,0.016651,0.413638,0.0,Low (<2.5%),0.6139,2.046333,2.0,Medium - High (60-90%),0.0,2.0,2.0,Medium - High (0 to 1),0.0,0.0,0.0,"Low (0 to 1 in 1,000)",0.045942,1.877895,1.0,Low - Medium (2.5-5%),1.027464,3.082393,3.0,High (1.00-1.33),,,,,0.0,0.0,0.0,"Low (0 to 9 in 1,000,000)",1.161367,4.645469,4.0,Extremely High (>1.00),1.0,5.0,-1.0,Arid and Low Water Use,1.972883,2.410512,2.0,Medium - High (2-3),0.163265,4.39873,4.773728,4.0,Extremely High (4-5),0.428571,2.023167,1.768397,1.0,Low - Medium (1-2),0.163265,3.360587,4.293732,4.0,Extremely High (4-5),0.755102,1.697178,2.217443,2.0,Medium - High (2-3),0.228571,1.980562,3.509487,3.0,High (3-4),0.533333,2.041185,1.792491,1.0,Low - Medium (1-2),0.085714,1.910274,2.607886,2.0,Medium - High (2-3),0.847619,1.972883,2.410512,2.0,Medium - High (2-3),0.190476,3.465983,4.422714,4.0,Extremely High (4-5),0.380952,2.030889,1.778723,1.0,Low - Medium (1-2),0.285714,2.655818,3.960088,3.0,High (3-4),0.857143,1.697178,2.217443,2.0,Medium - High (2-3),0.551724,1.732991,3.174165,3.0,High (3-4),0.367816,2.046333,1.799374,1.0,Low - Medium (1-2),0.011494,1.715637,2.21349,2.0,Medium - High (2-3),0.931034,1.697178,2.217443,2.0,Medium - High (2-3),0.366412,1.980562,3.509487,3.0,High (3-4),0.427481,2.030889,1.778723,1.0,Low - Medium (1-2),0.022901,1.854847,2.495574,2.0,Medium - High (2-3),0.816794,1.972883,2.410512,2.0,Medium - High (2-3),0.326531,3.080784,4.277754,4.0,Extremely High (4-5),0.346939,2.037067,1.786984,1.0,Low - Medium (1-2),0.204082,2.425817,3.560073,3.0,High (3-4),0.877551,1.243075,1.86786,1.0,Low - Medium (1-2),0.142857,4.066571,4.648729,4.0,Extremely High (4-5),0.420168,2.009267,1.749811,1.0,Low - Medium (1-2),0.168067,3.04191,4.156444,4.0,Extremely High (4-5),0.731092,1.697178,2.217443,2.0,Medium - High (2-3),0.031915,2.988838,4.243153,4.0,Extremely High (4-5),0.723404,2.009267,1.749811,1.0,Low - Medium (1-2),0.053191,2.873406,4.083852,4.0,Extremely High (4-5),0.808511,1.32957,1.947457,1.0,Low - Medium (1-2),0.183673,3.98763,4.619022,4.0,Extremely High (4-5),0.44898,2.030889,1.778723,1.0,Low - Medium (1-2),0.122449,3.023766,4.148627,4.0,Extremely High (4-5),0.755102,1.476613,2.062987,2.0,Medium - High (2-3),0.178571,3.409133,4.40132,4.0,Extremely High (4-5),0.5,2.030889,1.778723,1.0,Low - Medium (1-2),0.107143,2.781982,4.044465,4.0,Extremely High (4-5),0.785714
3,7,111012-EGY.15_1-None,111012,EGY.15_1,-9999,(POLYGON ((32.36609158210641 29.63378296420298...,As Suways,EGY,Egypt,El Suweiz|Es Suweis|Suez,محافظة السويس,Muhafazah,Governorate,,EG.SW,1.748248,58.0,2.8,2.0,Medium - High (50-60%),,,,,1.0,5.0,-1.0,Arid and Low Water Use,0.016651,0.413638,0.0,Low (<2.5%),0.6139,2.046333,2.0,Medium - High (60-90%),0.0,2.0,2.0,Medium - High (0 to 1),0.0,0.0,0.0,"Low (0 to 1 in 1,000)",0.045942,1.877895,1.0,Low - Medium (2.5-5%),1.027464,3.082393,3.0,High (1.00-1.33),,,,,0.0,0.0,0.0,"Low (0 to 9 in 1,000,000)",1.161367,4.645469,4.0,Extremely High (>1.00),1.0,5.0,-1.0,Arid and Low Water Use,1.972883,2.410512,2.0,Medium - High (2-3),0.163265,4.39873,4.773728,4.0,Extremely High (4-5),0.428571,2.023167,1.768397,1.0,Low - Medium (1-2),0.163265,3.360587,4.293732,4.0,Extremely High (4-5),0.755102,1.697178,2.217443,2.0,Medium - High (2-3),0.228571,1.980562,3.509487,3.0,High (3-4),0.533333,2.041185,1.792491,1.0,Low - Medium (1-2),0.085714,1.910274,2.607886,2.0,Medium - High (2-3),0.847619,1.972883,2.410512,2.0,Medium - High (2-3),0.190476,3.465983,4.422714,4.0,Extremely High (4-5),0.380952,2.030889,1.778723,1.0,Low - Medium (1-2),0.285714,2.655818,3.960088,3.0,High (3-4),0.857143,1.697178,2.217443,2.0,Medium - High (2-3),0.551724,1.732991,3.174165,3.0,High (3-4),0.367816,2.046333,1.799374,1.0,Low - Medium (1-2),0.011494,1.715637,2.21349,2.0,Medium - High (2-3),0.931034,1.697178,2.217443,2.0,Medium - High (2-3),0.366412,1.980562,3.509487,3.0,High (3-4),0.427481,2.030889,1.778723,1.0,Low - Medium (1-2),0.022901,1.854847,2.495574,2.0,Medium - High (2-3),0.816794,1.972883,2.410512,2.0,Medium - High (2-3),0.326531,3.080784,4.277754,4.0,Extremely High (4-5),0.346939,2.037067,1.786984,1.0,Low - Medium (1-2),0.204082,2.425817,3.560073,3.0,High (3-4),0.877551,1.243075,1.86786,1.0,Low - Medium (1-2),0.142857,4.066571,4.648729,4.0,Extremely High (4-5),0.420168,2.009267,1.749811,1.0,Low - Medium (1-2),0.168067,3.04191,4.156444,4.0,Extremely High (4-5),0.731092,1.697178,2.217443,2.0,Medium - High (2-3),0.031915,2.988838,4.243153,4.0,Extremely High (4-5),0.723404,2.009267,1.749811,1.0,Low - Medium (1-2),0.053191,2.873406,4.083852,4.0,Extremely High (4-5),0.808511,1.32957,1.947457,1.0,Low - Medium (1-2),0.183673,3.98763,4.619022,4.0,Extremely High (4-5),0.44898,2.030889,1.778723,1.0,Low - Medium (1-2),0.122449,3.023766,4.148627,4.0,Extremely High (4-5),0.755102,1.476613,2.062987,2.0,Medium - High (2-3),0.178571,3.409133,4.40132,4.0,Extremely High (4-5),0.5,2.030889,1.778723,1.0,Low - Medium (1-2),0.107143,2.781982,4.044465,4.0,Extremely High (4-5),0.785714
4,8,111012-EGY.8_1-3365,111012,EGY.8_1,3365,(POLYGON ((31.89332770960627 29.73370743035362...,Al Jizah,EGY,Egypt,El Giza|El Gīzah|Gizeh|Giza|Guizèh,,Muhafazah,Governorate,,EG.JZ,510.53507,58.0,2.8,2.0,Medium - High (50-60%),0.843958,,,Insignificant Trend,1.0,5.0,-1.0,Arid and Low Water Use,0.016651,0.413638,0.0,Low (<2.5%),0.6139,2.046333,2.0,Medium - High (60-90%),0.0,2.0,2.0,Medium - High (0 to 1),0.0,0.0,0.0,"Low (0 to 1 in 1,000)",0.045942,1.877895,1.0,Low - Medium (2.5-5%),1.027464,3.082393,3.0,High (1.00-1.33),,,,,0.0,0.0,0.0,"Low (0 to 9 in 1,000,000)",1.161367,4.645469,4.0,Extremely High (>1.00),1.0,5.0,-1.0,Arid and Low Water Use,1.972883,2.410512,2.0,Medium - High (2-3),0.163265,4.39873,4.773728,4.0,Extremely High (4-5),0.428571,2.023167,1.768397,1.0,Low - Medium (1-2),0.163265,3.360587,4.293732,4.0,Extremely High (4-5),0.755102,1.697178,2.217443,2.0,Medium - High (2-3),0.228571,1.980562,3.509487,3.0,High (3-4),0.533333,2.041185,1.792491,1.0,Low - Medium (1-2),0.085714,1.910274,2.607886,2.0,Medium - High (2-3),0.847619,1.972883,2.410512,2.0,Medium - High (2-3),0.190476,3.465983,4.422714,4.0,Extremely High (4-5),0.380952,2.030889,1.778723,1.0,Low - Medium (1-2),0.285714,2.655818,3.960088,3.0,High (3-4),0.857143,1.697178,2.217443,2.0,Medium - High (2-3),0.551724,1.732991,3.174165,3.0,High (3-4),0.367816,2.046333,1.799374,1.0,Low - Medium (1-2),0.011494,1.715637,2.21349,2.0,Medium - High (2-3),0.931034,1.697178,2.217443,2.0,Medium - High (2-3),0.366412,1.980562,3.509487,3.0,High (3-4),0.427481,2.030889,1.778723,1.0,Low - Medium (1-2),0.022901,1.854847,2.495574,2.0,Medium - High (2-3),0.816794,1.972883,2.410512,2.0,Medium - High (2-3),0.326531,3.080784,4.277754,4.0,Extremely High (4-5),0.346939,2.037067,1.786984,1.0,Low - Medium (1-2),0.204082,2.425817,3.560073,3.0,High (3-4),0.877551,1.243075,1.86786,1.0,Low - Medium (1-2),0.142857,4.066571,4.648729,4.0,Extremely High (4-5),0.420168,2.009267,1.749811,1.0,Low - Medium (1-2),0.168067,3.04191,4.156444,4.0,Extremely High (4-5),0.731092,1.697178,2.217443,2.0,Medium - High (2-3),0.031915,2.988838,4.243153,4.0,Extremely High (4-5),0.723404,2.009267,1.749811,1.0,Low - Medium (1-2),0.053191,2.873406,4.083852,4.0,Extremely High (4-5),0.808511,1.32957,1.947457,1.0,Low - Medium (1-2),0.183673,3.98763,4.619022,4.0,Extremely High (4-5),0.44898,2.030889,1.778723,1.0,Low - Medium (1-2),0.122449,3.023766,4.148627,4.0,Extremely High (4-5),0.755102,1.476613,2.062987,2.0,Medium - High (2-3),0.178571,3.409133,4.40132,4.0,Extremely High (4-5),0.5,2.030889,1.778723,1.0,Low - Medium (1-2),0.107143,2.781982,4.044465,4.0,Extremely High (4-5),0.785714


In [26]:
gdf_master.sort_index(axis=1,inplace=True)

In [27]:
gdf_master.shape

(68511, 268)

# Save in multiple formats:

1. Geopackage
1. CSV (no geom)
1. Pickle 
1. Bigquery 
1. PostGIS



In [28]:
df_master =gdf_master.drop("geom",axis=1)

In [29]:
destination_path_shp = "{}/{}.shp".format(ec2_output_path,SCRIPT_NAME)
destination_path_csv = "{}/{}.csv".format(ec2_output_path,SCRIPT_NAME)
destination_path_pkl = "{}/{}.pkl".format(ec2_output_path,SCRIPT_NAME)
output_table_name = "{}_v{:02.0f}".format(SCRIPT_NAME,OUTPUT_VERSION).lower()
destination_table = "{}.{}".format(BQ_OUTPUT_DATASET_NAME,BQ_OUTPUT_TABLE_NAME)


In [30]:
end = datetime.datetime.now()
elapsed = end - start
print(elapsed)

0:02:47.781958


In [31]:
gdf_simple = gdf_master[["string_id","geom"]]

In [32]:
# Saving as geopackage did not work. Therefore saving the unique identifier (string_id) and geom as shapefile. 
# This can be joined in GIS software later
gdf_simple.to_file(filename=destination_path_shp,driver="ESRI Shapefile",encoding ='utf-8')

In [33]:
gdf_master.to_pickle(destination_path_pkl)

In [34]:
df_master.to_csv(destination_path_csv, encoding='utf-8')

In [35]:
!aws s3 cp {ec2_output_path} {s3_output_path} --recursive

upload: ../../../../data/Y2018M12D14_RH_Master_Horizontal_GPD_V01/output_V10/Y2018M12D14_RH_Master_Horizontal_GPD_V01.cpg to s3://wri-projects/Aqueduct30/processData/Y2018M12D14_RH_Master_Horizontal_GPD_V01/output_V10/Y2018M12D14_RH_Master_Horizontal_GPD_V01.cpg
upload: ../../../../data/Y2018M12D14_RH_Master_Horizontal_GPD_V01/output_V10/Y2018M12D14_RH_Master_Horizontal_GPD_V01.prj to s3://wri-projects/Aqueduct30/processData/Y2018M12D14_RH_Master_Horizontal_GPD_V01/output_V10/Y2018M12D14_RH_Master_Horizontal_GPD_V01.prj
upload: ../../../../data/Y2018M12D14_RH_Master_Horizontal_GPD_V01/output_V10/Y2018M12D14_RH_Master_Horizontal_GPD_V01.shx to s3://wri-projects/Aqueduct30/processData/Y2018M12D14_RH_Master_Horizontal_GPD_V01/output_V10/Y2018M12D14_RH_Master_Horizontal_GPD_V01.shx
upload: ../../../../data/Y2018M12D14_RH_Master_Horizontal_GPD_V01/output_V10/Y2018M12D14_RH_Master_Horizontal_GPD_V01.dbf to s3://wri-projects/Aqueduct30/processData/Y2018M12D14_RH_Master_Horizontal_GPD_V01/outp

In [36]:
gdf_master2 = gdf_master.rename(columns={"geom":"geometry"})
gdf_master2 = gpd.GeoDataFrame(gdf_master2,geometry="geometry")

In [37]:
gdfFromSQL = uploadGDFtoPostGIS(gdf_master2,output_table_name,False)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Create table temppolygon
Create table tempmultipolygon
DROP TABLE IF EXISTS y2018m12d14_rh_master_horizontal_gpd_v01_v10
ALTER TABLE temppolygon ALTER COLUMN geom type geometry(MultiPolygon, 4326) using ST_Multi(geom);
CREATE TABLE y2018m12d14_rh_master_horizontal_gpd_v01_v10 AS (SELECT * FROM temppolygon UNION SELECT * FROM tempmultipolygon);
UPDATE y2018m12d14_rh_master_horizontal_gpd_v01_v10 SET geom = st_makevalid(geom);
DROP TABLE temppolygon,tempmultipolygon


In [38]:
gdfFromSQL.to_gbq(destination_table=destination_table,
                  project_id=BQ_PROJECT_ID,
                  chunksize=1000,
                  if_exists="replace")

69it [23:42, 20.62s/it]


In [39]:
end = datetime.datetime.now()
elapsed = end - start
print(elapsed)

0:51:51.547798


Previous runs:   
0:47:11.853503  
0:48:37.800392  
0:49:17.171412  
0:51:51.547798
