In [1]:
""" Create horizontal table for readability. 
-------------------------------------------------------------------------------

Data is strored vertically in bigquery which means each indicator has its 
own row. This script puts the verious indicators as columns in a new,
horizontal table. Additional useful attributes are added. 

gadm metadata:
https://gadm.org/metadata.html
 

Author: Rutger Hofste
Date: 20181214
Kernel: python35
Docker: rutgerhofste/gisdocker:ubuntu16.04

"""

SCRIPT_NAME = 'Y2018M12D14_RH_Master_Horizontal_GPD_V01'
OUTPUT_VERSION = 7


# AWS RDS PostGIS
DATABASE_ENDPOINT = "aqueduct30v05.cgpnumwmfcqc.eu-central-1.rds.amazonaws.com"
DATABASE_NAME = "database01"

POSTGIS_INPUT_TABLE_NAME = "y2018m12d06_rh_master_shape_v01_v02"

# BigQuery 
BQ_IN = {}
# gadm
BQ_IN["GADM36L01"] = "y2018m11d12_rh_gadm36_level1_rds_to_bq_v01_v01"
# Area 
BQ_IN["area"] = 'y2018m12d07_rh_process_area_bq_v01_v01'

# too slow, using s3 instead
BQ_IN["indicators"] = 'y2018m12d11_rh_master_weights_gpd_v02_v06'
S3_INPUT_PATH = "s3://wri-projects/Aqueduct30/processData/Y2018M12D11_RH_Master_Weights_GPD_V02/output_V07"

BQ_PROJECT_ID = "aqueduct30"
BQ_OUTPUT_DATASET_NAME = "aqueduct30v01"
BQ_OUTPUT_TABLE_NAME = "{}_v{:02.0f}".format(SCRIPT_NAME,OUTPUT_VERSION).lower()

ec2_input_path = "/volumes/data/{}/input_V{:02.0f}".format(SCRIPT_NAME,OUTPUT_VERSION) 
ec2_output_path = "/volumes/data/{}/output_V{:02.0f}".format(SCRIPT_NAME,OUTPUT_VERSION) 
s3_output_path = "s3://wri-projects/Aqueduct30/processData/{}/output_V{:02.0f}/".format(SCRIPT_NAME,OUTPUT_VERSION)

print("PostGIS table name: ", POSTGIS_INPUT_TABLE_NAME,
      "\nBQ_OUTPUT_DATASET_NAME: ", BQ_OUTPUT_DATASET_NAME,
      "\nBQ_OUTPUT_TABLE_NAME: ", BQ_OUTPUT_TABLE_NAME,
      "\ns3_output_path: ", s3_output_path,
      "\nec2_output_path:" , ec2_output_path)

PostGIS table name:  y2018m12d06_rh_master_shape_v01_v02 
BQ_OUTPUT_DATASET_NAME:  aqueduct30v01 
BQ_OUTPUT_TABLE_NAME:  y2018m12d14_rh_master_horizontal_gpd_v01_v07 
s3_output_path:  s3://wri-projects/Aqueduct30/processData/Y2018M12D14_RH_Master_Horizontal_GPD_V01/output_V07/ 
ec2_output_path: /volumes/data/Y2018M12D14_RH_Master_Horizontal_GPD_V01/output_V07


In [2]:
import time, datetime, sys
# Stamp the run in UTC. time.strftime() without a time tuple formats the
# host's local time, which would make the "UTC" label wrong on a machine
# that is not configured for UTC, so the tuple from time.gmtime() is passed.
utc_now = time.gmtime()
dateString = time.strftime("Y%YM%mD%d", utc_now)
timeString = time.strftime("UTC %H:%M", utc_now)
# Wall-clock reference used by the elapsed-time cells further down.
start = datetime.datetime.now()
print(dateString,timeString)
sys.version

Y2019M02D18 UTC 15:32


'3.5.4 |Anaconda, Inc.| (default, Nov 20 2017, 18:44:38) \n[GCC 7.2.0]'

In [3]:
!rm -r {ec2_output_path}
!mkdir -p {ec2_output_path}

rm: cannot remove '/volumes/data/Y2018M12D14_RH_Master_Horizontal_GPD_V01/output_V07': No such file or directory


In [4]:
# Copy everything under S3_INPUT_PATH into the local input folder.
!aws s3 cp {S3_INPUT_PATH} {ec2_input_path} --recursive

download: s3://wri-projects/Aqueduct30/processData/Y2018M12D11_RH_Master_Weights_GPD_V02/output_V07/Y2018M12D11_RH_Master_Weights_GPD_V02.pkl to ../../../../data/Y2018M12D14_RH_Master_Horizontal_GPD_V01/input_V07/Y2018M12D11_RH_Master_Weights_GPD_V02.pkl
download: s3://wri-projects/Aqueduct30/processData/Y2018M12D11_RH_Master_Weights_GPD_V02/output_V07/Y2018M12D11_RH_Master_Weights_GPD_V02.csv to ../../../../data/Y2018M12D14_RH_Master_Horizontal_GPD_V01/input_V07/Y2018M12D11_RH_Master_Weights_GPD_V02.csv


In [5]:
import os
import pandas as pd
import geopandas as gpd
import numpy as np
import sqlalchemy
from google.cloud import bigquery
from shapely.geometry.multipolygon import MultiPolygon
from geoalchemy2 import Geometry, WKTElement

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/.google.json"
os.environ["GOOGLE_CLOUD_PROJECT"] = "aqueduct30"
client = bigquery.Client(project=BQ_PROJECT_ID)

%matplotlib inline
pd.set_option('display.max_columns', 500)


In [6]:
F = open("/.password","r")
password = F.read().splitlines()[0]
F.close()

engine = sqlalchemy.create_engine("postgresql://rutgerhofste:{}@{}:5432/{}".format(password,DATABASE_ENDPOINT,DATABASE_NAME))
connection = engine.connect()

In [7]:
def uploadGDFtoPostGIS(gdf,tableName,saveIndex):
    """Upload a polygon GeoDataFrame to a PostGIS table and read the result back.

    Rows are split by geometry type into two temporary tables (temppolygon and
    tempmultipolygon). The polygon table is converted to MultiPolygon with
    ST_Multi, both tables are combined into `tableName`, and the geometries
    are passed through ST_MakeValid. Geometries are written as WKT labelled
    with SRID 4326; no reprojection is performed here.

    Relies on the module-level `engine` (for to_sql) and `connection` (for the
    SQL statements and the read-back), which must exist before the call.

    Args:
        gdf: input GeoDataFrame. The geometries are read from the column named
            "geometry". Only rows whose geometry type is Polygon or
            MultiPolygon are uploaded. The frame itself is not modified.
        tableName: PostGIS table name (string). An existing table with this
            name is dropped. The name is interpolated into the SQL text as-is,
            so it must come from a trusted source.
        saveIndex: save the index in a separate column in PostgreSQL,
            otherwise it is discarded (Boolean).

    Returns:
        GeoDataFrame read back from `tableName`, with geometry column "geom".
    """
    # Geometry type per row, kept in a separate Series instead of being written
    # into a "type" column, so the caller's GeoDataFrame is left untouched.
    geomTypeSeries = gdf.geometry.geom_type
    geomTypes = ["Polygon","MultiPolygon"]

    for geomType in geomTypes:
        geomTypeLower = str.lower(geomType)
        # Explicit copy: a column is added to the subset below, which on a bare
        # .loc selection triggers pandas' SettingWithCopyWarning.
        gdfType = gdf.loc[geomTypeSeries == geomType].copy()
        gdfType['geom'] = gdfType['geometry'].apply(lambda x: WKTElement(x.wkt, srid=4326))
        # The shapely geometries are replaced by the WKT column created above.
        gdfType = gdfType.drop(["geometry"], axis=1)
        print("Create table temp%s" %(geomTypeLower))
        gdfType.to_sql(
            name = "temp%s" %(geomTypeLower),
            con = engine,
            if_exists='replace',
            index= saveIndex,
            dtype={'geom': Geometry(str.upper(geomType), srid= 4326)}
        )

    # Merge both tables and make valid.
    # NOTE(review): UNION (as opposed to UNION ALL) also removes duplicate
    # rows - confirm that this de-duplication is intended.
    sql = []
    sql.append("DROP TABLE IF EXISTS %s"  %(tableName))
    sql.append("ALTER TABLE temppolygon ALTER COLUMN geom type geometry(MultiPolygon, 4326) using ST_Multi(geom);")
    sql.append("CREATE TABLE %s AS (SELECT * FROM temppolygon UNION SELECT * FROM tempmultipolygon);" %(tableName))
    sql.append("UPDATE %s SET geom = st_makevalid(geom);" %(tableName))
    sql.append("DROP TABLE temppolygon,tempmultipolygon")

    for statement in sql:
        print(statement)
        connection.execute(statement)
    gdfFromSQL = gpd.GeoDataFrame.from_postgis("select * from %s" %(tableName),connection,geom_col='geom' )
    return gdfFromSQL

In [8]:
sql = """
SELECT
    aq30_id,
    string_id,
    pfaf_id,
    gid_1, 
    aqid,
    geom
FROM {}
""".format(POSTGIS_INPUT_TABLE_NAME)

In [9]:
# Run the query against PostGIS; the "geom" column becomes the geometry.
gdf =gpd.GeoDataFrame.from_postgis(sql,engine,geom_col='geom')

In [10]:
gdf.head()

Unnamed: 0,aq30_id,string_id,pfaf_id,gid_1,aqid,geom
0,0,111011-EGY.11_1-3365,111011,EGY.11_1,3365,(POLYGON ((31.90590570688292 29.85788703615783...
1,3,111011-None-3365,111011,-9999,3365,(POLYGON ((32.46194054146073 29.89250514754305...
2,5,111012-EGY.11_1-3365,111012,EGY.11_1,3365,(POLYGON ((31.88750000022489 29.85833333371637...
3,7,111012-EGY.15_1-None,111012,EGY.15_1,-9999,(POLYGON ((32.36609158210641 29.63378296420298...
4,8,111012-EGY.8_1-3365,111012,EGY.8_1,3365,(POLYGON ((31.89332770960627 29.73370743035362...


In [11]:
gdf.shape

(68511, 6)

In [12]:
# Second name for the same object (no copy is made). The merges below rebind
# gdf_master to new frames, so `gdf` keeps the columns read from PostGIS.
gdf_master = gdf

In [13]:
sql_gadm = """
SELECT
  gid_1,
  name_1,
  gid_0,
  name_0,
  varname_1,
  nl_name_1,
  type_1,
  engtype_1,
  cc_1,
  hasc_1
FROM
  `{}.{}.{}`
""".format(BQ_PROJECT_ID,BQ_OUTPUT_DATASET_NAME,BQ_IN["GADM36L01"])

In [14]:
# No project_id is passed here.
# NOTE(review): presumably the project is picked up from the
# GOOGLE_CLOUD_PROJECT / credentials environment variables set earlier - confirm.
df_gadm = pd.read_gbq(query=sql_gadm,dialect="standard")

In [15]:
# Left join on gid_1: every master row is kept and the GADM attributes are
# attached where a matching gid_1 exists.
gdf_master = pd.merge(gdf_master, df_gadm, how="left", on="gid_1")

In [16]:
sql_area = """
SELECT
  string_id,
  area_km2
FROM
  `{}.{}.{}`
""".format(BQ_PROJECT_ID,BQ_OUTPUT_DATASET_NAME,BQ_IN["area"])
df_area = pd.read_gbq(query=sql_area,dialect="standard")

In [17]:
# Left join on string_id: attach area_km2 to every master row.
gdf_master = pd.merge(gdf_master, df_area, how="left", on="string_id")

In [18]:
sql_in = """
SELECT
  string_id,
  indicator,
  group_short,
  industry_short,
  raw,
  score,
  cat,
  label,
  weight_fraction,
  weighted_score
FROM
  `{}.{}.{}`
""".format(BQ_PROJECT_ID,BQ_OUTPUT_DATASET_NAME,BQ_IN["indicators"])
#df_in = pd.read_gbq(query=sql_in,dialect="standard") # Takes too long, reverting to pickled file instead

In [19]:
source_path = "{}/Y2018M12D11_RH_Master_Weights_GPD_V02.pkl".format(ec2_input_path)

In [20]:
# NOTE(review): unpickling can execute arbitrary code; only load this file
# when the S3 location it was downloaded from is trusted.
df_in = pd.read_pickle(source_path)

In [21]:
df_in.head()

Unnamed: 0,cat,group_short,indicator,industry_short,label,raw,score,string_id,weight_fraction,weighted_score
0,2.0,qal,cep,ong,Medium to high (0 to +1),0.0,2.0,111011-EGY.11_1-3365,0.0,0.0
1,2.0,qal,cep,fnb,Medium to high (0 to +1),0.0,2.0,111011-EGY.11_1-3365,0.081633,0.163265
2,2.0,qal,cep,smc,Medium to high (0 to +1),0.0,2.0,111011-EGY.11_1-3365,0.095238,0.190476
3,2.0,qal,cep,agr,Medium to high (0 to +1),0.0,2.0,111011-EGY.11_1-3365,0.134454,0.268908
4,2.0,qal,cep,che,Medium to high (0 to +1),0.0,2.0,111011-EGY.11_1-3365,0.009524,0.019048


# Append (horizontally) all indicators

In [22]:
indicators = list(df_in["indicator"].unique())
indicators.remove('awr')
for indicator in indicators:
    print(indicator)
    df_sel = df_in.loc[(df_in["industry_short"] == "def") &(df_in["indicator"] == indicator)]
    df_out = df_sel[["string_id","raw","score","cat","label"]]
    df_out.columns = ["string_id",
                      indicator + "_raw",
                      indicator +"_score",
                      indicator +"_cat",
                      indicator +"_label"]
    gdf_master = pd.merge(left=gdf_master,
                          right=df_out,
                          left_on ="string_id",
                          right_on = "string_id",
                          how="left")

cep
drr
udw
gtd
bws
ucw
cfr
usa
bwd
rri
sev
iav
rfr


In [23]:
gdf_master.loc[gdf_master["string_id"] == "253001-SJM.2_1-89"]

Unnamed: 0,aq30_id,string_id,pfaf_id,gid_1,aqid,geom,name_1,gid_0,name_0,varname_1,nl_name_1,type_1,engtype_1,cc_1,hasc_1,area_km2,cep_raw,cep_score,cep_cat,cep_label,drr_raw,drr_score,drr_cat,drr_label,udw_raw,udw_score,udw_cat,udw_label,gtd_raw,gtd_score,gtd_cat,gtd_label,bws_raw,bws_score,bws_cat,bws_label,ucw_raw,ucw_score,ucw_cat,ucw_label,cfr_raw,cfr_score,cfr_cat,cfr_label,usa_raw,usa_score,usa_cat,usa_label,bwd_raw,bwd_score,bwd_cat,bwd_label,rri_raw,rri_score,rri_cat,rri_label,sev_raw,sev_score,sev_cat,sev_label,iav_raw,iav_score,iav_cat,iav_label,rfr_raw,rfr_score,rfr_cat,rfr_label
7731,20607,253001-SJM.2_1-89,253001,SJM.2_1,89,(POLYGON ((16.34196631819108 80.05218058366709...,Svalbard,SJM,Svalbard and Jan Mayen,,,Territory,Territory,,SJ.SV,35921.638036,-1.438554,1.712289,1.0,Low to medium (-5 to 0),,,,,0.0,0.0,0.0,Low (< 2.5%),0.01858,,,Insignificant Trend,,,,,,,,,0.0,0.0,0.0,"Low (0 to 9 in 1,000,000)",0.0,0.0,0.0,Low (< 2.5%),,,,,,,,,0.925506,2.776519,2.0,Medium - High,0.380097,1.520388,1.0,Low - Medium,0.0,0.0,0.0,"Low (0 to 1 in 1,000)"


# Append (horizontally) all aggregated water risk scores

In [24]:
indicator = "awr"
industries = list(df_in["industry_short"].unique())
groups = list(df_in["group_short"].unique())

for industry in industries:
    for group in groups:
        print(industry,group)
        df_sel = df_in.loc[(df_in["industry_short"] == industry) &(df_in["group_short"] == group) &(df_in["indicator"] == indicator)]
        df_out = df_sel[["string_id","raw","score","cat","label"]]

        df_out.columns = ["string_id",
                          "w_{}_{}_{}_raw".format(indicator,industry,group),
                          "w_{}_{}_{}_score".format(indicator,industry,group),
                          "w_{}_{}_{}_cat".format(indicator,industry,group),
                          "w_{}_{}_{}_label".format(indicator,industry,group)]
        gdf_master = pd.merge(left=gdf_master,
                              right=df_out,
                              left_on ="string_id",
                              right_on = "string_id",
                              how="left")

ong qal
ong qan
ong rrr
ong tot
fnb qal
fnb qan
fnb rrr
fnb tot
smc qal
smc qan
smc rrr
smc tot
agr qal
agr qan
agr rrr
agr tot
che qal
che qan
che rrr
che tot
min qal
min qan
min rrr
min tot
con qal
con qan
con rrr
con tot
def qal
def qan
def rrr
def tot
elp qal
elp qan
elp rrr
elp tot
tex qal
tex qan
tex rrr
tex tot


In [25]:
gdf_master.head()

Unnamed: 0,aq30_id,string_id,pfaf_id,gid_1,aqid,geom,name_1,gid_0,name_0,varname_1,nl_name_1,type_1,engtype_1,cc_1,hasc_1,area_km2,cep_raw,cep_score,cep_cat,cep_label,drr_raw,drr_score,drr_cat,drr_label,udw_raw,udw_score,udw_cat,udw_label,gtd_raw,gtd_score,gtd_cat,gtd_label,bws_raw,bws_score,bws_cat,bws_label,ucw_raw,ucw_score,ucw_cat,ucw_label,cfr_raw,cfr_score,cfr_cat,cfr_label,usa_raw,usa_score,usa_cat,usa_label,bwd_raw,bwd_score,bwd_cat,bwd_label,rri_raw,rri_score,rri_cat,rri_label,sev_raw,sev_score,sev_cat,sev_label,iav_raw,iav_score,iav_cat,iav_label,rfr_raw,rfr_score,rfr_cat,rfr_label,w_awr_ong_qal_raw,w_awr_ong_qal_score,w_awr_ong_qal_cat,w_awr_ong_qal_label,w_awr_ong_qan_raw,w_awr_ong_qan_score,w_awr_ong_qan_cat,w_awr_ong_qan_label,w_awr_ong_rrr_raw,w_awr_ong_rrr_score,w_awr_ong_rrr_cat,w_awr_ong_rrr_label,w_awr_ong_tot_raw,w_awr_ong_tot_score,w_awr_ong_tot_cat,w_awr_ong_tot_label,w_awr_fnb_qal_raw,w_awr_fnb_qal_score,w_awr_fnb_qal_cat,w_awr_fnb_qal_label,w_awr_fnb_qan_raw,w_awr_fnb_qan_score,w_awr_fnb_qan_cat,w_awr_fnb_qan_label,w_awr_fnb_rrr_raw,w_awr_fnb_rrr_score,w_awr_fnb_rrr_cat,w_awr_fnb_rrr_label,w_awr_fnb_tot_raw,w_awr_fnb_tot_score,w_awr_fnb_tot_cat,w_awr_fnb_tot_label,w_awr_smc_qal_raw,w_awr_smc_qal_score,w_awr_smc_qal_cat,w_awr_smc_qal_label,w_awr_smc_qan_raw,w_awr_smc_qan_score,w_awr_smc_qan_cat,w_awr_smc_qan_label,w_awr_smc_rrr_raw,w_awr_smc_rrr_score,w_awr_smc_rrr_cat,w_awr_smc_rrr_label,w_awr_smc_tot_raw,w_awr_smc_tot_score,w_awr_smc_tot_cat,w_awr_smc_tot_label,w_awr_agr_qal_raw,w_awr_agr_qal_score,w_awr_agr_qal_cat,w_awr_agr_qal_label,w_awr_agr_qan_raw,w_awr_agr_qan_score,w_awr_agr_qan_cat,w_awr_agr_qan_label,w_awr_agr_rrr_raw,w_awr_agr_rrr_score,w_awr_agr_rrr_cat,w_awr_agr_rrr_label,w_awr_agr_tot_raw,w_awr_agr_tot_score,w_awr_agr_tot_cat,w_awr_agr_tot_label,w_awr_che_qal_raw,w_awr_che_qal_score,w_awr_che_qal_cat,w_awr_che_qal_label,w_awr_che_qan_raw,w_awr_che_qan_score,w_awr_che_qan_cat,w_awr_che_qan_label,w_awr_che_rrr_raw,w_awr_che_rrr_s
core,w_awr_che_rrr_cat,w_awr_che_rrr_label,w_awr_che_tot_raw,w_awr_che_tot_score,w_awr_che_tot_cat,w_awr_che_tot_label,w_awr_min_qal_raw,w_awr_min_qal_score,w_awr_min_qal_cat,w_awr_min_qal_label,w_awr_min_qan_raw,w_awr_min_qan_score,w_awr_min_qan_cat,w_awr_min_qan_label,w_awr_min_rrr_raw,w_awr_min_rrr_score,w_awr_min_rrr_cat,w_awr_min_rrr_label,w_awr_min_tot_raw,w_awr_min_tot_score,w_awr_min_tot_cat,w_awr_min_tot_label,w_awr_con_qal_raw,w_awr_con_qal_score,w_awr_con_qal_cat,w_awr_con_qal_label,w_awr_con_qan_raw,w_awr_con_qan_score,w_awr_con_qan_cat,w_awr_con_qan_label,w_awr_con_rrr_raw,w_awr_con_rrr_score,w_awr_con_rrr_cat,w_awr_con_rrr_label,w_awr_con_tot_raw,w_awr_con_tot_score,w_awr_con_tot_cat,w_awr_con_tot_label,w_awr_def_qal_raw,w_awr_def_qal_score,w_awr_def_qal_cat,w_awr_def_qal_label,w_awr_def_qan_raw,w_awr_def_qan_score,w_awr_def_qan_cat,w_awr_def_qan_label,w_awr_def_rrr_raw,w_awr_def_rrr_score,w_awr_def_rrr_cat,w_awr_def_rrr_label,w_awr_def_tot_raw,w_awr_def_tot_score,w_awr_def_tot_cat,w_awr_def_tot_label,w_awr_elp_qal_raw,w_awr_elp_qal_score,w_awr_elp_qal_cat,w_awr_elp_qal_label,w_awr_elp_qan_raw,w_awr_elp_qan_score,w_awr_elp_qan_cat,w_awr_elp_qan_label,w_awr_elp_rrr_raw,w_awr_elp_rrr_score,w_awr_elp_rrr_cat,w_awr_elp_rrr_label,w_awr_elp_tot_raw,w_awr_elp_tot_score,w_awr_elp_tot_cat,w_awr_elp_tot_label,w_awr_tex_qal_raw,w_awr_tex_qal_score,w_awr_tex_qal_cat,w_awr_tex_qal_label,w_awr_tex_qan_raw,w_awr_tex_qan_score,w_awr_tex_qan_cat,w_awr_tex_qan_label,w_awr_tex_rrr_raw,w_awr_tex_rrr_score,w_awr_tex_rrr_cat,w_awr_tex_rrr_label,w_awr_tex_tot_raw,w_awr_tex_tot_score,w_awr_tex_tot_cat,w_awr_tex_tot_label
0,0,111011-EGY.11_1-3365,111011,EGY.11_1,3365,(POLYGON ((31.90590570688292 29.85788703615783...,Al Qahirah,EGY,Egypt,Cairo|El Cairo|El Qahira|Le Caire,,Muhafazah,Governorate,,EG.QH,4.223754,0.0,2.0,2.0,Medium to high (0 to +1),,,,,0.011222,0.0,0.0,Low (< 2.5%),0.843958,,,Insignificant Trend,2.318042,5.0,4.0,Extremely High,0.6139,2.046333,2.0,Medium to high (0.6 to 90%),0.0,0.0,0.0,"Low (0 to 9 in 1,000,000)",0.025333,1.019067,1.0,Low to medium (2.5 to 5%),0.987061,4.948243,4.0,Extremely High,58.0,2.8,2.0,Medium to high (50 to 60%),0.962396,2.887187,2.0,Medium - High,1.035414,4.141657,4.0,Extremely High,0.074383,4.180674,4.0,Extremely High (more than 1 in 100),2.046333,1.799374,1.0,Low - Medium,2.205417,3.799285,3.0,Medium - High,1.273022,1.845407,1.0,Low - Medium,1.650923,2.050542,2.0,Medium,2.023167,1.768397,1.0,Low - Medium,4.520815,4.818802,4.0,High,1.654767,2.143883,2.0,Medium,3.361097,4.288767,4.0,High,2.030889,1.778723,1.0,Low - Medium,3.88825,4.579606,4.0,High,1.654767,2.143883,2.0,Medium,2.7728,4.033465,4.0,High,2.009267,1.749811,1.0,Low - Medium,4.296044,4.733808,4.0,High,0.644267,1.29686,1.0,Low - Medium,3.056783,4.156704,4.0,High,2.041185,1.792491,1.0,Low - Medium,3.117716,4.288239,4.0,High,1.273022,1.845407,1.0,Low - Medium,2.511408,3.681514,3.0,Medium - High,2.030889,1.778723,1.0,Low - Medium,3.117716,4.288239,4.0,High,1.273022,1.845407,1.0,Low - Medium,2.259718,3.246525,3.0,Medium - High,2.030889,1.778723,1.0,Low - Medium,3.941655,4.5998,4.0,High,0.967627,1.57897,1.0,Low - Medium,3.00518,4.13431,4.0,High,2.030889,1.778723,1.0,Low - Medium,4.317097,4.741769,4.0,High,0.76403,1.401345,1.0,Low - Medium,3.082101,4.167691,4.0,High,2.009267,1.749811,1.0,Low - Medium,3.415903,4.400994,4.0,High,1.273022,1.845407,1.0,Low - Medium,3.238774,4.235683,4.0,High,2.037067,1.786984,1.0,Low - Medium,3.489695,4.428898,4.0,High,1.654767,2.143883,2.0,Medium,2.469111,3.608413,3.0,Medium - High
1,3,111011-None-3365,111011,-9999,3365,(POLYGON ((32.46194054146073 29.89250514754305...,,,,,,,,,,0.742712,0.0,2.0,2.0,Medium to high (0 to +1),,,,,0.011222,0.0,0.0,Low (< 2.5%),0.843958,,,Insignificant Trend,2.318042,5.0,4.0,Extremely High,,,,,0.0,0.0,0.0,"Low (0 to 9 in 1,000,000)",0.025333,1.019067,1.0,Low to medium (2.5 to 5%),0.987061,4.948243,4.0,Extremely High,,,,,0.962396,2.887187,2.0,Medium - High,1.035414,4.141657,4.0,Extremely High,0.074383,4.180674,4.0,Extremely High (more than 1 in 100),,,,NoData,2.205417,3.799285,3.0,Medium - High,0.509533,1.179314,1.0,Low - Medium,1.357475,1.558507,1.0,Low - Medium,2.0,1.73742,1.0,Low - Medium,4.520815,4.818802,4.0,High,0.509533,1.179314,1.0,Low - Medium,3.619836,4.401052,4.0,High,2.0,1.73742,1.0,Low - Medium,3.88825,4.579606,4.0,High,0.509533,1.179314,1.0,Low - Medium,3.010423,4.136585,4.0,High,2.0,1.73742,1.0,Low - Medium,4.296044,4.733808,4.0,High,0.509533,1.179314,1.0,Low - Medium,3.109204,4.179453,4.0,High,2.0,1.73742,1.0,Low - Medium,3.117716,4.288239,4.0,High,0.509533,1.179314,1.0,Low - Medium,2.530748,3.71494,3.0,Medium - High,2.0,1.73742,1.0,Low - Medium,3.117716,4.288239,4.0,High,0.509533,1.179314,1.0,Low - Medium,2.167384,3.086946,3.0,Medium - High,2.0,1.73742,1.0,Low - Medium,3.941655,4.5998,4.0,High,0.509533,1.179314,1.0,Low - Medium,3.11691,4.182798,4.0,High,2.0,1.73742,1.0,Low - Medium,4.317097,4.741769,4.0,High,0.509533,1.179314,1.0,Low - Medium,3.220388,4.227704,4.0,High,2.0,1.73742,1.0,Low - Medium,3.415903,4.400994,4.0,High,0.509533,1.179314,1.0,Low - Medium,3.260817,4.245249,4.0,High,2.0,1.73742,1.0,Low - Medium,3.489695,4.428898,4.0,High,0.509533,1.179314,1.0,Low - Medium,2.496337,3.655467,3.0,Medium - High
2,5,111012-EGY.11_1-3365,111012,EGY.11_1,3365,(POLYGON ((31.88750000022489 29.85833333371637...,Al Qahirah,EGY,Egypt,Cairo|El Cairo|El Qahira|Le Caire,,Muhafazah,Governorate,,EG.QH,258.364251,0.0,2.0,2.0,Medium to high (0 to +1),,,,,0.016651,0.413638,0.0,Low (< 2.5%),0.843958,,,Insignificant Trend,1.0,5.0,-1.0,Arid and Low Water Use,0.6139,2.046333,2.0,Medium to high (0.6 to 90%),0.0,0.0,0.0,"Low (0 to 9 in 1,000,000)",0.045942,1.877895,1.0,Low to medium (2.5 to 5%),1.0,5.0,-1.0,Arid and Low Water Use,58.0,2.8,2.0,Medium to high (50 to 60%),1.027464,3.082393,3.0,High,1.161367,4.645469,4.0,Extremely High,0.0,0.0,0.0,"Low (0 to 1 in 1,000)",2.046333,1.799374,1.0,Low - Medium,1.732991,3.167294,3.0,Medium - High,1.697178,2.173716,2.0,Medium,1.715637,2.182351,2.0,Medium,2.023167,1.768397,1.0,Low - Medium,4.39873,4.772638,4.0,High,1.972883,2.367654,2.0,Medium,3.360587,4.288546,4.0,High,2.030889,1.778723,1.0,Low - Medium,3.465983,4.419931,4.0,High,1.972883,2.367654,2.0,Medium,2.655818,3.931095,3.0,Medium - High,2.009267,1.749811,1.0,Low - Medium,4.066571,4.647036,4.0,High,1.243075,1.81928,1.0,Low - Medium,3.04191,4.15025,4.0,High,2.041185,1.792491,1.0,Low - Medium,1.980562,3.498482,3.0,Medium - High,1.697178,2.173716,2.0,Medium,1.910274,2.578787,2.0,Medium,2.030889,1.778723,1.0,Low - Medium,1.980562,3.498482,3.0,Medium - High,1.697178,2.173716,2.0,Medium,1.854847,2.465894,2.0,Medium,2.030889,1.778723,1.0,Low - Medium,3.409133,4.398434,4.0,High,1.476613,2.018566,2.0,Medium,2.781982,4.037449,4.0,High,2.030889,1.778723,1.0,Low - Medium,3.98763,4.617185,4.0,High,1.32957,1.894742,1.0,Low - Medium,3.023766,4.142376,4.0,High,2.009267,1.749811,1.0,Low - Medium,2.988838,4.239505,4.0,High,1.697178,2.173716,2.0,Medium,2.873406,4.077124,4.0,High,2.037067,1.786984,1.0,Low - Medium,3.080784,4.274273,4.0,High,1.972883,2.367654,2.0,Medium,2.425817,3.53359,3.0,Medium - High
3,7,111012-EGY.15_1-None,111012,EGY.15_1,-9999,(POLYGON ((32.36609158210641 29.63378296420298...,As Suways,EGY,Egypt,El Suweiz|Es Suweis|Suez,محافظة السويس,Muhafazah,Governorate,,EG.SW,1.748248,0.0,2.0,2.0,Medium to high (0 to +1),,,,,0.016651,0.413638,0.0,Low (< 2.5%),,,,,1.0,5.0,-1.0,Arid and Low Water Use,0.6139,2.046333,2.0,Medium to high (0.6 to 90%),0.0,0.0,0.0,"Low (0 to 9 in 1,000,000)",0.045942,1.877895,1.0,Low to medium (2.5 to 5%),1.0,5.0,-1.0,Arid and Low Water Use,58.0,2.8,2.0,Medium to high (50 to 60%),1.027464,3.082393,3.0,High,1.161367,4.645469,4.0,Extremely High,0.0,0.0,0.0,"Low (0 to 1 in 1,000)",2.046333,1.799374,1.0,Low - Medium,1.732991,3.167294,3.0,Medium - High,1.697178,2.173716,2.0,Medium,1.715637,2.182351,2.0,Medium,2.023167,1.768397,1.0,Low - Medium,4.39873,4.772638,4.0,High,1.972883,2.367654,2.0,Medium,3.360587,4.288546,4.0,High,2.030889,1.778723,1.0,Low - Medium,3.465983,4.419931,4.0,High,1.972883,2.367654,2.0,Medium,2.655818,3.931095,3.0,Medium - High,2.009267,1.749811,1.0,Low - Medium,4.066571,4.647036,4.0,High,1.243075,1.81928,1.0,Low - Medium,3.04191,4.15025,4.0,High,2.041185,1.792491,1.0,Low - Medium,1.980562,3.498482,3.0,Medium - High,1.697178,2.173716,2.0,Medium,1.910274,2.578787,2.0,Medium,2.030889,1.778723,1.0,Low - Medium,1.980562,3.498482,3.0,Medium - High,1.697178,2.173716,2.0,Medium,1.854847,2.465894,2.0,Medium,2.030889,1.778723,1.0,Low - Medium,3.409133,4.398434,4.0,High,1.476613,2.018566,2.0,Medium,2.781982,4.037449,4.0,High,2.030889,1.778723,1.0,Low - Medium,3.98763,4.617185,4.0,High,1.32957,1.894742,1.0,Low - Medium,3.023766,4.142376,4.0,High,2.009267,1.749811,1.0,Low - Medium,2.988838,4.239505,4.0,High,1.697178,2.173716,2.0,Medium,2.873406,4.077124,4.0,High,2.037067,1.786984,1.0,Low - Medium,3.080784,4.274273,4.0,High,1.972883,2.367654,2.0,Medium,2.425817,3.53359,3.0,Medium - High
4,8,111012-EGY.8_1-3365,111012,EGY.8_1,3365,(POLYGON ((31.89332770960627 29.73370743035362...,Al Jizah,EGY,Egypt,El Giza|El Gīzah|Gizeh|Giza|Guizèh,,Muhafazah,Governorate,,EG.JZ,510.53507,0.0,2.0,2.0,Medium to high (0 to +1),,,,,0.016651,0.413638,0.0,Low (< 2.5%),0.843958,,,Insignificant Trend,1.0,5.0,-1.0,Arid and Low Water Use,0.6139,2.046333,2.0,Medium to high (0.6 to 90%),0.0,0.0,0.0,"Low (0 to 9 in 1,000,000)",0.045942,1.877895,1.0,Low to medium (2.5 to 5%),1.0,5.0,-1.0,Arid and Low Water Use,58.0,2.8,2.0,Medium to high (50 to 60%),1.027464,3.082393,3.0,High,1.161367,4.645469,4.0,Extremely High,0.0,0.0,0.0,"Low (0 to 1 in 1,000)",2.046333,1.799374,1.0,Low - Medium,1.732991,3.167294,3.0,Medium - High,1.697178,2.173716,2.0,Medium,1.715637,2.182351,2.0,Medium,2.023167,1.768397,1.0,Low - Medium,4.39873,4.772638,4.0,High,1.972883,2.367654,2.0,Medium,3.360587,4.288546,4.0,High,2.030889,1.778723,1.0,Low - Medium,3.465983,4.419931,4.0,High,1.972883,2.367654,2.0,Medium,2.655818,3.931095,3.0,Medium - High,2.009267,1.749811,1.0,Low - Medium,4.066571,4.647036,4.0,High,1.243075,1.81928,1.0,Low - Medium,3.04191,4.15025,4.0,High,2.041185,1.792491,1.0,Low - Medium,1.980562,3.498482,3.0,Medium - High,1.697178,2.173716,2.0,Medium,1.910274,2.578787,2.0,Medium,2.030889,1.778723,1.0,Low - Medium,1.980562,3.498482,3.0,Medium - High,1.697178,2.173716,2.0,Medium,1.854847,2.465894,2.0,Medium,2.030889,1.778723,1.0,Low - Medium,3.409133,4.398434,4.0,High,1.476613,2.018566,2.0,Medium,2.781982,4.037449,4.0,High,2.030889,1.778723,1.0,Low - Medium,3.98763,4.617185,4.0,High,1.32957,1.894742,1.0,Low - Medium,3.023766,4.142376,4.0,High,2.009267,1.749811,1.0,Low - Medium,2.988838,4.239505,4.0,High,1.697178,2.173716,2.0,Medium,2.873406,4.077124,4.0,High,2.037067,1.786984,1.0,Low - Medium,3.080784,4.274273,4.0,High,1.972883,2.367654,2.0,Medium,2.425817,3.53359,3.0,Medium - High


In [26]:
# Sort the columns by name (axis=1); gdf_master is modified in place.
gdf_master.sort_index(axis=1,inplace=True)

In [27]:
gdf_master.shape

(68511, 228)

# Save in multiple formats:

1. Shapefile (string_id and geometry only; saving as GeoPackage did not work)
1. CSV (no geom)
1. Pickle 
1. Bigquery 
1. PostGIS



In [28]:
df_master =gdf_master.drop("geom",axis=1)

In [29]:
destination_path_shp = "{}/{}.shp".format(ec2_output_path,SCRIPT_NAME)
destination_path_csv = "{}/{}.csv".format(ec2_output_path,SCRIPT_NAME)
destination_path_pkl = "{}/{}.pkl".format(ec2_output_path,SCRIPT_NAME)
output_table_name = "{}_v{:02.0f}".format(SCRIPT_NAME,OUTPUT_VERSION).lower()
destination_table = "{}.{}".format(BQ_OUTPUT_DATASET_NAME,BQ_OUTPUT_TABLE_NAME)


In [30]:
# Wall-clock time elapsed since `start` was set, before the outputs are written.
end = datetime.datetime.now()
elapsed = end - start
print(elapsed)

0:02:42.948171


In [31]:
gdf_simple = gdf_master[["string_id","geom"]]

In [32]:
# Saving as geopackage did not work. Therefore saving the unique identifier (string_id) and geom as shapefile. 
# This can be joined in GIS software later
gdf_simple.to_file(filename=destination_path_shp,driver="ESRI Shapefile",encoding ='utf-8')

In [33]:
# Full table including the geometry column, as a pickle.
gdf_master.to_pickle(destination_path_pkl)

In [34]:
# Attribute table without geometry, as UTF-8 CSV.
df_master.to_csv(destination_path_csv, encoding='utf-8')

In [35]:
# Copy the local output folder to its S3 destination.
!aws s3 cp {ec2_output_path} {s3_output_path} --recursive

upload: ../../../../data/Y2018M12D14_RH_Master_Horizontal_GPD_V01/output_V07/Y2018M12D14_RH_Master_Horizontal_GPD_V01.cpg to s3://wri-projects/Aqueduct30/processData/Y2018M12D14_RH_Master_Horizontal_GPD_V01/output_V07/Y2018M12D14_RH_Master_Horizontal_GPD_V01.cpg
upload: ../../../../data/Y2018M12D14_RH_Master_Horizontal_GPD_V01/output_V07/Y2018M12D14_RH_Master_Horizontal_GPD_V01.prj to s3://wri-projects/Aqueduct30/processData/Y2018M12D14_RH_Master_Horizontal_GPD_V01/output_V07/Y2018M12D14_RH_Master_Horizontal_GPD_V01.prj
upload: ../../../../data/Y2018M12D14_RH_Master_Horizontal_GPD_V01/output_V07/Y2018M12D14_RH_Master_Horizontal_GPD_V01.shx to s3://wri-projects/Aqueduct30/processData/Y2018M12D14_RH_Master_Horizontal_GPD_V01/output_V07/Y2018M12D14_RH_Master_Horizontal_GPD_V01.shx
upload: ../../../../data/Y2018M12D14_RH_Master_Horizontal_GPD_V01/output_V07/Y2018M12D14_RH_Master_Horizontal_GPD_V01.dbf to s3://wri-projects/Aqueduct30/processData/Y2018M12D14_RH_Master_Horizontal_GPD_V01/outp

In [36]:
gdf_master2 = gdf_master.rename(columns={"geom":"geometry"})
gdf_master2 = gpd.GeoDataFrame(gdf_master2,geometry="geometry")

In [37]:
# Write the table to PostGIS under output_table_name (index not stored) and
# keep the frame that is read back from the database.
gdfFromSQL = uploadGDFtoPostGIS(gdf_master2,output_table_name,False)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Create table temppolygon
Create table tempmultipolygon
DROP TABLE IF EXISTS y2018m12d14_rh_master_horizontal_gpd_v01_v07
ALTER TABLE temppolygon ALTER COLUMN geom type geometry(MultiPolygon, 4326) using ST_Multi(geom);
CREATE TABLE y2018m12d14_rh_master_horizontal_gpd_v01_v07 AS (SELECT * FROM temppolygon UNION SELECT * FROM tempmultipolygon);
UPDATE y2018m12d14_rh_master_horizontal_gpd_v01_v07 SET geom = st_makevalid(geom);
DROP TABLE temppolygon,tempmultipolygon


In [38]:
# Upload the frame read back from PostGIS to BigQuery in chunks of 1000 rows,
# replacing the destination table if it already exists.
gdfFromSQL.to_gbq(destination_table=destination_table,
                  project_id=BQ_PROJECT_ID,
                  chunksize=1000,
                  if_exists="replace")

69it [21:35, 18.78s/it]


In [39]:
# Total wall-clock time since `start` was set, including all uploads.
end = datetime.datetime.now()
elapsed = end - start
print(elapsed)

0:48:37.800392


Previous runs:   
0:47:11.853503  
0:48:37.800392
