In [1]:
""" Create horizontal table for readability. 
-------------------------------------------------------------------------------

Data is stored vertically in BigQuery, which means each indicator has its 
own row. This script puts the various indicators as columns in a new,
horizontal table. Additional useful attributes are added. 

gadm metadata:
https://gadm.org/metadata.html
 

Author: Rutger Hofste
Date: 20181214
Kernel: python35
Docker: rutgerhofste/gisdocker:ubuntu16.04

"""

# Script identity. Name and version together determine every output location below.
SCRIPT_NAME = 'Y2018M12D14_RH_Master_Horizontal_GPD_V01'
OUTPUT_VERSION = 4
_NAME_AND_VERSION = (SCRIPT_NAME, OUTPUT_VERSION)

# AWS RDS PostGIS
DATABASE_ENDPOINT = "aqueduct30v05.cgpnumwmfcqc.eu-central-1.rds.amazonaws.com"
DATABASE_NAME = "database01"
POSTGIS_INPUT_TABLE_NAME = "y2018m12d06_rh_master_shape_v01_v02"

# BigQuery input tables, keyed by role.
BQ_IN = {
    # gadm
    "GADM36L01": "y2018m11d12_rh_gadm36_level1_rds_to_bq_v01_v01",
    # Area
    "area": 'y2018m12d07_rh_process_area_bq_v01_v01',
    # too slow, using s3 instead
    "indicators": 'y2018m12d11_rh_master_weights_gpd_v02_v01',
}
S3_INPUT_PATH = "s3://wri-projects/Aqueduct30/processData/Y2018M12D11_RH_Master_Weights_GPD_V02/output_V04"

# BigQuery output location.
BQ_PROJECT_ID = "aqueduct30"
BQ_OUTPUT_DATASET_NAME = "aqueduct30v01"
BQ_OUTPUT_TABLE_NAME = "{}_v{:02.0f}".format(*_NAME_AND_VERSION).lower()

# Local (EC2) working folders and the S3 destination for the results.
ec2_input_path = "/volumes/data/{}/input_V{:02.0f}".format(*_NAME_AND_VERSION)
ec2_output_path = "/volumes/data/{}/output_V{:02.0f}".format(*_NAME_AND_VERSION)
s3_output_path = "s3://wri-projects/Aqueduct30/processData/{}/output_V{:02.0f}/".format(*_NAME_AND_VERSION)

# Echo the derived names so the run log records where data is read and written.
_summary_items = [
    "PostGIS table name: ", POSTGIS_INPUT_TABLE_NAME,
    "\nBQ_OUTPUT_DATASET_NAME: ", BQ_OUTPUT_DATASET_NAME,
    "\nBQ_OUTPUT_TABLE_NAME: ", BQ_OUTPUT_TABLE_NAME,
    "\ns3_output_path: ", s3_output_path,
    "\nec2_output_path:" , ec2_output_path,
]
print(*_summary_items)

PostGIS table name:  y2018m12d06_rh_master_shape_v01_v02 
BQ_OUTPUT_DATASET_NAME:  aqueduct30v01 
BQ_OUTPUT_TABLE_NAME:  y2018m12d14_rh_master_horizontal_gpd_v01_v04 
s3_output_path:  s3://wri-projects/Aqueduct30/processData/Y2018M12D14_RH_Master_Horizontal_GPD_V01/output_V04/ 
ec2_output_path: /volumes/data/Y2018M12D14_RH_Master_Horizontal_GPD_V01/output_V04


In [2]:
import time, datetime, sys
# time.strftime() formats *local* time when no time tuple is given. The strings
# are labelled "UTC", so format an explicit UTC struct_time; the same struct is
# used for both strings so date and time cannot straddle midnight.
utc_struct = time.gmtime()
dateString = time.strftime("Y%YM%mD%d", utc_struct)
timeString = time.strftime("UTC %H:%M", utc_struct)
# Reference point for the elapsed-time reports further down (differences only,
# so the timezone of this value does not matter).
start = datetime.datetime.now()
print(dateString,timeString)
sys.version

Y2018M12D18 UTC 02:00


'3.5.4 |Anaconda, Inc.| (default, Nov 20 2017, 18:44:38) \n[GCC 7.2.0]'

In [3]:
!rm -r {ec2_output_path}
!mkdir -p {ec2_output_path}

In [4]:
# Copy everything under S3_INPUT_PATH into the local input folder.
!aws s3 cp {S3_INPUT_PATH} {ec2_input_path} --recursive

download: s3://wri-projects/Aqueduct30/processData/Y2018M12D11_RH_Master_Weights_GPD_V02/output_V04/Y2018M12D11_RH_Master_Weights_GPD_V02.pkl to ../../../../data/Y2018M12D14_RH_Master_Horizontal_GPD_V01/input_V04/Y2018M12D11_RH_Master_Weights_GPD_V02.pkl
download: s3://wri-projects/Aqueduct30/processData/Y2018M12D11_RH_Master_Weights_GPD_V02/output_V04/Y2018M12D11_RH_Master_Weights_GPD_V02.csv to ../../../../data/Y2018M12D14_RH_Master_Horizontal_GPD_V01/input_V04/Y2018M12D11_RH_Master_Weights_GPD_V02.csv


In [5]:
import os
import pandas as pd
import geopandas as gpd
import numpy as np
import sqlalchemy
from google.cloud import bigquery
from shapely.geometry.multipolygon import MultiPolygon
from geoalchemy2 import Geometry, WKTElement

# Tell the Google client libraries which credentials file and default project to use.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/.google.json"
os.environ["GOOGLE_CLOUD_PROJECT"] = "aqueduct30"
# NOTE(review): `client` is not referenced again in the visible cells; the
# BigQuery reads and writes below go through pd.read_gbq / DataFrame.to_gbq.
client = bigquery.Client(project=BQ_PROJECT_ID)

%matplotlib inline
# Allow up to 500 columns when displaying the wide output table.
pd.set_option('display.max_columns', 500)


In [6]:
# Read the database password (first line of the secrets file). The context
# manager closes the handle even if reading or indexing raises.
with open("/.password","r") as password_file:
    password = password_file.read().splitlines()[0]

# Engine and open connection to the PostGIS database; both are used as
# module-level globals by uploadGDFtoPostGIS.
engine = sqlalchemy.create_engine("postgresql://rutgerhofste:{}@{}:5432/{}".format(password,DATABASE_ENDPOINT,DATABASE_NAME))
connection = engine.connect()

In [7]:
def uploadGDFtoPostGIS(gdf,tableName,saveIndex):
    """Upload a polygon GeoDataFrame to a PostGIS table as valid MultiPolygons.

    Rows are split by geometry type (Polygon / MultiPolygon) and written to
    the temporary tables ``temppolygon`` and ``tempmultipolygon``. On the
    database side the polygons are promoted with ST_Multi, both tables are
    combined with UNION into ``tableName`` (replacing any existing table of
    that name), the geometries are repaired with ST_MakeValid, and the
    temporary tables are dropped.

    Relies on the module-level ``engine`` and ``connection`` objects.

    Args:
        gdf: input GeoDataFrame with a "geometry" column. It is not modified.
            Rows whose geometry type is neither Polygon nor MultiPolygon are
            not uploaded. A column named "type", if present, is not uploaded.
        tableName: PostGIS table name (string). It is interpolated directly
            into the SQL statements, so only pass trusted identifiers.
        saveIndex: whether to store the DataFrame index as a column in
            PostgreSQL; otherwise it is discarded (Boolean).

    Returns:
        GeoDataFrame read back from ``tableName`` with geometry column "geom".
    """
    # Classify rows in a separate Series so that no helper column is written
    # into the caller's frame.
    rowGeomTypes = gdf.geometry.geom_type
    geomTypes = ["Polygon","MultiPolygon"]

    for geomType in geomTypes:
        geomTypeLower = geomType.lower()
        # Explicit copy: assigning a column into a bare .loc selection raises
        # SettingWithCopyWarning and is not guaranteed to take effect.
        gdfType = gdf.loc[rowGeomTypes == geomType].copy()
        # geoalchemy2 needs the geometry as WKT tagged with its SRID (EPSG:4326).
        gdfType['geom'] = gdfType['geometry'].apply(lambda x: WKTElement(x.wkt, srid=4326))
        # "type" is removed as well (when present) so the uploaded schema stays
        # the same as before, when a helper column of that name was added and dropped.
        gdfType = gdfType.drop(["geometry","type"], axis=1, errors="ignore")
        print("Create table temp%s" %(geomTypeLower))
        gdfType.to_sql(
            name = "temp%s" %(geomTypeLower),
            con = engine,
            if_exists='replace',
            index= saveIndex,
            dtype={'geom': Geometry(geomType.upper(), srid= 4326)}
        )

    # Merge both tables and make valid
    sql = []
    sql.append("DROP TABLE IF EXISTS %s"  %(tableName))
    sql.append("ALTER TABLE temppolygon ALTER COLUMN geom type geometry(MultiPolygon, 4326) using ST_Multi(geom);")
    sql.append("CREATE TABLE %s AS (SELECT * FROM temppolygon UNION SELECT * FROM tempmultipolygon);" %(tableName))
    sql.append("UPDATE %s SET geom = st_makevalid(geom);" %(tableName))
    sql.append("DROP TABLE temppolygon,tempmultipolygon")

    for statement in sql:
        print(statement)
        connection.execute(statement)
    gdfFromSQL =gpd.GeoDataFrame.from_postgis("select * from %s" %(tableName),connection,geom_col='geom' )
    return gdfFromSQL

In [8]:
# Query for the identifier columns and geometry of the PostGIS input table.
sql = """
SELECT
    aq30_id,
    string_id,
    pfaf_id,
    gid_1, 
    aqid,
    geom
FROM {}
""".format(POSTGIS_INPUT_TABLE_NAME)

In [9]:
# Run the query against PostGIS; "geom" becomes the geometry column of the GeoDataFrame.
gdf =gpd.GeoDataFrame.from_postgis(sql,engine,geom_col='geom')

In [10]:
gdf.head()

Unnamed: 0,aq30_id,string_id,pfaf_id,gid_1,aqid,geom
0,0,111011-EGY.11_1-3365,111011,EGY.11_1,3365,(POLYGON ((31.90590570688292 29.85788703615783...
1,3,111011-None-3365,111011,-9999,3365,(POLYGON ((32.46194054146073 29.89250514754305...
2,5,111012-EGY.11_1-3365,111012,EGY.11_1,3365,(POLYGON ((31.88750000022489 29.85833333371637...
3,7,111012-EGY.15_1-None,111012,EGY.15_1,-9999,(POLYGON ((32.36609158210641 29.63378296420298...
4,8,111012-EGY.8_1-3365,111012,EGY.8_1,3365,(POLYGON ((31.89332770960627 29.73370743035362...


In [11]:
gdf.shape

(68511, 6)

In [12]:
# NOTE: plain assignment, not a copy. Both names refer to the same object
# until gdf_master is rebound by the merges in the following cells.
gdf_master = gdf

In [13]:
# Query for the GADM level-1 attributes; gid_1 is the key used for the join onto the master table.
sql_gadm = """
SELECT
  gid_1,
  name_1,
  gid_0,
  name_0,
  varname_1,
  nl_name_1,
  type_1,
  engtype_1,
  cc_1,
  hasc_1
FROM
  `{}.{}.{}`
""".format(BQ_PROJECT_ID,BQ_OUTPUT_DATASET_NAME,BQ_IN["GADM36L01"])

In [14]:
# Execute the GADM query in BigQuery (standard SQL dialect) and load the result.
df_gadm = pd.read_gbq(query=sql_gadm,dialect="standard")

In [15]:
# Left join: keep every master row and attach the GADM attributes where gid_1 matches.
gdf_master = pd.merge(gdf_master, df_gadm, how="left", on="gid_1")

In [16]:
# Query for the area (km2) per string_id, executed in BigQuery right away.
sql_area = """
SELECT
  string_id,
  area_km2
FROM
  `{}.{}.{}`
""".format(BQ_PROJECT_ID,BQ_OUTPUT_DATASET_NAME,BQ_IN["area"])
df_area = pd.read_gbq(query=sql_area,dialect="standard")

In [17]:
# Left join: keep every master row and attach area_km2 where string_id matches.
gdf_master = pd.merge(gdf_master, df_area, how="left", on="string_id")

In [18]:
# Query for the vertical indicator table in BigQuery.
# NOTE(review): sql_in is never executed in the visible cells. The read below
# is commented out and the pickled copy downloaded from S3 is loaded instead.
sql_in = """
SELECT
  string_id,
  indicator,
  group_short,
  industry_short,
  raw,
  score,
  cat,
  label,
  weight_fraction,
  weighted_score
FROM
  `{}.{}.{}`
""".format(BQ_PROJECT_ID,BQ_OUTPUT_DATASET_NAME,BQ_IN["indicators"])
#df_in = pd.read_gbq(query=sql_in,dialect="standard") # Takes too long, reverting to pickled file instead

In [19]:
# Pickled indicator table inside the local input folder (downloaded from S3 earlier).
source_path = "{}/Y2018M12D11_RH_Master_Weights_GPD_V02.pkl".format(ec2_input_path)

In [20]:
# NOTE(review): unpickling can execute arbitrary code. Only load this file when
# the S3_INPUT_PATH it was downloaded from is trusted.
df_in = pd.read_pickle(source_path)

In [21]:
df_in.head()

Unnamed: 0,cat,group_short,indicator,industry_short,label,raw,score,string_id,weight_fraction,weighted_score
0,,qan,drr,def,,,,111011-EGY.11_1-3365,,
1,,qan,drr,fnb,,,,111011-EGY.11_1-3365,,
2,,qan,drr,che,,,,111011-EGY.11_1-3365,,
3,,qan,drr,agr,,,,111011-EGY.11_1-3365,,
4,,qan,drr,elp,,,,111011-EGY.11_1-3365,,


# Append (horizontally) all indicators

In [22]:
# All indicators except 'awr', which gets its own set of columns in a later cell.
indicators = list(df_in["indicator"].unique())
indicators.remove('awr')

# Individual indicators are taken from the "def" industry rows only; that part
# of the row filter does not depend on the indicator, so build it once.
is_def_industry = df_in["industry_short"] == "def"

# Source column -> suffix appended to the indicator name in the wide table.
column_suffixes = {"raw": "_raw",
                   "score": "_score",
                   "cat": "_cat",
                   "label": "_label"}

for indicator in indicators:
    print(indicator)
    df_sel = df_in.loc[is_def_industry & (df_in["indicator"] == indicator)]
    new_names = {source: indicator + suffix for source, suffix in column_suffixes.items()}
    df_out = df_sel[["string_id","raw","score","cat","label"]].rename(columns=new_names)
    # Left join keeps every master row; rows without a match get missing values.
    gdf_master = pd.merge(gdf_master, df_out, how="left", on="string_id")

drr
udw
sev
gtd
cep
cfr
usa
rri
ucw
iav
bwd
bws


In [23]:
gdf_master.loc[gdf_master["string_id"] == "253001-SJM.2_1-89"]

Unnamed: 0,aq30_id,string_id,pfaf_id,gid_1,aqid,geom,name_1,gid_0,name_0,varname_1,nl_name_1,type_1,engtype_1,cc_1,hasc_1,area_km2,drr_raw,drr_score,drr_cat,drr_label,udw_raw,udw_score,udw_cat,udw_label,sev_raw,sev_score,sev_cat,sev_label,gtd_raw,gtd_score,gtd_cat,gtd_label,cep_raw,cep_score,cep_cat,cep_label,cfr_raw,cfr_score,cfr_cat,cfr_label,usa_raw,usa_score,usa_cat,usa_label,rri_raw,rri_score,rri_cat,rri_label,ucw_raw,ucw_score,ucw_cat,ucw_label,iav_raw,iav_score,iav_cat,iav_label,bwd_raw,bwd_score,bwd_cat,bwd_label,bws_raw,bws_score,bws_cat,bws_label
7731,20607,253001-SJM.2_1-89,253001,SJM.2_1,89,(POLYGON ((16.34196631819108 80.05218058366709...,Svalbard,SJM,Svalbard and Jan Mayen,,,Territory,Territory,,SJ.SV,35921.638036,,,,,0.0,0.0,0.0,Low (< 2.5%),0.925506,2.776519,2.0,Medium - High,0.01858,,,Insignificant Trend,-1.438554,0.890362,0.0,Low (< -5),0.0,0.0,0.0,"Low (0 to 9 in 1,000,000)",0.0,0.0,0.0,Low (< 2.5%),,,,,,,,,0.380097,1.520388,1.0,Low - Medium,,,,,,,,


# Append (horizontally) all aggregated water risk scores

In [24]:
indicator = "awr"
industries = list(df_in["industry_short"].unique())
groups = list(df_in["group_short"].unique())

# The indicator part of the row filter is the same for every combination.
is_awr = df_in["indicator"] == indicator

# Source column -> name template; filled with (indicator, industry, group).
column_templates = {"raw": "w_{}_{}_{}_raw",
                    "score": "w_{}_{}_{}_score",
                    "cat": "w_{}_{}_{}_cat",
                    "label": "w_{}_{}_{}_label"}

# One set of four columns per industry / group combination.
for industry in industries:
    is_awr_industry = is_awr & (df_in["industry_short"] == industry)
    for group in groups:
        print(industry,group)
        df_sel = df_in.loc[is_awr_industry & (df_in["group_short"] == group)]
        new_names = {source: template.format(indicator,industry,group)
                     for source, template in column_templates.items()}
        df_out = df_sel[["string_id","raw","score","cat","label"]].rename(columns=new_names)
        # Left join keeps every master row; rows without a match get missing values.
        gdf_master = pd.merge(gdf_master, df_out, how="left", on="string_id")

def qan
def rrr
def qal
def tot
fnb qan
fnb rrr
fnb qal
fnb tot
che qan
che rrr
che qal
che tot
agr qan
agr rrr
agr qal
agr tot
elp qan
elp rrr
elp qal
elp tot
min qan
min rrr
min qal
min tot
ong qan
ong rrr
ong qal
ong tot
smc qan
smc rrr
smc qal
smc tot
con qan
con rrr
con qal
con tot
tex qan
tex rrr
tex qal
tex tot


In [25]:
gdf_master.head()

Unnamed: 0,aq30_id,string_id,pfaf_id,gid_1,aqid,geom,name_1,gid_0,name_0,varname_1,nl_name_1,type_1,engtype_1,cc_1,hasc_1,area_km2,drr_raw,drr_score,drr_cat,drr_label,udw_raw,udw_score,udw_cat,udw_label,sev_raw,sev_score,sev_cat,sev_label,gtd_raw,gtd_score,gtd_cat,gtd_label,cep_raw,cep_score,cep_cat,cep_label,cfr_raw,cfr_score,cfr_cat,cfr_label,usa_raw,usa_score,usa_cat,usa_label,rri_raw,rri_score,rri_cat,rri_label,ucw_raw,ucw_score,ucw_cat,ucw_label,iav_raw,iav_score,iav_cat,iav_label,bwd_raw,bwd_score,bwd_cat,bwd_label,bws_raw,bws_score,bws_cat,bws_label,w_awr_def_qan_raw,w_awr_def_qan_score,w_awr_def_qan_cat,w_awr_def_qan_label,w_awr_def_rrr_raw,w_awr_def_rrr_score,w_awr_def_rrr_cat,w_awr_def_rrr_label,w_awr_def_qal_raw,w_awr_def_qal_score,w_awr_def_qal_cat,w_awr_def_qal_label,w_awr_def_tot_raw,w_awr_def_tot_score,w_awr_def_tot_cat,w_awr_def_tot_label,w_awr_fnb_qan_raw,w_awr_fnb_qan_score,w_awr_fnb_qan_cat,w_awr_fnb_qan_label,w_awr_fnb_rrr_raw,w_awr_fnb_rrr_score,w_awr_fnb_rrr_cat,w_awr_fnb_rrr_label,w_awr_fnb_qal_raw,w_awr_fnb_qal_score,w_awr_fnb_qal_cat,w_awr_fnb_qal_label,w_awr_fnb_tot_raw,w_awr_fnb_tot_score,w_awr_fnb_tot_cat,w_awr_fnb_tot_label,w_awr_che_qan_raw,w_awr_che_qan_score,w_awr_che_qan_cat,w_awr_che_qan_label,w_awr_che_rrr_raw,w_awr_che_rrr_score,w_awr_che_rrr_cat,w_awr_che_rrr_label,w_awr_che_qal_raw,w_awr_che_qal_score,w_awr_che_qal_cat,w_awr_che_qal_label,w_awr_che_tot_raw,w_awr_che_tot_score,w_awr_che_tot_cat,w_awr_che_tot_label,w_awr_agr_qan_raw,w_awr_agr_qan_score,w_awr_agr_qan_cat,w_awr_agr_qan_label,w_awr_agr_rrr_raw,w_awr_agr_rrr_score,w_awr_agr_rrr_cat,w_awr_agr_rrr_label,w_awr_agr_qal_raw,w_awr_agr_qal_score,w_awr_agr_qal_cat,w_awr_agr_qal_label,w_awr_agr_tot_raw,w_awr_agr_tot_score,w_awr_agr_tot_cat,w_awr_agr_tot_label,w_awr_elp_qan_raw,w_awr_elp_qan_score,w_awr_elp_qan_cat,w_awr_elp_qan_label,w_awr_elp_rrr_raw,w_awr_elp_rrr_score,w_awr_elp_rrr_cat,w_awr_elp_rrr_label,w_awr_elp_qal_raw,w_awr_elp_qal_score,w_awr_elp_qal_cat,w_awr_elp_qal
_label,w_awr_elp_tot_raw,w_awr_elp_tot_score,w_awr_elp_tot_cat,w_awr_elp_tot_label,w_awr_min_qan_raw,w_awr_min_qan_score,w_awr_min_qan_cat,w_awr_min_qan_label,w_awr_min_rrr_raw,w_awr_min_rrr_score,w_awr_min_rrr_cat,w_awr_min_rrr_label,w_awr_min_qal_raw,w_awr_min_qal_score,w_awr_min_qal_cat,w_awr_min_qal_label,w_awr_min_tot_raw,w_awr_min_tot_score,w_awr_min_tot_cat,w_awr_min_tot_label,w_awr_ong_qan_raw,w_awr_ong_qan_score,w_awr_ong_qan_cat,w_awr_ong_qan_label,w_awr_ong_rrr_raw,w_awr_ong_rrr_score,w_awr_ong_rrr_cat,w_awr_ong_rrr_label,w_awr_ong_qal_raw,w_awr_ong_qal_score,w_awr_ong_qal_cat,w_awr_ong_qal_label,w_awr_ong_tot_raw,w_awr_ong_tot_score,w_awr_ong_tot_cat,w_awr_ong_tot_label,w_awr_smc_qan_raw,w_awr_smc_qan_score,w_awr_smc_qan_cat,w_awr_smc_qan_label,w_awr_smc_rrr_raw,w_awr_smc_rrr_score,w_awr_smc_rrr_cat,w_awr_smc_rrr_label,w_awr_smc_qal_raw,w_awr_smc_qal_score,w_awr_smc_qal_cat,w_awr_smc_qal_label,w_awr_smc_tot_raw,w_awr_smc_tot_score,w_awr_smc_tot_cat,w_awr_smc_tot_label,w_awr_con_qan_raw,w_awr_con_qan_score,w_awr_con_qan_cat,w_awr_con_qan_label,w_awr_con_rrr_raw,w_awr_con_rrr_score,w_awr_con_rrr_cat,w_awr_con_rrr_label,w_awr_con_qal_raw,w_awr_con_qal_score,w_awr_con_qal_cat,w_awr_con_qal_label,w_awr_con_tot_raw,w_awr_con_tot_score,w_awr_con_tot_cat,w_awr_con_tot_label,w_awr_tex_qan_raw,w_awr_tex_qan_score,w_awr_tex_qan_cat,w_awr_tex_qan_label,w_awr_tex_rrr_raw,w_awr_tex_rrr_score,w_awr_tex_rrr_cat,w_awr_tex_rrr_label,w_awr_tex_qal_raw,w_awr_tex_qal_score,w_awr_tex_qal_cat,w_awr_tex_qal_label,w_awr_tex_tot_raw,w_awr_tex_tot_score,w_awr_tex_tot_cat,w_awr_tex_tot_label
0,0,111011-EGY.11_1-3365,111011,EGY.11_1,3365,(POLYGON ((31.90590570688292 29.85788703615783...,Al Qahirah,EGY,Egypt,Cairo|El Cairo|El Qahira|Le Caire,,Muhafazah,Governorate,,EG.QH,4.223754,,,,,0.011222,0.0,0.0,Low (< 2.5%),0.962396,2.887187,2.0,Medium - High,0.843958,,,Insignificant Trend,0.0,1.25,1.0,Low to medium (-5 to 0),0.0,0.0,0.0,"Low (0 to 9 in 1,000,000)",0.025333,1.019067,1.0,Low to medium (2.5 to 5%),58.0,2.8,2.0,Medium to high (50 to 60%),0.6139,2.046333,2.0,Medium to high (0.6 to 90%),1.035414,4.141657,4.0,Extremely High,0.987061,4.948243,4.0,Extremely High,2.318042,5.0,4.0,Extremely High,4.330739,4.744205,4.0,High,0.76403,1.401345,1.0,Low - Medium,1.780889,1.732363,1.0,Low - Medium,2.976468,4.122813,4.0,High,4.537822,4.823353,4.0,High,1.654767,2.143883,2.0,Medium,1.648167,1.578897,1.0,Low - Medium,3.254998,4.243553,4.0,High,2.692533,4.118075,4.0,High,1.273022,1.845407,1.0,Low - Medium,1.957852,1.936984,1.0,Low - Medium,2.135267,3.095178,3.0,Medium - High,4.306076,4.734779,4.0,High,0.644267,1.29686,1.0,Low - Medium,1.409267,1.302658,1.0,Low - Medium,2.858041,4.071475,4.0,High,3.313933,4.355577,4.0,High,1.273022,1.845407,1.0,Low - Medium,1.409267,1.302658,1.0,Low - Medium,3.083844,4.16936,4.0,High,2.692533,4.118075,4.0,High,1.273022,1.845407,1.0,Low - Medium,1.780889,1.732363,1.0,Low - Medium,1.913726,2.675565,2.0,Medium,1.923238,3.4795,3.0,Medium - High,1.273022,1.845407,1.0,Low - Medium,2.046333,2.040533,2.0,Medium,1.519507,1.913882,1.0,Low - Medium,3.846476,4.559117,4.0,High,1.654767,2.143883,2.0,Medium,1.780889,1.732363,1.0,Low - Medium,2.601749,3.851746,3.0,Medium - High,3.901818,4.58027,4.0,High,0.967627,1.57897,1.0,Low - Medium,1.780889,1.732363,1.0,Low - Medium,2.850131,4.068046,4.0,High,3.397565,4.387542,4.0,High,1.654767,2.143883,2.0,Medium,1.887067,1.855135,1.0,Low - Medium,2.349034,3.441878,3.0,Medium - High
1,3,111011-None-3365,111011,-9999,3365,(POLYGON ((32.46194054146073 29.89250514754305...,,,,,,,,,,0.742712,,,,,0.011222,0.0,0.0,Low (< 2.5%),0.962396,2.887187,2.0,Medium - High,0.843958,,,Insignificant Trend,0.0,1.25,1.0,Low to medium (-5 to 0),0.0,0.0,0.0,"Low (0 to 9 in 1,000,000)",0.025333,1.019067,1.0,Low to medium (2.5 to 5%),,,,,,,,,1.035414,4.141657,4.0,Extremely High,0.987061,4.948243,4.0,Extremely High,2.318042,5.0,4.0,Extremely High,4.330739,4.744205,4.0,High,0.509533,1.179314,1.0,Low - Medium,1.25,1.118499,1.0,Low - Medium,3.106368,4.179124,4.0,High,4.537822,4.823353,4.0,High,0.509533,1.179314,1.0,Low - Medium,1.25,1.118499,1.0,Low - Medium,3.492663,4.34658,4.0,High,2.692533,4.118075,4.0,High,0.509533,1.179314,1.0,Low - Medium,1.25,1.118499,1.0,Low - Medium,2.054454,2.955915,2.0,Medium,4.306076,4.734779,4.0,High,0.509533,1.179314,1.0,Low - Medium,1.25,1.118499,1.0,Low - Medium,2.900411,4.089843,4.0,High,3.313933,4.355577,4.0,High,0.509533,1.179314,1.0,Low - Medium,1.25,1.118499,1.0,Low - Medium,3.103865,4.178038,4.0,High,2.692533,4.118075,4.0,High,0.509533,1.179314,1.0,Low - Medium,1.25,1.118499,1.0,Low - Medium,1.715841,2.28135,2.0,Medium,1.923238,3.4795,3.0,Medium - High,0.509533,1.179314,1.0,Low - Medium,,,,NoData,1.169262,1.366507,1.0,Low - Medium,3.846476,4.559117,4.0,High,0.509533,1.179314,1.0,Low - Medium,1.25,1.118499,1.0,Low - Medium,2.767672,4.032301,4.0,High,3.901818,4.58027,4.0,High,0.509533,1.179314,1.0,Low - Medium,1.25,1.118499,1.0,Low - Medium,2.947644,4.110318,4.0,High,3.397565,4.387542,4.0,High,0.509533,1.179314,1.0,Low - Medium,1.25,1.118499,1.0,Low - Medium,2.30159,3.364929,3.0,Medium - High
2,5,111012-EGY.11_1-3365,111012,EGY.11_1,3365,(POLYGON ((31.88750000022489 29.85833333371637...,Al Qahirah,EGY,Egypt,Cairo|El Cairo|El Qahira|Le Caire,,Muhafazah,Governorate,,EG.QH,258.364251,,,,,0.016651,0.413638,0.0,Low (< 2.5%),1.027464,3.082393,3.0,High,0.843958,,,Insignificant Trend,0.0,1.25,1.0,Low to medium (-5 to 0),0.0,0.0,0.0,"Low (0 to 9 in 1,000,000)",0.045942,1.877895,1.0,Low to medium (2.5 to 5%),58.0,2.8,2.0,Medium to high (50 to 60%),0.6139,2.046333,2.0,Medium to high (0.6 to 90%),1.161367,4.645469,4.0,Extremely High,1.0,5.0,-1.0,Arid and Low Water Use,1.0,5.0,-1.0,Arid and Low Water Use,4.386393,4.765476,4.0,High,1.32957,1.894742,1.0,Low - Medium,1.780889,1.732363,1.0,Low - Medium,3.153695,4.19964,4.0,High,4.618667,4.854252,4.0,High,1.972883,2.367654,2.0,Medium,1.648167,1.578897,1.0,Low - Medium,3.370604,4.293668,4.0,High,2.772786,4.148748,4.0,High,1.697178,2.173716,2.0,Medium,1.957852,1.936984,1.0,Low - Medium,2.31869,3.392664,3.0,Medium - High,4.420186,4.778392,4.0,High,1.243075,1.81928,1.0,Low - Medium,1.409267,1.302658,1.0,Low - Medium,3.043929,4.152057,4.0,High,3.38735,4.383638,4.0,High,1.697178,2.173716,2.0,Medium,1.409267,1.302658,1.0,Low - Medium,3.167337,4.205553,4.0,High,2.772786,4.148748,4.0,High,1.697178,2.173716,2.0,Medium,1.780889,1.732363,1.0,Low - Medium,2.172732,3.155941,3.0,Medium - High,1.980562,3.544312,3.0,Medium - High,1.697178,2.173716,2.0,Medium,2.046333,2.040533,2.0,Medium,1.804761,2.458492,2.0,Medium,3.961123,4.602936,4.0,High,1.972883,2.367654,2.0,Medium,1.780889,1.732363,1.0,Low - Medium,2.723808,4.013286,4.0,High,3.977322,4.609127,4.0,High,1.476613,2.018566,2.0,Medium,1.780889,1.732363,1.0,Low - Medium,3.02268,4.142845,4.0,High,3.491555,4.423465,4.0,High,1.972883,2.367654,2.0,Medium,1.887067,1.855135,1.0,Low - Medium,2.507564,3.698992,3.0,Medium - High
3,7,111012-EGY.15_1-None,111012,EGY.15_1,-9999,(POLYGON ((32.36609158210641 29.63378296420298...,As Suways,EGY,Egypt,El Suweiz|Es Suweis|Suez,محافظة السويس,Muhafazah,Governorate,,EG.SW,1.748248,,,,,0.016651,0.413638,0.0,Low (< 2.5%),1.027464,3.082393,3.0,High,,,,,0.0,1.25,1.0,Low to medium (-5 to 0),0.0,0.0,0.0,"Low (0 to 9 in 1,000,000)",0.045942,1.877895,1.0,Low to medium (2.5 to 5%),58.0,2.8,2.0,Medium to high (50 to 60%),0.6139,2.046333,2.0,Medium to high (0.6 to 90%),1.161367,4.645469,4.0,Extremely High,1.0,5.0,-1.0,Arid and Low Water Use,1.0,5.0,-1.0,Arid and Low Water Use,4.386393,4.765476,4.0,High,1.32957,1.894742,1.0,Low - Medium,1.780889,1.732363,1.0,Low - Medium,3.153695,4.19964,4.0,High,4.618667,4.854252,4.0,High,1.972883,2.367654,2.0,Medium,1.648167,1.578897,1.0,Low - Medium,3.370604,4.293668,4.0,High,2.772786,4.148748,4.0,High,1.697178,2.173716,2.0,Medium,1.957852,1.936984,1.0,Low - Medium,2.31869,3.392664,3.0,Medium - High,4.420186,4.778392,4.0,High,1.243075,1.81928,1.0,Low - Medium,1.409267,1.302658,1.0,Low - Medium,3.043929,4.152057,4.0,High,3.38735,4.383638,4.0,High,1.697178,2.173716,2.0,Medium,1.409267,1.302658,1.0,Low - Medium,3.167337,4.205553,4.0,High,2.772786,4.148748,4.0,High,1.697178,2.173716,2.0,Medium,1.780889,1.732363,1.0,Low - Medium,2.172732,3.155941,3.0,Medium - High,1.980562,3.544312,3.0,Medium - High,1.697178,2.173716,2.0,Medium,2.046333,2.040533,2.0,Medium,1.804761,2.458492,2.0,Medium,3.961123,4.602936,4.0,High,1.972883,2.367654,2.0,Medium,1.780889,1.732363,1.0,Low - Medium,2.723808,4.013286,4.0,High,3.977322,4.609127,4.0,High,1.476613,2.018566,2.0,Medium,1.780889,1.732363,1.0,Low - Medium,3.02268,4.142845,4.0,High,3.491555,4.423465,4.0,High,1.972883,2.367654,2.0,Medium,1.887067,1.855135,1.0,Low - Medium,2.507564,3.698992,3.0,Medium - High
4,8,111012-EGY.8_1-3365,111012,EGY.8_1,3365,(POLYGON ((31.89332770960627 29.73370743035362...,Al Jizah,EGY,Egypt,El Giza|El Gīzah|Gizeh|Giza|Guizèh,,Muhafazah,Governorate,,EG.JZ,510.53507,,,,,0.016651,0.413638,0.0,Low (< 2.5%),1.027464,3.082393,3.0,High,0.843958,,,Insignificant Trend,0.0,1.25,1.0,Low to medium (-5 to 0),0.0,0.0,0.0,"Low (0 to 9 in 1,000,000)",0.045942,1.877895,1.0,Low to medium (2.5 to 5%),58.0,2.8,2.0,Medium to high (50 to 60%),0.6139,2.046333,2.0,Medium to high (0.6 to 90%),1.161367,4.645469,4.0,Extremely High,1.0,5.0,-1.0,Arid and Low Water Use,1.0,5.0,-1.0,Arid and Low Water Use,4.386393,4.765476,4.0,High,1.32957,1.894742,1.0,Low - Medium,1.780889,1.732363,1.0,Low - Medium,3.153695,4.19964,4.0,High,4.618667,4.854252,4.0,High,1.972883,2.367654,2.0,Medium,1.648167,1.578897,1.0,Low - Medium,3.370604,4.293668,4.0,High,2.772786,4.148748,4.0,High,1.697178,2.173716,2.0,Medium,1.957852,1.936984,1.0,Low - Medium,2.31869,3.392664,3.0,Medium - High,4.420186,4.778392,4.0,High,1.243075,1.81928,1.0,Low - Medium,1.409267,1.302658,1.0,Low - Medium,3.043929,4.152057,4.0,High,3.38735,4.383638,4.0,High,1.697178,2.173716,2.0,Medium,1.409267,1.302658,1.0,Low - Medium,3.167337,4.205553,4.0,High,2.772786,4.148748,4.0,High,1.697178,2.173716,2.0,Medium,1.780889,1.732363,1.0,Low - Medium,2.172732,3.155941,3.0,Medium - High,1.980562,3.544312,3.0,Medium - High,1.697178,2.173716,2.0,Medium,2.046333,2.040533,2.0,Medium,1.804761,2.458492,2.0,Medium,3.961123,4.602936,4.0,High,1.972883,2.367654,2.0,Medium,1.780889,1.732363,1.0,Low - Medium,2.723808,4.013286,4.0,High,3.977322,4.609127,4.0,High,1.476613,2.018566,2.0,Medium,1.780889,1.732363,1.0,Low - Medium,3.02268,4.142845,4.0,High,3.491555,4.423465,4.0,High,1.972883,2.367654,2.0,Medium,1.887067,1.855135,1.0,Low - Medium,2.507564,3.698992,3.0,Medium - High


In [26]:
# Order the columns alphabetically by name (axis=1), modifying gdf_master in place.
gdf_master.sort_index(axis=1,inplace=True)

In [27]:
gdf_master.shape

(68511, 224)

# Save in multiple formats:

1. Shapefile (string_id and geometry only; saving as Geopackage did not work)
1. CSV (no geom)
1. Pickle 
1. Bigquery 
1. PostGIS



In [28]:
# Attribute-only version of the table (geometry column removed); written to CSV below.
df_master =gdf_master.drop("geom",axis=1)

In [29]:
# Local output files live in the output folder and are named after the script.
_folder_and_name = (ec2_output_path, SCRIPT_NAME)
destination_path_pkl = "{}/{}.pkl".format(*_folder_and_name)
destination_path_csv = "{}/{}.csv".format(*_folder_and_name)
destination_path_shp = "{}/{}.shp".format(*_folder_and_name)

# The PostGIS table carries the same name as the BigQuery output table.
output_table_name = BQ_OUTPUT_TABLE_NAME
destination_table = ".".join([BQ_OUTPUT_DATASET_NAME, BQ_OUTPUT_TABLE_NAME])


In [30]:
# Time elapsed since `start` was recorded at the top of the notebook.
end = datetime.datetime.now()
elapsed = end - start
print(elapsed)

0:02:44.376142


In [31]:
# Identifier plus geometry only; this is what gets written to the shapefile.
gdf_simple = gdf_master[["string_id","geom"]]

In [32]:
# Saving as Geopackage did not work, so only the unique identifier (string_id)
# and the geometry are saved, as an ESRI Shapefile.
# The attribute table (CSV) can be joined on string_id in GIS software later.
gdf_simple.to_file(filename=destination_path_shp,driver="ESRI Shapefile",encoding ='utf-8')

In [33]:
# Full table including geometry, as a pickle.
gdf_master.to_pickle(destination_path_pkl)

In [34]:
# Attribute table without geometry, as UTF-8 encoded CSV.
df_master.to_csv(destination_path_csv, encoding='utf-8')

In [35]:
# Copy everything in the local output folder to S3.
!aws s3 cp {ec2_output_path} {s3_output_path} --recursive

upload: ../../../../data/Y2018M12D14_RH_Master_Horizontal_GPD_V01/output_V04/Y2018M12D14_RH_Master_Horizontal_GPD_V01.prj to s3://wri-projects/Aqueduct30/processData/Y2018M12D14_RH_Master_Horizontal_GPD_V01/output_V04/Y2018M12D14_RH_Master_Horizontal_GPD_V01.prj
upload: ../../../../data/Y2018M12D14_RH_Master_Horizontal_GPD_V01/output_V04/Y2018M12D14_RH_Master_Horizontal_GPD_V01.cpg to s3://wri-projects/Aqueduct30/processData/Y2018M12D14_RH_Master_Horizontal_GPD_V01/output_V04/Y2018M12D14_RH_Master_Horizontal_GPD_V01.cpg
upload: ../../../../data/Y2018M12D14_RH_Master_Horizontal_GPD_V01/output_V04/Y2018M12D14_RH_Master_Horizontal_GPD_V01.shx to s3://wri-projects/Aqueduct30/processData/Y2018M12D14_RH_Master_Horizontal_GPD_V01/output_V04/Y2018M12D14_RH_Master_Horizontal_GPD_V01.shx
upload: ../../../../data/Y2018M12D14_RH_Master_Horizontal_GPD_V01/output_V04/Y2018M12D14_RH_Master_Horizontal_GPD_V01.dbf to s3://wri-projects/Aqueduct30/processData/Y2018M12D14_RH_Master_Horizontal_GPD_V01/outp

In [36]:
# uploadGDFtoPostGIS reads the geometry from a column literally named
# "geometry", so rename "geom" and register it as the geometry column.
gdf_master2 = gpd.GeoDataFrame(
    gdf_master.rename(columns={"geom":"geometry"}),
    geometry="geometry"
)

In [37]:
# Write the full table to PostGIS (index not stored) and keep the version read back from the database.
gdfFromSQL = uploadGDFtoPostGIS(gdf_master2,output_table_name,False)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Create table temppolygon
Create table tempmultipolygon
DROP TABLE IF EXISTS y2018m12d14_rh_master_horizontal_gpd_v01_v04
ALTER TABLE temppolygon ALTER COLUMN geom type geometry(MultiPolygon, 4326) using ST_Multi(geom);
CREATE TABLE y2018m12d14_rh_master_horizontal_gpd_v01_v04 AS (SELECT * FROM temppolygon UNION SELECT * FROM tempmultipolygon);
UPDATE y2018m12d14_rh_master_horizontal_gpd_v01_v04 SET geom = st_makevalid(geom);
DROP TABLE temppolygon,tempmultipolygon


In [38]:
# Upload the table read back from PostGIS to BigQuery in chunks of 1000 rows,
# replacing the destination table if it already exists.
gdfFromSQL.to_gbq(destination_table=destination_table,
                  project_id=BQ_PROJECT_ID,
                  chunksize=1000,
                  if_exists="replace")

69it [25:22, 22.06s/it]


In [39]:
# Total time elapsed since `start` was recorded at the top of the notebook.
end = datetime.datetime.now()
elapsed = end - start
print(elapsed)

0:53:56.516625


Previous runs:   
0:47:11.853503
