In [1]:
""" Rasterize Aqueduct 30 and store to Google Cloud Storage.
-------------------------------------------------------------------------------

Recap: The quantiles approach has been applied to all weightings, not just
the default one. Therefore there are slight variations in the histogram for
"def" or default weighting. 

Rasterizing the master geom at 30s takes a long time. Consider using the 
simplified version of the master geomtery. 

Update:
- In order to compare overall water risk, it is important to export the
mask with the fraction of valid data.

The column is renamed to owr_wf (overall water risk weight fraction)

Author: Rutger Hofste
Date: 20190521
Kernel: python35
Docker: rutgerhofste/gisdocker:ubuntu16.04

"""
TESTING = 0

SCRIPT_NAME = "Y2019M05D21_RH_AQ30VS21_Rasterize_AQ30_EE_V01"
OUTPUT_VERSION = 5

GDAL_RASTERIZE_PATH = "/opt/anaconda3/envs/python35/bin/gdal_rasterize"
X_DIMENSION_30S = 43200
Y_DIMENSION_30S = 21600

S3_TABLE_INPUT_PATH = "s3://wri-projects/Aqueduct30/finalData/Y2019M01D14_RH_Aqueduct_Results_V01/output_V04/annual"
TABLE_INPUT_FILE_NAME = "annual_pivot.pkl"

#S3_GEOM_INPUT_PATH = "s3://wri-projects/Aqueduct30/finalData/Y2019M01D14_RH_Aqueduct_Results_V01/output_V04/master_geom"
#GEOM_INPUT_FILE_NAME = "master_geom.shp"

S3_GEOM_INPUT_PATH = "s3://wri-projects/Aqueduct30/rawData/Vizzuality/Y2019M05D21_RH_Simplified_Master_Geom_V01/output_V01"
GEOM_INPUT_FILE_NAME = "y2018m12d06_rh_master_shape_v02_2.gpkg"


ec2_input_path = "/volumes/data/{}/input_V{:02.0f}".format(SCRIPT_NAME,OUTPUT_VERSION) 
ec2_process_path =  "/volumes/data/{}/process_V{:02.0f}".format(SCRIPT_NAME,OUTPUT_VERSION) 
ec2_output_path = "/volumes/data/{}/output_V{:02.0f}".format(SCRIPT_NAME,OUTPUT_VERSION) 

GCS_OUTPUT_PATH = "gs://aqueduct30_v01/{}/".format(SCRIPT_NAME)

print("\nGCS_OUTPUT_PATH: " + GCS_OUTPUT_PATH)


GCS_OUTPUT_PATH: gs://aqueduct30_v01/Y2019M05D21_RH_AQ30VS21_Rasterize_AQ30_EE_V01/


In [2]:
import time, datetime, sys
dateString = time.strftime("Y%YM%mD%d")
timeString = time.strftime("UTC %H:%M")
start = datetime.datetime.now()
print(dateString,timeString)
sys.version

%matplotlib inline

Y2019M05D22 UTC 15:47


In [3]:
import os
import subprocess
import numpy as np
import pandas as pd
import geopandas as gpd
from google.cloud import bigquery



In [4]:
!rm -r {ec2_input_path}
!rm -r {ec2_process_path}
!rm -r {ec2_output_path}
!mkdir -p {ec2_input_path}
!mkdir -p {ec2_process_path}
!mkdir -p {ec2_output_path}

In [5]:
!aws s3 cp {S3_TABLE_INPUT_PATH}/{TABLE_INPUT_FILE_NAME} {ec2_input_path} 

download: s3://wri-projects/Aqueduct30/finalData/Y2019M01D14_RH_Aqueduct_Results_V01/output_V04/annual/annual_pivot.pkl to ../../../../../data/Y2019M05D21_RH_AQ30VS21_Rasterize_AQ30_EE_V01/input_V05/annual_pivot.pkl


In [6]:
!aws s3 cp {S3_GEOM_INPUT_PATH} {ec2_input_path} --recursive

download: s3://wri-projects/Aqueduct30/rawData/Vizzuality/Y2019M05D21_RH_Simplified_Master_Geom_V01/output_V01/y2018m12d06_rh_master_shape_v02_2.gpkg to ../../../../../data/Y2019M05D21_RH_AQ30VS21_Rasterize_AQ30_EE_V01/input_V05/y2018m12d06_rh_master_shape_v02_2.gpkg


In [7]:
table_input_path = "{}/{}".format(ec2_input_path,TABLE_INPUT_FILE_NAME)
geom_input_path = "{}/{}".format(ec2_input_path,GEOM_INPUT_FILE_NAME)

In [8]:
df_table = pd.read_pickle(table_input_path)

In [9]:
gdf_geom = gpd.read_file(geom_input_path)

In [10]:
df_table.head()

Unnamed: 0,aq30_id,aqid,area_km2,bwd_cat,bwd_label,bwd_raw,bwd_score,bws_cat,bws_label,bws_raw,...,w_awr_tex_rrr_cat,w_awr_tex_rrr_label,w_awr_tex_rrr_raw,w_awr_tex_rrr_score,w_awr_tex_rrr_weight_fraction,w_awr_tex_tot_cat,w_awr_tex_tot_label,w_awr_tex_tot_raw,w_awr_tex_tot_score,w_awr_tex_tot_weight_fraction
0,68095,-9999,0.021614,,,,,,,,...,4.0,Extremely High (4-5),4.16,4.215654,0.163265,4.0,Extremely High (4-5),4.58,4.819062,0.326531
1,68245,-9999,0.018349,,,,,,,,...,2.0,Medium - High (2-3),2.5,2.779638,0.163265,4.0,Extremely High (4-5),2.869318,4.08209,0.326531
2,68422,-9999,0.006812,,,,,,,,...,1.0,Low - Medium (1-2),1.2,1.828221,0.163265,2.0,Medium - High (2-3),1.858333,2.502638,0.326531
3,68408,-9999,0.033232,,,,,,,,...,2.0,Medium - High (2-3),1.64,2.177403,0.163265,2.0,Medium - High (2-3),1.718333,2.218954,0.326531
4,68110,-9999,14.558037,,,,,,,,...,,NoData,,,0.0,,NoData,,,0.0


In [11]:
df_table.drop(columns=["aq30_id","pfaf_id","gid_1","aqid"],inplace=True)

In [12]:
gdf_geom.head()

Unnamed: 0,aq30_id,aqid,gid_1,pfaf_id,string_id,type,geometry
0,0,3365,EGY.11_1,111011,111011-EGY.11_1-3365,Polygon,(POLYGON ((31.90590570688292 29.85788703615783...
1,1,3365,EGY.15_1,111011,111011-EGY.15_1-3365,Polygon,(POLYGON ((32.37500000014998 30.09166666628367...
2,2,-9999,EGY.15_1,111011,111011-EGY.15_1-None,MultiPolygon,"(POLYGON ((32.5295365298621 29.95075831581867,..."
3,3,3365,-9999,111011,111011-None-3365,MultiPolygon,(POLYGON ((32.46194054146073 29.89250514754305...
4,4,-9999,-9999,111011,111011-None-None,MultiPolygon,(POLYGON ((32.46194054146073 29.89250514754305...


In [13]:
gdf_merge = gdf_geom.merge(df_table,on="string_id",how="left")

In [14]:
gdf_merge.head()

Unnamed: 0,aq30_id,aqid,gid_1,pfaf_id,string_id,type,geometry,area_km2,bwd_cat,bwd_label,...,w_awr_tex_rrr_cat,w_awr_tex_rrr_label,w_awr_tex_rrr_raw,w_awr_tex_rrr_score,w_awr_tex_rrr_weight_fraction,w_awr_tex_tot_cat,w_awr_tex_tot_label,w_awr_tex_tot_raw,w_awr_tex_tot_score,w_awr_tex_tot_weight_fraction
0,0,3365,EGY.11_1,111011,111011-EGY.11_1-3365,Polygon,(POLYGON ((31.90590570688292 29.85788703615783...,4.223754,4.0,Extremely High (>75%),...,2.0,Medium - High (2-3),1.622678,2.165272,0.326531,3.0,High (3-4),2.457171,3.614603,0.877551
1,1,3365,EGY.15_1,111011,111011-EGY.15_1-3365,Polygon,(POLYGON ((32.37500000014998 30.09166666628367...,1846.012343,4.0,Extremely High (>75%),...,2.0,Medium - High (2-3),1.622678,2.165272,0.326531,3.0,High (3-4),2.457171,3.614603,0.877551
2,2,-9999,EGY.15_1,111011,111011-EGY.15_1-None,MultiPolygon,"(POLYGON ((32.5295365298621 29.95075831581867,...",30.526067,4.0,Extremely High (>75%),...,2.0,Medium - High (2-3),1.622678,2.165272,0.326531,3.0,High (3-4),2.457171,3.614603,0.877551
3,3,3365,-9999,111011,111011-None-3365,MultiPolygon,(POLYGON ((32.46194054146073 29.89250514754305...,0.742712,4.0,Extremely High (>75%),...,1.0,Low - Medium (1-2),0.445356,1.133763,0.163265,3.0,High (3-4),2.477321,3.649648,0.55102
4,4,-9999,-9999,111011,111011-None-None,MultiPolygon,(POLYGON ((32.46194054146073 29.89250514754305...,13.430995,4.0,Extremely High (>75%),...,1.0,Low - Medium (1-2),0.445356,1.133763,0.163265,3.0,High (3-4),2.477321,3.649648,0.55102


In [15]:
pd.set_option('display.max_rows', 500)

In [16]:
df_table.dtypes

area_km2                         float64
bwd_cat                          float64
bwd_label                         object
bwd_raw                          float64
bwd_score                        float64
bws_cat                          float64
bws_label                         object
bws_raw                          float64
bws_score                        float64
cc_1                              object
cep_cat                          float64
cep_label                         object
cep_raw                          float64
cep_score                        float64
cfr_cat                          float64
cfr_label                         object
cfr_raw                          float64
cfr_score                        float64
drr_cat                          float64
drr_label                         object
drr_raw                          float64
drr_score                        float64
engtype_1                         object
gid_0                             object
gtd_cat         

In [17]:
gdf_selection = gdf_merge[["string_id","w_awr_def_tot_score","w_awr_def_tot_weight_fraction","bws_score","iav_score","sev_score","geometry"]]

In [18]:
gdf_selection.rename(columns={"w_awr_def_tot_score":"owr_score",
                              "w_awr_def_tot_weight_fraction":"owr_wf"},inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)


In [19]:
process_path = "{}/{}.shp".format(ec2_process_path,SCRIPT_NAME)

In [20]:
if TESTING:
    gdf_selection =gdf_selection[0:1000]

In [21]:
gdf_selection.to_file(driver="ESRI Shapefile",filename=process_path)

In [22]:
indicators = ["owr_score","owr_wf","bws_score","iav_score","sev_score"]

In [23]:
for indicator in indicators:
    print(indicator)
    column = indicator
    layer = SCRIPT_NAME
    destination_path_shp = process_path
    destination_path_tif = "{}/{}.tif".format(ec2_output_path,indicator)
    command = "{} -a {} -at -ot Integer64 -of GTiff -te -180 -90 180 90 -ts {} {} -co COMPRESS=DEFLATE -co PREDICTOR=1 -co ZLEVEL=6 -l {} -a_nodata -9999 {} {}".format(GDAL_RASTERIZE_PATH,column,X_DIMENSION_30S,Y_DIMENSION_30S,layer,destination_path_shp,destination_path_tif)
    print(command)
    response = subprocess.check_output(command,shell=True)
    

owr_score
/opt/anaconda3/envs/python35/bin/gdal_rasterize -a owr_score -at -ot Integer64 -of GTiff -te -180 -90 180 90 -ts 43200 21600 -co COMPRESS=DEFLATE -co PREDICTOR=1 -co ZLEVEL=6 -l Y2019M05D21_RH_AQ30VS21_Rasterize_AQ30_EE_V01 -a_nodata -9999 /volumes/data/Y2019M05D21_RH_AQ30VS21_Rasterize_AQ30_EE_V01/process_V05/Y2019M05D21_RH_AQ30VS21_Rasterize_AQ30_EE_V01.shp /volumes/data/Y2019M05D21_RH_AQ30VS21_Rasterize_AQ30_EE_V01/output_V05/owr_score.tif
owr_wf
/opt/anaconda3/envs/python35/bin/gdal_rasterize -a owr_wf -at -ot Integer64 -of GTiff -te -180 -90 180 90 -ts 43200 21600 -co COMPRESS=DEFLATE -co PREDICTOR=1 -co ZLEVEL=6 -l Y2019M05D21_RH_AQ30VS21_Rasterize_AQ30_EE_V01 -a_nodata -9999 /volumes/data/Y2019M05D21_RH_AQ30VS21_Rasterize_AQ30_EE_V01/process_V05/Y2019M05D21_RH_AQ30VS21_Rasterize_AQ30_EE_V01.shp /volumes/data/Y2019M05D21_RH_AQ30VS21_Rasterize_AQ30_EE_V01/output_V05/owr_wf.tif
bws_score
/opt/anaconda3/envs/python35/bin/gdal_rasterize -a bws_score -at -ot Integer64 -of GT

In [24]:
!gsutil -m cp -r {ec2_output_path} {GCS_OUTPUT_PATH}

Copying file:///volumes/data/Y2019M05D21_RH_AQ30VS21_Rasterize_AQ30_EE_V01/output_V05/iav_score.tif [Content-Type=image/tiff]...
Copying file:///volumes/data/Y2019M05D21_RH_AQ30VS21_Rasterize_AQ30_EE_V01/output_V05/bws_score.tif [Content-Type=image/tiff]...
Copying file:///volumes/data/Y2019M05D21_RH_AQ30VS21_Rasterize_AQ30_EE_V01/output_V05/sev_score.tif [Content-Type=image/tiff]...
Copying file:///volumes/data/Y2019M05D21_RH_AQ30VS21_Rasterize_AQ30_EE_V01/output_V05/owr_score.tif [Content-Type=image/tiff]...
Copying file:///volumes/data/Y2019M05D21_RH_AQ30VS21_Rasterize_AQ30_EE_V01/output_V05/owr_wf.tif [Content-Type=image/tiff]...
\ [5/5 files][143.6 MiB/143.6 MiB] 100% Done                                    
Operation completed over 5 objects/143.6 MiB.                                    


In [25]:
end = datetime.datetime.now()
elapsed = end - start
print(elapsed)

0:20:34.079593
