In [15]:
""" Rasterize Aqueduct 30 and store to Google Cloud Storage.
-------------------------------------------------------------------------------

Recap: The quantile approach was applied to all weightings, not just the
default one. As a result, the histogram for the default ("def") weighting
shows slight variations.


Author: Rutger Hofste
Date: 20190521
Kernel: python35
Docker: rutgerhofste/gisdocker:ubuntu16.04

"""

# --- Script identity ---------------------------------------------------------
# The script name and output version are embedded in every local and S3 path
# built below.
SCRIPT_NAME = "Y2019M05D21_RH_AQ30VS21_Rasterize_AQ30_EE_V01"
OUTPUT_VERSION = 1

# --- Raster dimensions -------------------------------------------------------
# NOTE(review): presumably the column/row counts of a global grid at
# 30 arc-second resolution -- confirm against the rasterization step.
X_DIMENSION_30S = 43200
Y_DIMENSION_30S = 21600

# --- Inputs on S3 ------------------------------------------------------------
# Results table (pickle).
S3_TABLE_INPUT_PATH = "s3://wri-projects/Aqueduct30/finalData/Y2019M01D14_RH_Aqueduct_Results_V01/output_V04/annual"
TABLE_INPUT_FILE_NAME = "annual_normalized.pkl"

# Master geometry (shapefile).
S3_GEOM_INPUT_PATH = "s3://wri-projects/Aqueduct30/finalData/Y2019M01D14_RH_Aqueduct_Results_V01/output_V04/master_geom"
GEOM_INPUT_FILE_NAME = "master_geom.shp"

# --- Local working directories -----------------------------------------------
# The version number is zero-padded to two digits (e.g. 1 -> "V01").
ec2_input_path = "/volumes/data/{}/input_V{:02.0f}".format(
    SCRIPT_NAME, OUTPUT_VERSION
)
ec2_output_path = "/volumes/data/{}/output_V{:02.0f}".format(
    SCRIPT_NAME, OUTPUT_VERSION
)

# --- Output location on S3 ---------------------------------------------------
s3_output_path = "s3://wri-projects/Aqueduct30/Aq30vs21/{}/output_V{:02.0f}/".format(
    SCRIPT_NAME, OUTPUT_VERSION
)

# Echo the output location so it is recorded in the notebook output.
print("\ns3_output_path: " + s3_output_path)


s3_output_path: s3://wri-projects/Aqueduct30/Aq30vs21/Y2019M05D21_RH_AQ30VS21_Rasterize_AQ30_EE_V01/output_V01/


In [2]:
import time, datetime, sys
# Record when the notebook was run. The time string is labelled "UTC", so an
# explicit UTC time tuple is used: time.strftime() without one formats the
# machine's local time. A single snapshot keeps date and time consistent.
utc_now = time.gmtime()
dateString = time.strftime("Y%YM%mD%d", utc_now)
timeString = time.strftime("UTC %H:%M", utc_now)
# Wall-clock start of the run (naive local time).
# NOTE(review): presumably used later to report elapsed time -- confirm.
start = datetime.datetime.now()
print(dateString,timeString)
# A bare expression in the middle of a cell is not displayed, so print the
# interpreter version explicitly to record it in the output.
print(sys.version)

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

Y2019M05D21 UTC 16:26


In [23]:
import os
import numpy as np
import pandas as pd
import geopandas as gpd
from google.cloud import bigquery



In [4]:
!rm -r {ec2_input_path}
!rm -r {ec2_output_path}
!mkdir -p {ec2_input_path}
!mkdir -p {ec2_output_path}

In [6]:
# Download the pickled results table from S3 into the local input directory.
!aws s3 cp {S3_TABLE_INPUT_PATH}/{TABLE_INPUT_FILE_NAME} {ec2_input_path} 

download: s3://wri-projects/Aqueduct30/finalData/Y2019M01D14_RH_Aqueduct_Results_V01/output_V04/annual/annual_normalized.pkl to ../../../../../data/Y2019M05D21_RH_AQ30VS21_Rasterize_AQ30_EE_V01/input_V01/annual_normalized.pkl


In [16]:
# Download every file under the master geometry prefix (the shapefile and its
# sidecar files) from S3 into the local input directory.
!aws s3 cp {S3_GEOM_INPUT_PATH} {ec2_input_path} --recursive

download: s3://wri-projects/Aqueduct30/finalData/Y2019M01D14_RH_Aqueduct_Results_V01/output_V04/master_geom/master_geom.cpg to ../../../../../data/Y2019M05D21_RH_AQ30VS21_Rasterize_AQ30_EE_V01/input_V01/master_geom.cpg
download: s3://wri-projects/Aqueduct30/finalData/Y2019M01D14_RH_Aqueduct_Results_V01/output_V04/master_geom/master_geom.prj to ../../../../../data/Y2019M05D21_RH_AQ30VS21_Rasterize_AQ30_EE_V01/input_V01/master_geom.prj
download: s3://wri-projects/Aqueduct30/finalData/Y2019M01D14_RH_Aqueduct_Results_V01/output_V04/master_geom/master_geom.dbf to ../../../../../data/Y2019M05D21_RH_AQ30VS21_Rasterize_AQ30_EE_V01/input_V01/master_geom.dbf
download: s3://wri-projects/Aqueduct30/finalData/Y2019M01D14_RH_Aqueduct_Results_V01/output_V04/master_geom/master_geom.shx to ../../../../../data/Y2019M05D21_RH_AQ30VS21_Rasterize_AQ30_EE_V01/input_V01/master_geom.shx
download: s3://wri-projects/Aqueduct30/finalData/Y2019M01D14_RH_Aqueduct_Results_V01/output_V04/master_geom/master_geom.shp 

In [22]:
# Local paths of the downloaded results table and geometry shapefile:
# "<input directory>/<file name>".
local_path_template = "{}/{}"
table_input_path = local_path_template.format(ec2_input_path, TABLE_INPUT_FILE_NAME)
geom_input_path = local_path_template.format(ec2_input_path, GEOM_INPUT_FILE_NAME)

In [19]:
# Load the results table from the downloaded pickle file.
# NOTE(review): unpickling can execute arbitrary code; only load pickle files
# that come from a trusted source.
df_table = pd.read_pickle(table_input_path)

In [25]:
# Read the master geometry shapefile into a GeoDataFrame.
gdf_geom = gpd.read_file(geom_input_path)

In [None]:
# Left off here (work in progress).

In [20]:
# Values used to select one slice of the results table; each one matches the
# table column of the same name.
group_short = "tot"       # NOTE(review): presumably the "total" group -- confirm
indicator = "awr"         # indicator code to select
industry_short = "def"    # "def" is the default weighting

In [21]:
# Preview the first rows of the results table to check its columns.
df_table.head()

Unnamed: 0,cat,group_short,indicator,industry_short,label,raw,score,string_id,weight_fraction,weighted_score
0,4.0,qan,bwd,che,Extremely High,0.987061,4.948243,111011-EGY.11_1-3365,0.07619,0.377009
1,4.0,qan,bwd,smc,Extremely High,0.987061,4.948243,111011-EGY.11_1-3365,0.095238,0.471261
2,4.0,qan,bwd,min,Extremely High,0.987061,4.948243,111011-EGY.11_1-3365,0.061069,0.302183
3,4.0,qan,bwd,con,Extremely High,0.987061,4.948243,111011-EGY.11_1-3365,0.142857,0.706892
4,4.0,qan,bwd,tex,Extremely High,0.987061,4.948243,111011-EGY.11_1-3365,0.081633,0.403938


In [None]:
# Keep only the rows of the results table that match the selected group,
# indicator and industry weighting. The table is loaded as `df_table`; no
# frame named `df` is defined in this notebook, so using that name would
# raise a NameError on a fresh kernel.
selection_mask = (
    (df_table["group_short"] == group_short)
    & (df_table["indicator"] == indicator)
    & (df_table["industry_short"] == industry_short)
)
df_sel = df_table.loc[selection_mask]