In [17]:
""" Combine zonal statistics of different indicators and calculate flux. 
-------------------------------------------------------------------------------
zonal_stats_ca_aq21ee_export.csv

Author: Rutger Hofste
Date: 20180619
Kernel: python35
Docker: rutgerhofste/gisdocker:ubuntu16.04

Args:


"""

OVERWRITE = 1
TESTING = 0
SCRIPT_NAME = "Y2018M06D19_RH_QA_AQ21_AQ30_Demand_Cleanup_V01"
OUTPUT_VERSION = 2

GCS_INPUT_PATH = "gs://aqueduct30_v01/Y2018M06D18_RH_QA_AQ21_AQ30_Demand_Zonal_Stats_EE_V01/output_V03"

AQ21_SHAPEFILE_S3_INPUT_PATH = "s3://wri-projects/Aqueduct30/qaData/Y2018M06D05_RH_QA_Aqueduct21_Flux_Shapefile_V01/output_V05"
AQ30_SHAPEFILE_S3_INPUT_PATH = "s3://wri-projects/Aqueduct30/processData/Y2017M08D02_RH_Merge_HydroBasins_V02/output_V04/"
AQ21PROJ_SHAPEFILE_S3_INPUT_PATH = "s3://wri-projects/Aqueduct30/qaData/Y2018M06D19_RH_QA_Download_Aq21projection_Shapefile_V01/output_V01"

AQ21_INPUT_FILE_NAME = "aqueduct21_flux"
AQ30_INPUT_FILE_NAME = "hybas_lev06_v1c_merged_fiona_V04"
AQ21PROJ_INPUT_FILE_NAME = "aqueduct21projection_flux"

ECKERT_IV_PROJ4_STRING = "+proj=eck4 +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs"

ec2_input_path = "/volumes/data/{}/input_V{:02.0f}".format(SCRIPT_NAME,OUTPUT_VERSION)
ec2_output_path = "/volumes/data/{}/output_V{:02.0f}".format(SCRIPT_NAME,OUTPUT_VERSION) 
s3_output_path = "s3://wri-projects/Aqueduct30/qaData/{}/output_V{:02.0f}".format(SCRIPT_NAME,OUTPUT_VERSION)

print("Input GCS : " + GCS_INPUT_PATH +
      "\nInput ec2: " + ec2_input_path + 
      "\nOutput ec2: " + ec2_output_path +
      "\nOutput s3: " + ec2_output_path)

Input GCS : gs://aqueduct30_v01/Y2018M06D18_RH_QA_AQ21_AQ30_Demand_Zonal_Stats_EE_V01/output_V03
Input ec2: /volumes/data/Y2018M06D19_RH_QA_AQ21_AQ30_Demand_Cleanup_V01/input_V02
Output ec2: /volumes/data/Y2018M06D19_RH_QA_AQ21_AQ30_Demand_Cleanup_V01/output_V02
Output s3: /volumes/data/Y2018M06D19_RH_QA_AQ21_AQ30_Demand_Cleanup_V01/output_V02


In [2]:
import time, datetime, sys, logging
dateString = time.strftime("Y%YM%mD%d")
timeString = time.strftime("UTC %H:%M")
start = datetime.datetime.now()
print(dateString,timeString)
sys.version

Y2018M06D19 UTC 13:46


'3.5.4 |Anaconda, Inc.| (default, Nov 20 2017, 18:44:38) \n[GCC 7.2.0]'

In [3]:
import geopandas as gpd
import pandas as pd

In [4]:
if OVERWRITE:
    !rm -r {ec2_input_path}
    !rm -r {ec2_output_path}
    !mkdir -p {ec2_input_path}
    !mkdir -p {ec2_output_path}
else: 
    !mkdir -p {ec2_input_path}
    !mkdir -p {ec2_output_path}

rm: cannot remove '/volumes/data/Y2018M06D19_RH_QA_AQ21_AQ30_Demand_Cleanup_V01/input_V02': No such file or directory
rm: cannot remove '/volumes/data/Y2018M06D19_RH_QA_AQ21_AQ30_Demand_Cleanup_V01/output_V02': No such file or directory


In [5]:
# Aq 21 shapefile
!aws s3 cp {AQ21_SHAPEFILE_S3_INPUT_PATH} {ec2_input_path} --recursive

download: s3://wri-projects/Aqueduct30/qaData/Y2018M06D05_RH_QA_Aqueduct21_Flux_Shapefile_V01/output_V05/aqueduct21_flux.cpg to ../../../../data/Y2018M06D19_RH_QA_AQ21_AQ30_Demand_Cleanup_V01/input_V02/aqueduct21_flux.cpg
download: s3://wri-projects/Aqueduct30/qaData/Y2018M06D05_RH_QA_Aqueduct21_Flux_Shapefile_V01/output_V05/aqueduct21_flux.prj to ../../../../data/Y2018M06D19_RH_QA_AQ21_AQ30_Demand_Cleanup_V01/input_V02/aqueduct21_flux.prj
download: s3://wri-projects/Aqueduct30/qaData/Y2018M06D05_RH_QA_Aqueduct21_Flux_Shapefile_V01/output_V05/aqueduct21_flux.shx to ../../../../data/Y2018M06D19_RH_QA_AQ21_AQ30_Demand_Cleanup_V01/input_V02/aqueduct21_flux.shx
download: s3://wri-projects/Aqueduct30/qaData/Y2018M06D05_RH_QA_Aqueduct21_Flux_Shapefile_V01/output_V05/aqueduct21_flux.shp to ../../../../data/Y2018M06D19_RH_QA_AQ21_AQ30_Demand_Cleanup_V01/input_V02/aqueduct21_flux.shp
download: s3://wri-projects/Aqueduct30/qaData/Y2018M06D05_RH_QA_Aqueduct21_Flux_Shapefile_V01/output_V05/aqueduc

In [6]:
# Aq 30 shapefile
!aws s3 cp {AQ30_SHAPEFILE_S3_INPUT_PATH} {ec2_input_path} --recursive --exclude "*" --include "hybas_lev06_v1c_merged_fiona_V04*"

download: s3://wri-projects/Aqueduct30/processData/Y2017M08D02_RH_Merge_HydroBasins_V02/output_V04/hybas_lev06_v1c_merged_fiona_V04.cpg to ../../../../data/Y2018M06D19_RH_QA_AQ21_AQ30_Demand_Cleanup_V01/input_V02/hybas_lev06_v1c_merged_fiona_V04.cpg
download: s3://wri-projects/Aqueduct30/processData/Y2017M08D02_RH_Merge_HydroBasins_V02/output_V04/hybas_lev06_v1c_merged_fiona_V04.prj to ../../../../data/Y2018M06D19_RH_QA_AQ21_AQ30_Demand_Cleanup_V01/input_V02/hybas_lev06_v1c_merged_fiona_V04.prj
download: s3://wri-projects/Aqueduct30/processData/Y2017M08D02_RH_Merge_HydroBasins_V02/output_V04/hybas_lev06_v1c_merged_fiona_V04.shx to ../../../../data/Y2018M06D19_RH_QA_AQ21_AQ30_Demand_Cleanup_V01/input_V02/hybas_lev06_v1c_merged_fiona_V04.shx
download: s3://wri-projects/Aqueduct30/processData/Y2017M08D02_RH_Merge_HydroBasins_V02/output_V04/hybas_lev06_v1c_merged_fiona_V04.dbf to ../../../../data/Y2018M06D19_RH_QA_AQ21_AQ30_Demand_Cleanup_V01/input_V02/hybas_lev06_v1c_merged_fiona_V04.dbf


In [7]:
# Aq 21 proj shapefile
!aws s3 cp {AQ21PROJ_SHAPEFILE_S3_INPUT_PATH} {ec2_input_path} --recursive

download: s3://wri-projects/Aqueduct30/qaData/Y2018M06D19_RH_QA_Download_Aq21projection_Shapefile_V01/output_V01/aqueduct21projection_flux.prj to ../../../../data/Y2018M06D19_RH_QA_AQ21_AQ30_Demand_Cleanup_V01/input_V02/aqueduct21projection_flux.prj
download: s3://wri-projects/Aqueduct30/qaData/Y2018M06D19_RH_QA_Download_Aq21projection_Shapefile_V01/output_V01/aqueduct21projection_flux.cpg to ../../../../data/Y2018M06D19_RH_QA_AQ21_AQ30_Demand_Cleanup_V01/input_V02/aqueduct21projection_flux.cpg
download: s3://wri-projects/Aqueduct30/qaData/Y2018M06D19_RH_QA_Download_Aq21projection_Shapefile_V01/output_V01/aqueduct21projection_flux.shx to ../../../../data/Y2018M06D19_RH_QA_AQ21_AQ30_Demand_Cleanup_V01/input_V02/aqueduct21projection_flux.shx
download: s3://wri-projects/Aqueduct30/qaData/Y2018M06D19_RH_QA_Download_Aq21projection_Shapefile_V01/output_V01/aqueduct21projection_flux.shp to ../../../../data/Y2018M06D19_RH_QA_AQ21_AQ30_Demand_Cleanup_V01/input_V02/aqueduct21projection_flux.shp


In [18]:
# Zonal Stats

!gsutil cp {GCS_INPUT_PATH}/* {ec2_input_path}

Copying gs://aqueduct30_v01/Y2018M06D18_RH_QA_AQ21_AQ30_Demand_Zonal_Stats_EE_V01/output_V03/zonal_stats_ca_aq21ee_export.csv...
Copying gs://aqueduct30_v01/Y2018M06D18_RH_QA_AQ21_AQ30_Demand_Zonal_Stats_EE_V01/output_V03/zonal_stats_ca_aq21projee_export.csv...
Copying gs://aqueduct30_v01/Y2018M06D18_RH_QA_AQ21_AQ30_Demand_Zonal_Stats_EE_V01/output_V03/zonal_stats_ca_aq30ee_export.csv...
Copying gs://aqueduct30_v01/Y2018M06D18_RH_QA_AQ21_AQ30_Demand_Zonal_Stats_EE_V01/output_V03/zonal_stats_cd_aq21ee_export.csv...
/ [4 files][ 86.4 MiB/ 86.4 MiB]                                                
==> NOTE: You are performing a sequence of gsutil operations that may
run significantly faster if you instead use gsutil -m -o ... Please
see the -m section under "gsutil help options" for further information
about when gsutil -m can be advantageous.

Copying gs://aqueduct30_v01/Y2018M06D18_RH_QA_AQ21_AQ30_Demand_Zonal_Stats_EE_V01/output_V03/zonal_stats_cd_aq21projee_export.csv...
Copying gs://a

In [14]:
# Read Shapefiles of Aq2.1 Aq3.0 and Aq21proj

aq21_input_file_path = "{}/{}.shp".format(ec2_input_path,AQ21_INPUT_FILE_NAME)
gdf_aq21 = gpd.read_file(aq21_input_file_path )
gdf_aq21 = gdf_aq21.set_index("GU")

aq30_input_file_path = "{}/{}.shp".format(ec2_input_path,AQ30_INPUT_FILE_NAME)
gdf_aq30 = gpd.read_file(aq30_input_file_path )

gdf_aq30_eckert4 = gdf_aq30.to_crs(ECKERT_IV_PROJ4_STRING)
gdf_aq30["area_m2"] = gdf_aq30_eckert4.geometry.area
gdf_aq30 = gdf_aq30.set_index("PFAF_ID")

In [15]:
aq21proj_input_file_path = "{}/{}.shp".format(ec2_input_path,AQ21PROJ_INPUT_FILE_NAME)
gdf_aq21proj = gpd.read_file(aq21proj_input_file_path )
gdf_aq21proj = gdf_aq21proj.set_index("BasinID")

In [None]:
aqueduct_versions = ["aq21","aq30","aq21proj"]
sectors = ["a","d","i","t"]
demand_types = ["c","u"]

for aqueduct_version in aqueduct_versions:  
    
    if aqueduct_version == "aq21":
        gdf_left = gdf_aq21.copy()
        index_name = "GU"
    elif aqueduct_version == "aq30":
        gdf_left = gdf_aq30.copy()
        index_name = "PFAF_ID"
    elif aqueduct_version == "aq21proj":
        gdf_left = gdf_aq21proj.copy()
        index_name = "BasinID"        
        
    else:
        break
    
    for demand_type in demand_types:
        for sector in sectors:
            print(aqueduct_version,demand_type,sector)
            input_file_name = "zonal_stats_{}{}_{}ee_export.csv".format(demand_type,sector,aqueduct_version)
            input_file_path = ec2_input_path + "/" + input_file_name
            df_in = pd.read_csv(input_file_path)
            
            df_out = df_in[["sum","count",index_name]].copy()
            df_out = df_out.set_index(index_name)
            
            df_out = df_out.rename(columns={"sum":"sum_{}{}_m3".format(demand_type,sector),
                                            "count":"count_{}{}_dimensionless".format(demand_type,sector)})
            
            gdf_left  = gdf_left.merge(right=df_out,
                                   how="left",
                                   left_index = True,
                                   right_index = True)
            gdf_left["sum_{}{}_m".format(demand_type,sector)] = gdf_left["sum_{}{}_m3".format(demand_type,sector)]/gdf_left["area_m2"]
            
            
    gdf_out = gdf_left
    df_out = pd.DataFrame(gdf_out.drop("geometry",1))
    output_file_path_no_ext = "{}/{}".format(ec2_output_path,aqueduct_version)
    
    gdf_left.to_file(driver='ESRI Shapefile', filename=output_file_path_no_ext+".shp")
    df_out.to_csv(output_file_path_no_ext+".csv")

aq21 c a
aq21 c d
aq21 c i
aq21 c t
aq21 u a
aq21 u d
aq21 u i
aq21 u t


In [None]:
!aws s3 cp {ec2_output_path} {s3_output_path} --recursive

In [None]:
end = datetime.datetime.now()
elapsed = end - start
print(elapsed)

Previous runs:  
