In [1]:
""" Zonal statistics for basin demand. Export in table format.
-------------------------------------------------------------------------------
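Note: despite the "demand" wording in the title line, this script processes
supply: it computes zonal statistics of riverdischarge per basin (see
SCRIPT_NAME and EE_INPUT_RIVERDISCHARGE_PATH_ID) and exports them in table
format.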

Strategy:

1. first riverdischarge in zones masked by previous script (max_fa)

2. mask endorheic basins with mask from previous script

3. sum riverdischarge in remaining pixels




Author: Rutger Hofste
Date: 20180504
Kernel: python35
Docker: rutgerhofste/gisdocker:ubuntu16.04

Args:

    TESTING (boolean) : Testing mode. Uses a smaller geography if enabled.
    
    SCRIPT_NAME (string) : Script name.
    OUTPUT_VERSION (integer) : output version. 
    EE_INPUT_ZONES_ASSET_ID (string) : earthengine asset id of the zones image
        (the input version is part of the asset id).
    EE_INPUT_RIVERDISCHARGE_PATH_ID (string) : earthengine input path for the
        riverdischarge value images.
    SEPARATOR (string) : separator pattern passed to aqueduct3.split_key.
    SCHEMA (list) : field names passed to aqueduct3.split_key.
    EXTRA_PROPERTIES (dictionary) : Extra properties to store in the resulting
        pandas dataframe. 
    

Returns:

"""

# Run configuration -----------------------------------------------------------
TESTING = 0
SCRIPT_NAME = "Y2018M05D04_RH_Zonal_Stats_Supply_EE_V01"
OUTPUT_VERSION = 3

# Earth Engine inputs: the zones image and the folder with riverdischarge data.
EE_INPUT_ZONES_ASSET_ID = "projects/WRI-Aquaduct/Y2018M05D08_RH_Create_Zones_Mask_30sPfaf06_EE_V01/output_V02/validmaxfa_hybas_lev06_v1c_merged_fiona_30s_V04"
EE_INPUT_RIVERDISCHARGE_PATH_ID = "projects/WRI-Aquaduct/PCRGlobWB20V09/"

# Separator and field names handed to aqueduct3.split_key further down.
SEPARATOR = "_|-"
SCHEMA = [
    "geographic_range",
    "temporal_range",
    "indicator",
    "temporal_resolution",
    "unit",
    "spatial_resolution",
    "temporal_range_min",
    "temporal_range_max",
]

# Columns appended to every result dataframe.
EXTRA_PROPERTIES = dict(output_version=OUTPUT_VERSION,
                        script_used=SCRIPT_NAME)


# Output Parameters
# Note that only the GCS path ends with a trailing slash.
gcs_output_path = "gs://aqueduct30_v01/{}/output_V{:02.0f}/".format(
    SCRIPT_NAME, OUTPUT_VERSION)
ec2_output_path = "/volumes/data/{}/output_V{:02.0f}".format(
    SCRIPT_NAME, OUTPUT_VERSION)
s3_output_path = "s3://wri-projects/Aqueduct30/processData/{}/output_V{:02.0f}".format(
    SCRIPT_NAME, OUTPUT_VERSION)

# Echo the inputs and the GCS output location for the run log.
print("".join([
    "Input ee zones: ", EE_INPUT_ZONES_ASSET_ID,
    "\nInput ee values path: ", EE_INPUT_RIVERDISCHARGE_PATH_ID,
    "\nOutput gcs: ", gcs_output_path,
]))


Input ee zones: projects/WRI-Aquaduct/Y2018M05D08_RH_Create_Zones_Mask_30sPfaf06_EE_V01/output_V02/validmaxfa_hybas_lev06_v1c_merged_fiona_30s_V04
Input ee values path: projects/WRI-Aquaduct/PCRGlobWB20V09/
Output gcs: gs://aqueduct30_v01/Y2018M05D04_RH_Zonal_Stats_Supply_EE_V01/output_V03/


In [2]:
import time, datetime, sys, logging
# Time stamps for this run.
# time.strftime() formats the *local* time unless a struct_time is passed, so
# the "UTC" label was only correct on hosts whose time zone is UTC. Take one
# time.gmtime() snapshot so the date and the time come from the same UTC instant.
utc_now = time.gmtime()
dateString = time.strftime("Y%YM%mD%d", utc_now)
timeString = time.strftime("UTC %H:%M", utc_now)
# Wall-clock start, used by the last cell to report the total run time.
start = datetime.datetime.now()
print(dateString,timeString)
# Last expression of the cell: shows the Python version of the kernel.
sys.version

Y2018M05D09 UTC 08:01


'3.5.4 |Anaconda, Inc.| (default, Nov 20 2017, 18:44:38) \n[GCC 7.2.0]'

In [3]:
# Imports
# pandas holds the tabular results, timedelta formats elapsed times, os is used
# for file checks, ee is the Earth Engine client and aqueduct3 is the
# project-specific helper package used throughout this notebook.
import pandas as pd
from datetime import timedelta
import os
import ee
import aqueduct3

# Set up the Earth Engine client before any ee.* object is created.
# NOTE(review): presumably relies on credentials already stored on this
# machine - confirm for new environments.
ee.Initialize()

In [4]:
# File logging for this script: ./logs/<SCRIPT_NAME>.log, DEBUG level and up.

# logging.FileHandler raises if the target folder is missing, so create it first.
os.makedirs("./logs", exist_ok=True)

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# getLogger() returns the same logger object every time this cell is run.
# Adding a handler unconditionally would therefore write every message once
# per cell execution, so only attach the file handler when none is present.
if not any(isinstance(handler, logging.FileHandler) for handler in logger.handlers):
    formatter = logging.Formatter('%(asctime)s:%(levelname)s:%(message)s')
    file_handler = logging.FileHandler("./logs/{}.log".format(SCRIPT_NAME))
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

In [5]:
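# Optional clean start: uncomment both lines to wipe and recreate the local
# output folder. They are left disabled so that a rerun resumes where it
# stopped (the processing loop below skips images whose .pkl already exists).
#!rm -r {ec2_output_path}
#!mkdir -p {ec2_output_path}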

In [6]:
def _to_numeric_if_possible(column):
    """Return the column converted with pd.to_numeric, or unchanged on failure.

    Same outcome as pd.to_numeric(column, errors='ignore') without relying on
    the deprecated 'ignore' option.
    """
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


def post_process_results(result_list,function_properties,extra_properties=None):
    """Client side function to convert results of reduceRegion to pandas dataframe.
    -------------------------------------------------------------------------------
    
    Adds additional properties. The script is client side for convenience reasons.
    A more robust and fast approach would be to add the extra_properties to the 
    server side dictionary.
    
    Args:
        result_list (ee.List) : List of dictionaries. Result from reduceRegion.
            Only its getInfo() method is used here.
        function_properties (dictionary) : Additional properties used in the 
            reduceRegion function call.
        extra_properties (dictionary) : Additional properties set at global level. 
            Defaults to the global EXTRA_PROPERTIES when omitted. On duplicate
            keys these values win over function_properties.
    
    Returns:
        df (pd.DataFrame) : Pandas dataframe with one row per result dictionary
            and one constant column per extra property. Columns that can be
            parsed as numbers are converted, all others are left untouched.
    
    
    """
    if extra_properties is None:
        extra_properties = EXTRA_PROPERTIES
    # Previously the argument was overwritten with the global EXTRA_PROPERTIES,
    # so a caller-supplied dictionary was silently ignored.
    all_properties = {**function_properties, **extra_properties}
    # getInfo() pulls the server side list to the client (blocking call).
    result_list_clientside = result_list.getInfo()
    df = pd.DataFrame(result_list_clientside)
    df = df.assign(**all_properties)
    df = df.apply(_to_numeric_if_possible)
    return df  

In [7]:
# 1. first riverdischarge in zones masked by previous script (max_fa)

In [8]:
# Parameters of this zonal statistics run.
indicator = "riverdischarge"              # part of the image collection name
reducer_name = "first"                    # statistic handed to raster_zonal_stats
pfaf_level = 6                            # stored as zones_pfaf_level in the results
spatial_resolution = "30s"                # selects the crs transform
temporal_resolutions = ["month", "year"]  # one image collection per entry

In [None]:
# Zonal statistics (reducer_name) of every riverdischarge image, per zone of
# the zones image. One pickle is written per input image; images whose pickle
# already exists are skipped, so an interrupted run can simply be restarted.
i_processed = 0
start_time = time.time()

# The cell that creates the output folder is commented out. Without the folder
# every to_pickle call below would fail, so make sure it exists.
os.makedirs(ec2_output_path, exist_ok=True)

# Zones Image
i_zones_30sPfaf06 = ee.Image(EE_INPUT_ZONES_ASSET_ID)

# Geospatial constants
geometry_server_side = aqueduct3.earthengine.get_global_geometry(test=TESTING)
# NOTE(review): not used further in this cell; kept because other code may
# read it and because the getInfo() round trip fails early on a broken geometry.
geometry_client_side = geometry_server_side.getInfo()['coordinates']

crs_transform = aqueduct3.earthengine.get_crs_transform(spatial_resolution)

for temporal_resolution in temporal_resolutions:
    ic_values_input_asset_id = "{}global_historical_{}_{}_millionm3_5min_1960_2014".format(EE_INPUT_RIVERDISCHARGE_PATH_ID,indicator,temporal_resolution)
    print(ic_values_input_asset_id)
    # Table of the images in the collection; the loop below reads its
    # "input_image_asset_id" column.
    df_images = aqueduct3.earthengine.get_df_from_ic(ic_values_input_asset_id)

    if TESTING:
        # Testing mode: only the first three images of each collection.
        df_images = df_images[0:3]

    for index, row in df_images.iterrows():
        i_processed = i_processed + 1
        # Elapsed time is measured before the image is processed.
        elapsed_time = time.time() - start_time
        elapsed_string = str(timedelta(seconds=elapsed_time))
        i_values_input_asset_id = row["input_image_asset_id"]
        # Add an artificial extension to allow the function to run. 
        # consider updating the split_key function to handle cases without an extension.
        i_values_input_asset_id_extension = i_values_input_asset_id + ".ee_image"
        dictje = aqueduct3.split_key(i_values_input_asset_id_extension,SCHEMA,SEPARATOR)

        output_file_name = "{}_reduced_{:02.0f}_{}_{}".format(dictje["file_name"],pfaf_level,spatial_resolution,reducer_name)
        output_file_path_pkl = "{}/{}.pkl".format(ec2_output_path,output_file_name)
        output_file_path_csv = "{}/{}.csv".format(ec2_output_path,output_file_name)

        if os.path.isfile(output_file_path_pkl):
            # Result from an earlier run is present: log only, do not recompute.
            message = "Index {:02.2f}, Skipping: {} Elapsed: {} Asset: {}".format(float(index),i_processed,elapsed_string,i_values_input_asset_id)
            logger.debug(message)
            continue

        message = "Index {:02.2f}, Processed: {} Elapsed: {} Asset: {}".format(float(index),i_processed,elapsed_string,i_values_input_asset_id)
        print(message)
        logger.debug(message)

        i_values = ee.Image(i_values_input_asset_id)

        result_list = aqueduct3.earthengine.raster_zonal_stats(
                                    i_zones = i_zones_30sPfaf06,
                                    i_values = i_values,
                                    statistic_type = reducer_name,
                                    geometry = geometry_server_side,
                                    crs_transform = crs_transform,
                                    crs="EPSG:4326")

        function_properties = {"zones_pfaf_level":pfaf_level,
                               "zones_spatial_resolution":spatial_resolution,
                               "reducer":reducer_name,
                               "zones_image_asset_id":EE_INPUT_ZONES_ASSET_ID}

        # Keys parsed from the asset id win over the keys above on duplicates.
        function_properties = {**function_properties, **dictje}
        try:
            # Separate name: the frame being iterated is no longer rebound here.
            df_result = post_process_results(result_list,function_properties)
            df_result.to_pickle(output_file_path_pkl)
            #df_result.to_csv(output_file_path_csv,encoding='utf-8')
        except Exception:
            # Best effort: one failing image must not stop the whole run, but
            # the failure has to be visible. "except Exception" lets
            # KeyboardInterrupt through and logger.exception records the traceback.
            message = "Index {:02.2f}, Error: {} Elapsed: {} Asset: {}".format(float(index),i_processed,elapsed_string,i_values_input_asset_id)
            print(message)
            logger.exception(message)

projects/WRI-Aquaduct/PCRGlobWB20V09/global_historical_riverdischarge_month_millionm3_5min_1960_2014
Index 318.00, Processed: 319 Elapsed: 0:00:03.261122 Asset: projects/WRI-Aquaduct/PCRGlobWB20V09/global_historical_riverdischarge_month_millionm3_5min_1960_2014/global_historical_riverdischarge_month_millionm3_5min_1960_2014_I318Y1986M07
Index 347.00, Processed: 348 Elapsed: 0:00:46.878940 Asset: projects/WRI-Aquaduct/PCRGlobWB20V09/global_historical_riverdischarge_month_millionm3_5min_1960_2014/global_historical_riverdischarge_month_millionm3_5min_1960_2014_I347Y1988M12
Index 348.00, Processed: 349 Elapsed: 0:01:36.608721 Asset: projects/WRI-Aquaduct/PCRGlobWB20V09/global_historical_riverdischarge_month_millionm3_5min_1960_2014/global_historical_riverdischarge_month_millionm3_5min_1960_2014_I348Y1989M01
Index 349.00, Processed: 350 Elapsed: 0:02:20.625630 Asset: projects/WRI-Aquaduct/PCRGlobWB20V09/global_historical_riverdischarge_month_millionm3_5min_1960_2014/global_historical_riverd

Index 380.00, Processed: 381 Elapsed: 0:24:05.481222 Asset: projects/WRI-Aquaduct/PCRGlobWB20V09/global_historical_riverdischarge_month_millionm3_5min_1960_2014/global_historical_riverdischarge_month_millionm3_5min_1960_2014_I380Y1991M09
Index 381.00, Processed: 382 Elapsed: 0:24:44.408051 Asset: projects/WRI-Aquaduct/PCRGlobWB20V09/global_historical_riverdischarge_month_millionm3_5min_1960_2014/global_historical_riverdischarge_month_millionm3_5min_1960_2014_I381Y1991M10
Index 382.00, Processed: 383 Elapsed: 0:25:19.020525 Asset: projects/WRI-Aquaduct/PCRGlobWB20V09/global_historical_riverdischarge_month_millionm3_5min_1960_2014/global_historical_riverdischarge_month_millionm3_5min_1960_2014_I382Y1991M11
Index 383.00, Processed: 384 Elapsed: 0:26:03.189024 Asset: projects/WRI-Aquaduct/PCRGlobWB20V09/global_historical_riverdischarge_month_millionm3_5min_1960_2014/global_historical_riverdischarge_month_millionm3_5min_1960_2014_I383Y1991M12
Index 384.00, Processed: 385 Elapsed: 0:26:43.24

Index 415.00, Processed: 416 Elapsed: 0:47:25.719483 Asset: projects/WRI-Aquaduct/PCRGlobWB20V09/global_historical_riverdischarge_month_millionm3_5min_1960_2014/global_historical_riverdischarge_month_millionm3_5min_1960_2014_I415Y1994M08
Index 416.00, Processed: 417 Elapsed: 0:48:14.421297 Asset: projects/WRI-Aquaduct/PCRGlobWB20V09/global_historical_riverdischarge_month_millionm3_5min_1960_2014/global_historical_riverdischarge_month_millionm3_5min_1960_2014_I416Y1994M09
Index 417.00, Processed: 418 Elapsed: 0:48:47.217500 Asset: projects/WRI-Aquaduct/PCRGlobWB20V09/global_historical_riverdischarge_month_millionm3_5min_1960_2014/global_historical_riverdischarge_month_millionm3_5min_1960_2014_I417Y1994M10
Index 418.00, Processed: 419 Elapsed: 0:49:36.528543 Asset: projects/WRI-Aquaduct/PCRGlobWB20V09/global_historical_riverdischarge_month_millionm3_5min_1960_2014/global_historical_riverdischarge_month_millionm3_5min_1960_2014_I418Y1994M11
Index 419.00, Processed: 420 Elapsed: 0:50:15.93

Index 450.00, Processed: 451 Elapsed: 1:11:16.453583 Asset: projects/WRI-Aquaduct/PCRGlobWB20V09/global_historical_riverdischarge_month_millionm3_5min_1960_2014/global_historical_riverdischarge_month_millionm3_5min_1960_2014_I450Y1997M07
Index 451.00, Processed: 452 Elapsed: 1:12:23.308733 Asset: projects/WRI-Aquaduct/PCRGlobWB20V09/global_historical_riverdischarge_month_millionm3_5min_1960_2014/global_historical_riverdischarge_month_millionm3_5min_1960_2014_I451Y1997M08
Index 452.00, Processed: 453 Elapsed: 1:12:53.880298 Asset: projects/WRI-Aquaduct/PCRGlobWB20V09/global_historical_riverdischarge_month_millionm3_5min_1960_2014/global_historical_riverdischarge_month_millionm3_5min_1960_2014_I452Y1997M09
Index 453.00, Processed: 454 Elapsed: 1:14:06.567258 Asset: projects/WRI-Aquaduct/PCRGlobWB20V09/global_historical_riverdischarge_month_millionm3_5min_1960_2014/global_historical_riverdischarge_month_millionm3_5min_1960_2014_I453Y1997M10
Index 454.00, Processed: 455 Elapsed: 1:14:35.43

Index 485.00, Processed: 486 Elapsed: 1:37:36.276481 Asset: projects/WRI-Aquaduct/PCRGlobWB20V09/global_historical_riverdischarge_month_millionm3_5min_1960_2014/global_historical_riverdischarge_month_millionm3_5min_1960_2014_I485Y2000M06
Index 486.00, Processed: 487 Elapsed: 1:38:02.022019 Asset: projects/WRI-Aquaduct/PCRGlobWB20V09/global_historical_riverdischarge_month_millionm3_5min_1960_2014/global_historical_riverdischarge_month_millionm3_5min_1960_2014_I486Y2000M07
Index 487.00, Processed: 488 Elapsed: 1:38:49.359459 Asset: projects/WRI-Aquaduct/PCRGlobWB20V09/global_historical_riverdischarge_month_millionm3_5min_1960_2014/global_historical_riverdischarge_month_millionm3_5min_1960_2014_I487Y2000M08
Index 488.00, Processed: 489 Elapsed: 1:39:30.665776 Asset: projects/WRI-Aquaduct/PCRGlobWB20V09/global_historical_riverdischarge_month_millionm3_5min_1960_2014/global_historical_riverdischarge_month_millionm3_5min_1960_2014_I488Y2000M09
Index 489.00, Processed: 490 Elapsed: 1:39:55.48

Index 520.00, Processed: 521 Elapsed: 2:03:09.323384 Asset: projects/WRI-Aquaduct/PCRGlobWB20V09/global_historical_riverdischarge_month_millionm3_5min_1960_2014/global_historical_riverdischarge_month_millionm3_5min_1960_2014_I520Y2003M05
Index 521.00, Processed: 522 Elapsed: 2:03:50.345621 Asset: projects/WRI-Aquaduct/PCRGlobWB20V09/global_historical_riverdischarge_month_millionm3_5min_1960_2014/global_historical_riverdischarge_month_millionm3_5min_1960_2014_I521Y2003M06
Index 522.00, Processed: 523 Elapsed: 2:04:22.869712 Asset: projects/WRI-Aquaduct/PCRGlobWB20V09/global_historical_riverdischarge_month_millionm3_5min_1960_2014/global_historical_riverdischarge_month_millionm3_5min_1960_2014_I522Y2003M07
Index 523.00, Processed: 524 Elapsed: 2:05:04.077780 Asset: projects/WRI-Aquaduct/PCRGlobWB20V09/global_historical_riverdischarge_month_millionm3_5min_1960_2014/global_historical_riverdischarge_month_millionm3_5min_1960_2014_I523Y2003M08
Index 524.00, Processed: 525 Elapsed: 2:05:40.84

Index 555.00, Processed: 556 Elapsed: 2:28:09.922554 Asset: projects/WRI-Aquaduct/PCRGlobWB20V09/global_historical_riverdischarge_month_millionm3_5min_1960_2014/global_historical_riverdischarge_month_millionm3_5min_1960_2014_I555Y2006M04
Index 556.00, Processed: 557 Elapsed: 2:29:15.694928 Asset: projects/WRI-Aquaduct/PCRGlobWB20V09/global_historical_riverdischarge_month_millionm3_5min_1960_2014/global_historical_riverdischarge_month_millionm3_5min_1960_2014_I556Y2006M05
Index 557.00, Processed: 558 Elapsed: 2:30:08.510385 Asset: projects/WRI-Aquaduct/PCRGlobWB20V09/global_historical_riverdischarge_month_millionm3_5min_1960_2014/global_historical_riverdischarge_month_millionm3_5min_1960_2014_I557Y2006M06
Index 558.00, Processed: 559 Elapsed: 2:30:57.635299 Asset: projects/WRI-Aquaduct/PCRGlobWB20V09/global_historical_riverdischarge_month_millionm3_5min_1960_2014/global_historical_riverdischarge_month_millionm3_5min_1960_2014_I558Y2006M07
Index 559.00, Processed: 560 Elapsed: 2:31:42.28

In [None]:
# Upload the results to S3: recursive copy of the local output folder,
# restricted to the pickled dataframes (exclude everything, then include *.pkl).
!aws s3 cp {ec2_output_path} {s3_output_path} --recursive --exclude='*' --include='*.pkl'

In [None]:
# Total wall-clock run time, measured from `start`, which is set in the
# time stamp cell near the top of the notebook.
end = datetime.datetime.now()
elapsed = end - start
print(elapsed)

Previous runs:  
