# Calculate average PCRGlobWB supply using EE

* Purpose of script: Join the CSV tables from GCS into one file using pandas.
* Author: Rutger Hofste
* Kernel used: python35
* Date created: 20170914

In [1]:
import time

# Take a single UTC snapshot: time.strftime() without a time tuple formats the
# machine's *local* time, which would make the "UTC" label wrong on any host
# whose timezone is not UTC. Using one snapshot also keeps date and time
# consistent with each other.
utcNow = time.gmtime()
dateString = time.strftime("Y%YM%mD%d", utcNow)
timeString = time.strftime("UTC %H:%M", utcNow)
print(dateString,timeString)

Y2017M09D14 UTC 19:40


In [64]:
# Source folder on Google Cloud Storage and the local input/output folders.
GCS_INPUT_PATH = "gs://aqueduct30_v01/Y2017M09D11_RH_zonal_stats_EE_V15/"
EC2_INPUT_PATH = "/volumes/data/Y2017M09D14_RH_merge_EE_results_V01/input"
EC2_OUTPUT_PATH = "/volumes/data/Y2017M09D14_RH_merge_EE_results_V01/output"

# Suffix removed from each file name to obtain the dataset name,
# e.g. IrrLinearWW_monthY2014M12V15ee_export.csv -> IrrLinearWW_monthY2014M12
STRING_TRIM = "V15ee_export.csv"

# Auxiliary datasets. Do not change the order (zones, area, extra):
# the entries are addressed by position, e.g. AUXFILES[0] and AUXFILES[1].
AUXFILES = [
    "Hybas06",      # zones
    "area_30s_m2",  # area
    "ones_30s",     # extra
]

# Columns dropped from the tables after they are read.
DROP_COLUMNS = [
    ".geo",
    "system:index",
]




In [65]:
# IPython shell magics: create the local input and output folders.
# -p creates missing parent folders and does not fail if the folder exists.
!mkdir -p {EC2_INPUT_PATH}
!mkdir -p {EC2_OUTPUT_PATH}

In [66]:
#!gsutil cp -r {GCS_INPUT_PATH} {EC2_INPUT_PATH} 

In [67]:
import pandas as pd
import os
import re

In [81]:
def createRegex(aList):
    """Return the strings in aList joined into one "|"-separated pattern.

    The entries are joined as-is; no escaping is applied. An empty list
    yields an empty string.
    """
    alternation = "|"
    return alternation.join(aList)

def prepareFile(oneFile):
    """Read one csv from the input folder and prepare it for merging.

    Args:
        oneFile: File name (not a full path) of a csv inside the
            module-level ``folder``.

    Returns:
        dict with two keys:
            "df": result of prepareDf applied to the csv contents.
            "trimFileName": oneFile with its last len(STRING_TRIM)
                characters removed.
    """
    csvPath = os.path.join(folder, oneFile)
    return {
        "df": prepareDf(pd.read_csv(csvPath)),
        # Positional trim: the suffix is cut by length, not by content.
        "trimFileName": oneFile[:-len(STRING_TRIM)],
    }
        

def prepareDf(df):
    """Index a table by its PfafID column and drop the DROP_COLUMNS columns.

    The first column whose name contains "PfafID" becomes the index. The
    input DataFrame is not modified; a new DataFrame is returned.

    Args:
        df: pandas DataFrame with string column names.

    Returns:
        The re-indexed DataFrame without the DROP_COLUMNS columns, or None
        when no column name contains "PfafID".

    Raises:
        KeyError: if a column listed in DROP_COLUMNS is missing.
    """
    for column in df.columns:
        if re.search("PfafID", column):
            # axis is passed by keyword: the positional form drop(labels, 1)
            # is deprecated in pandas 1.x and rejected by pandas 2.x.
            return df.set_index(column).drop(DROP_COLUMNS, axis=1)
    return None
        


    
    


In [82]:
# Local folder that holds the csv files; prepareFile() reads from this global.
# NOTE(review): the sub-folder name matches the last component of
# GCS_INPUT_PATH, presumably created by the recursive gsutil copy - confirm.
folder = os.path.join(EC2_INPUT_PATH,"Y2017M09D11_RH_zonal_stats_EE_V15/")

In [83]:
# os.listdir returns entries in arbitrary order; sort them so that the
# first-match selection of auxiliary files and the column order of the
# merged table are reproducible between runs.
files = sorted(os.listdir(folder))

## Process Auxiliary Datasets (PfafID, Area, Ones)

In [84]:
# Load the first file matching each auxiliary pattern, keyed by the pattern.
dAux ={}
for regex in AUXFILES:
    r = re.compile(regex)
    # r.match anchors at the start of the name, so each auxiliary pattern
    # has to match from the beginning of the file name.
    newList = [fileName for fileName in files if r.match(fileName)]
    if not newList:
        # Same exception type as the former bare [0] lookup, but with a
        # message that names the missing dataset.
        raise IndexError(
            "No file in %s matches auxiliary pattern %r" % (folder, regex)
        )
    oneFile = newList[0]
    dAux[regex] = prepareFile(oneFile)

In [85]:
# Single pattern that matches any of the auxiliary dataset names.
regex = createRegex(AUXFILES)

In [86]:
# Show the combined pattern for a visual check.
print(regex)

Hybas06|area_30s_m2|ones_30s


In [88]:
# Split the input files into regular datasets (d) and auxiliary datasets
# (dAux), both keyed by the file name without the STRING_TRIM suffix.
d ={}
dAux ={}
for oneFile in files:
    # The suffix is trimmed by length, so a file that does not end with
    # STRING_TRIM would get a mangled key. Previously the warning branch
    # was unreachable (if not X / elif X / else); check the suffix first.
    if not oneFile.endswith(STRING_TRIM):
        print("Unrecognized file name, check STRING_TRIM variable", oneFile)
        continue

    trimFileName = oneFile[:-len(STRING_TRIM)]
    if re.search(regex,oneFile):
        dAux[trimFileName] = prepareFile(oneFile)
    else:
        d[trimFileName] = prepareFile(oneFile)
        

In [121]:
# Table of the first auxiliary dataset (AUXFILES[0]).
# NOTE(review): dfLeft is not referenced again in the visible cells.
dfLeft = dAux[AUXFILES[0]]["df"]

# Adding area to shapes

In [126]:
# Add a total_<area> column to the area table: cell count times mean value.
areaName = AUXFILES[1]
dfArea = dAux[areaName]["df"]  # same object as in dAux, so it is updated in place
dfArea["total_%s" %(areaName)] = (
    dfArea["count_%s" %(areaName)] * dfArea["mean_%s" %(areaName)]
)

In [127]:
# Display the table of the second auxiliary dataset (AUXFILES[1]).
dAux[AUXFILES[1]]["df"]

Unnamed: 0_level_0,count_area_30s_m2,mean_area_30s_m2,total_area_30s_m2
PfafID_area_30s_m2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
111011,2536,743658.186761,1.885917e+09
111012,3921,746186.462653,2.925797e+09
111013,1194,747422.836265,8.924229e+08
111014,5605,750449.270450,4.206268e+09
111015,21873,758792.279231,1.659706e+10
111016,2382,768191.799013,1.829833e+09
111017,11170,773545.447829,8.640503e+09
111018,2509,779534.792447,1.955853e+09
111019,8363,783433.321020,6.551853e+09
111020,14779,788097.381813,1.164729e+10


In [155]:
# Start the merged table: outer join of the first two auxiliary tables on
# their index, sorted by the join key.
dfZones = dAux[AUXFILES[0]]["df"]
dfArea = dAux[AUXFILES[1]]["df"]
dfMerge = pd.merge(
    dfZones,
    dfArea,
    how="outer",
    left_index=True,
    right_index=True,
    sort=True,
)

In [157]:
# The total area column does not depend on the dataset being merged, so it
# is looked up once instead of on every iteration.
totalArea = dAux[AUXFILES[1]]["df"]["total_%s" %(AUXFILES[1])]

# Only the values are needed; the dict key is not used.
for value in d.values():
    datasetName = value["trimFileName"]
    dfNew = value["df"].copy()
    # total new value = area in m^2 times mean flux
    # (the product is aligned on the index of both tables)
    dfNew["total_volume_%s" %(datasetName)] = totalArea * dfNew["mean_%s" %(datasetName)]

    dfMerge = dfMerge.merge(dfNew,
                            how="outer",
                            left_index=True,
                            right_index=True,
                            sort=True
                            )

In [158]:
# Preview the first rows of the merged table.
dfMerge.head()

Unnamed: 0_level_0,count_Hybas06,mean_Hybas06,count_area_30s_m2,mean_area_30s_m2,total_area_30s_m2,count_PIrrWN_monthY2014M10,mean_PIrrWN_monthY2014M10,total_volume_PIrrWN_monthY2014M10,count_PLivWW_monthY2014M10,mean_PLivWW_monthY2014M10,...,total_volume_PDomWW_yearY2014M12,count_PIndWW_monthY2014M08,mean_PIndWW_monthY2014M08,total_volume_PIndWW_monthY2014M08,count_PDomWW_monthY2014M01,mean_PDomWW_monthY2014M01,total_volume_PDomWW_monthY2014M01,count_runoff_monthY2014M09,mean_runoff_monthY2014M09,total_volume_runoff_monthY2014M09
PfafID_Hybas06,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
111011,2536,111011.0,2536,743658.186761,1885917000.0,2536,4.1e-05,76726.246728,2536,1.319826e-07,...,11151000.0,2536,0.002146,4048026.0,2536,0.000465,876146.11607,2479,-1.791564e-07,-337.87407
111012,3921,111012.0,3921,746186.462653,2925797000.0,3921,0.0,0.0,3921,6.403578e-07,...,156119.3,3921,0.000229,668861.3,3921,4e-06,12275.787016,3914,-7.736314e-08,-226.348862
111013,1194,111013.0,1194,747422.836265,892422900.0,1194,0.0,0.0,1194,4.948549e-08,...,139074.9,1194,0.000694,619162.3,1194,1.2e-05,10927.294218,1141,-4.750628e-08,-42.395691
111014,5605,111014.0,5605,750449.27045,4206268000.0,5605,0.0,0.0,5605,2.041677e-07,...,129565.4,5605,1.8e-05,75588.42,5605,2e-06,10179.78997,5605,0.0,0.0
111015,21873,111015.0,21873,758792.279231,16597060000.0,21769,0.0,0.0,21769,2.595118e-08,...,4036594.0,21769,0.000541,8983459.0,21769,1.9e-05,317247.420919,21086,0.0,0.0
