In [10]:
""" Compare country aggregations, create charts and combined database.
-------------------------------------------------------------------------------

Author: Rutger Hofste
Date: 20190606
Kernel: python35
Docker: rutgerhofste/gisdocker:ubuntu16.04

"""

SCRIPT_NAME = "Y2019M06D06_RH_AQ30VS21_Country_Comparison_V01"
OUTPUT_VERSION = 1

S3_INPUT_PATH_AQ30 = "s3://wri-projects/Aqueduct30/finalData/Y2019M04D15_RH_GA_Aqueduct_Results_V01/output_V03"
S3_INPUT_PATH_AQ21 = "s3://wri-projects/Aqueduct30/processData/Y2019M06D06_RH_AQ21_Country_Rankings_Simple_V01/output_V01"

ec2_input_path = "/volumes/data/{}/input_V{:02.0f}".format(SCRIPT_NAME,OUTPUT_VERSION) 
ec2_output_path =  "/volumes/data/{}/output_V{:02.0f}".format(SCRIPT_NAME,OUTPUT_VERSION) 

s3_output_path = "s3://wri-projects/Aqueduct30/Aq30vs21/{}/output_V{:02.0f}/".format(SCRIPT_NAME,OUTPUT_VERSION)

print("S3_INPUT_PATH_AQ30: " + S3_INPUT_PATH_AQ30,
      "S3_INTPUT_PATH_AQ21: " + S3_INTPUT_PATH_AQ21,
      "\nec2_input_path: " + ec2_input_path +
      "\nec2_output_path: " + ec2_output_path+ 
      "\ns3_output_path: " + s3_output_path)

S3_INPUT_PATH_AQ30: s3://wri-projects/Aqueduct30/finalData/Y2019M04D15_RH_GA_Aqueduct_Results_V01/output_V03 S3_INTPUT_PATH_AQ21: s3://wri-projects/Aqueduct30/processData/Y2019M06D06_RH_AQ21_Country_Rankings_Simple_V01/output_V01 
ec2_input_path: /volumes/data/Y2019M06D06_RH_AQ30VS21_Country_Comparison_V01/input_V01
ec2_output_path: /volumes/data/Y2019M06D06_RH_AQ30VS21_Country_Comparison_V01/output_V01
s3_output_path: s3://wri-projects/Aqueduct30/Aq30vs21/Y2019M06D06_RH_AQ30VS21_Country_Comparison_V01/output_V01/


## Pre-Processing

Before the country ranking of Aqueduct could be loaded as a .csv, a few edits were made. 

1. Download the data in excel format
1. Copy sheet baseline water stress to new excel file
1. Delete column "hist"
1. Add column with ADM03 codes (taken from AQ30 data)
1. Add column new_name to match the GADM country names
1. Upload to S3://s3://wri-projects/Aqueduct30/processData/Y2019M06D06_RH_AQ21_Country_Rankings_Simple_V01/output_V01/



In [5]:
import time, datetime, sys
dateString = time.strftime("Y%YM%mD%d")
timeString = time.strftime("UTC %H:%M")
start = datetime.datetime.now()
print(dateString,timeString)
sys.version

Y2019M06D06 UTC 21:58


'3.5.4 |Anaconda, Inc.| (default, Nov 20 2017, 18:44:38) \n[GCC 7.2.0]'

In [6]:
!rm -r {ec2_input_path}
!rm -r {ec2_output_path}
!mkdir -p {ec2_input_path}
!mkdir -p {ec2_output_path}

rm: cannot remove '/volumes/data/Y2019M06D06_RH_AQ30VS21_Country_Comparison_V01/input_V01': No such file or directory
rm: cannot remove '/volumes/data/Y2019M06D06_RH_AQ30VS21_Country_Comparison_V01/output_V01': No such file or directory


In [7]:
!aws s3 cp {S3_INPUT_PATH_AQ30} {ec2_input_path} --recursive

download: s3://wri-projects/Aqueduct30/finalData/Y2019M04D15_RH_GA_Aqueduct_Results_V01/output_V03/Y2019M04D15_RH_GA_Aqueduct_Results_V01_country_V03.csv to ../../../../../data/Y2019M06D06_RH_AQ30VS21_Country_Comparison_V01/input_V01/Y2019M04D15_RH_GA_Aqueduct_Results_V01_country_V03.csv
download: s3://wri-projects/Aqueduct30/finalData/Y2019M04D15_RH_GA_Aqueduct_Results_V01/output_V03/Y2019M04D15_RH_GA_Aqueduct_Results_V01_province_V03.csv to ../../../../../data/Y2019M06D06_RH_AQ30VS21_Country_Comparison_V01/input_V01/Y2019M04D15_RH_GA_Aqueduct_Results_V01_province_V03.csv


In [11]:
!aws s3 cp {S3_INPUT_PATH_AQ21} {ec2_input_path} --recursive

download: s3://wri-projects/Aqueduct30/processData/Y2019M06D06_RH_AQ21_Country_Rankings_Simple_V01/output_V01/readme.txt to ../../../../../data/Y2019M06D06_RH_AQ30VS21_Country_Comparison_V01/input_V01/readme.txt
download: s3://wri-projects/Aqueduct30/processData/Y2019M06D06_RH_AQ21_Country_Rankings_Simple_V01/output_V01/country_lookup_GADM_level0.csv to ../../../../../data/Y2019M06D06_RH_AQ30VS21_Country_Comparison_V01/input_V01/country_lookup_GADM_level0.csv
download: s3://wri-projects/Aqueduct30/processData/Y2019M06D06_RH_AQ21_Country_Rankings_Simple_V01/output_V01/Y2019M06D06_RH_AQ21_Country_Rankings_Simple_V01.csv to ../../../../../data/Y2019M06D06_RH_AQ30VS21_Country_Comparison_V01/input_V01/Y2019M06D06_RH_AQ21_Country_Rankings_Simple_V01.csv
download: s3://wri-projects/Aqueduct30/processData/Y2019M06D06_RH_AQ21_Country_Rankings_Simple_V01/output_V01/Y2019M06D06_RH_AQ21_Country_Rankings_Simple_V01.xlsx to ../../../../../data/Y2019M06D06_RH_AQ30VS21_Country_Comparison_V01/input_V01

In [12]:
import os
import pandas as pd

In [13]:
os.listdir(ec2_input_path)

['Y2019M04D15_RH_GA_Aqueduct_Results_V01_province_V03.csv',
 'readme.txt',
 'Y2019M06D06_RH_AQ21_Country_Rankings_Simple_V01.xlsx',
 'Y2019M06D06_RH_AQ21_Country_Rankings_Simple_V01.csv',
 'country_lookup_GADM_level0.csv',
 'Y2019M04D15_RH_GA_Aqueduct_Results_V01_country_V03.csv']

In [14]:
aq21_input_filename = "Y2019M06D06_RH_AQ21_Country_Rankings_Simple_V01.csv"
aq30_input_filename = "Y2019M04D15_RH_GA_Aqueduct_Results_V01_country_V03.csv"

In [57]:
df_aq21_og = pd.read_csv("{}/{}".format(ec2_input_path,aq21_input_filename))

In [58]:
df_aq21_og.head()

Unnamed: 0,Rank,original_name,new_name,All sectors,sd,nodata,0,5,1,15,2,25,3,35,4,45,Agricultural,Domestic,Industrial,aq30_gid_0
0,1,Western Sahara,Western Sahara,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6427.0,,5.0,,ESH
1,1,United Arab Emirates,United Arab Emirates,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5566039000.0,5.0,5.0,5.0,ARE
2,1,Trinidad and Tobago,Trinidad and Tobago,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,314478300.0,5.0,5.0,5.0,TTO
3,1,Singapore,Singapore,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2570737000.0,,5.0,5.0,SGP
4,1,San Marino,San Marino,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,901388400.0,5.0,5.0,5.0,SMR


In [43]:
df_aq30_og = pd.read_csv("{}/{}".format(ec2_input_path,aq30_input_filename))

In [44]:
df_aq30_og.head()

Unnamed: 0.1,Unnamed: 0,gid_0,name_0,indicator_name,weight,score,score_ranked,cat,label,sum_weights,sum_weighted_indicator,count_valid,fraction_valid
0,0,AFG,Afghanistan,bws,One,3.899118,29.0,3.0,High (40-80%),903569.94902,3523125.0,919639.0,1.0
1,1,AFG,Afghanistan,bwd,One,2.905485,33.0,2.0,Medium - High (25-50%),903569.94902,2625309.0,919639.0,1.0
2,2,AFG,Afghanistan,iav,One,2.395697,40.0,2.0,Medium - High (0.50-0.75),903569.94902,2164680.0,919639.0,1.0
3,3,AFG,Afghanistan,sev,One,3.601233,4.0,3.0,High (1.00-1.33),903569.94902,3253966.0,919639.0,1.0
4,4,AFG,Afghanistan,bws,Tot,3.798975,31.0,3.0,High (40-80%),29393.117023,111663.7,919639.0,1.0


In [52]:
def pre_process_aq21(df):
    df_sel = df[["Rank","aq30_gid_0","new_name","All sectors","Agricultural","Domestic" ,"Industrial"]]
    df_sel.rename(columns={"Rank":"aq21_rank",
                           "new_name":"country_name",
                           "All sectors":"bws_s_aq21_tot",
                           "Agricultural":"bws_s_aq21_agg",
                           "Domestic":"bws_s_aq21_dom",
                           "Industrial":"bws_s_aq21_ind"},
                 inplace=True)
    return df_sel

def pre_process_aq30(df):
    df_sel = df.loc[(df["indicator_name"]=="bws") & (df["weight"]=="Tot")]
    df_sel.rename(columns={"score":"bws_s_aq30_tot",
                           "score_ranked":"aq30_rank"},
                 inplace=True)
    
    df_sel = df_sel[["gid_0","name_0","bws_s_aq30_tot","aq30_rank","fraction_valid"]]
    return df_sel

In [59]:
df_aq21 = pre_process_aq21(df_aq21_og)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)


In [60]:
df_aq21.head()

Unnamed: 0,aq21_rank,aq30_gid_0,country_name,bws_s_aq21_tot,bws_s_aq21_agg,bws_s_aq21_dom,bws_s_aq21_ind
0,1,ESH,Western Sahara,5.0,,5.0,
1,1,ARE,United Arab Emirates,5.0,5.0,5.0,5.0
2,1,TTO,Trinidad and Tobago,5.0,5.0,5.0,5.0
3,1,SGP,Singapore,5.0,,5.0,5.0
4,1,SMR,San Marino,5.0,5.0,5.0,5.0


In [61]:
df_aq30 = pre_process_aq30(df_aq30_og)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)


In [68]:
df_bws_aq30vs21 = df_aq21.merge(right=df_aq30,
                                how="inner",
                                left_on="aq30_gid_0",
                                right_on="gid_0")

In [69]:
df_bws_aq30vs21.head()

Unnamed: 0,aq21_rank,aq30_gid_0,country_name,bws_s_aq21_tot,bws_s_aq21_agg,bws_s_aq21_dom,bws_s_aq21_ind,gid_0,name_0,bws_s_aq30_tot,aq30_rank,fraction_valid
0,1,ESH,Western Sahara,5.0,,5.0,,ESH,Western Sahara,2.545013,59.0,0.999726
1,1,ARE,United Arab Emirates,5.0,5.0,5.0,5.0,ARE,United Arab Emirates,4.258251,12.0,0.993239
2,1,TTO,Trinidad and Tobago,5.0,5.0,5.0,5.0,TTO,Trinidad and Tobago,0.631979,134.0,0.99583
3,1,SGP,Singapore,5.0,,5.0,5.0,SGP,Singapore,0.010088,176.0,0.964865
4,1,SMR,San Marino,5.0,5.0,5.0,5.0,SMR,San Marino,4.137027,13.0,1.0


In [70]:
df_bws_aq30vs21 = df_bws_aq30vs21[["gid_0","name_0","bws_s_aq21_tot","bws_s_aq30_tot","aq21_rank","aq30_rank","fraction_valid"]]

In [71]:
df_bws_aq30vs21

Unnamed: 0,gid_0,name_0,bws_s_aq21_tot,bws_s_aq30_tot,aq21_rank,aq30_rank,fraction_valid
0,ESH,Western Sahara,5.00,2.545013e+00,1,59.0,0.999726
1,ARE,United Arab Emirates,5.00,4.258251e+00,1,12.0,0.993239
2,TTO,Trinidad and Tobago,5.00,6.319793e-01,1,134.0,0.995830
3,SGP,Singapore,5.00,1.008787e-02,1,176.0,0.964865
4,SMR,San Marino,5.00,4.137027e+00,1,13.0,1.000000
5,VCT,Saint Vincent and the Grenadines,5.00,6.319871e-01,1,130.0,0.815186
6,LCA,Saint Lucia,5.00,6.319871e-01,1,131.0,0.994939
7,QAT,Qatar,5.00,4.973635e+00,1,1.0,0.998752
8,MLT,Malta,5.00,4.705457e+00,1,4.0,1.000000
9,JAM,Jamaica,5.00,0.000000e+00,1,185.0,0.999185
