In [1]:
"""Post process aggregations from riverine flood risk.
-------------------------------------------------------------------------------

Riverine flood risk calculated per province by research partner.

Author: Rutger Hofste
Date: 20190411
Kernel: python35
Docker: rutgerhofste/gisdocker:ubuntu16.04

"""

SCRIPT_NAME = "Y2019M04D11_RH_GA_RFR_Post_Process_V01"
OUTPUT_VERSION = 1

S3_INPUT_PATH = "s3://wri-projects/Aqueduct30/finalData/Floods"
INPUT_FILE_NAME = "flood_State_results.csv"

BQ_PROJECT_ID = "aqueduct30"
BQ_DATASET_NAME = "aqueduct30v01"
BQ_INPUT_TABLE_NAME_LABEL = "y2018m12d04_rh_master_merge_rawdata_gpd_v02_v09"
BQ_INPUT_TABLE_NAME_GADM  = "y2018m11d12_rh_gadm36_level1_rds_to_bq_v01_v01"
BQ_OUTPUT_TABLE_NAME = "{}_v{:02.0f}".format(SCRIPT_NAME,OUTPUT_VERSION).lower()


ec2_input_path = "/volumes/data/{}/input_V{:02.0f}".format(SCRIPT_NAME,OUTPUT_VERSION) 
ec2_output_path = "/volumes/data/{}/output_V{:02.0f}".format(SCRIPT_NAME,OUTPUT_VERSION) 

s3_output_path = "s3://wri-projects/Aqueduct30/processData/{}/output_V{:02.0f}/".format(SCRIPT_NAME,OUTPUT_VERSION)

print("S3_INPUT_PATH: ",S3_INPUT_PATH,
      "\nec2_input_path: ",ec2_input_path,
      "\nec2_output_path: ",ec2_output_path,
      "\ns3_output_path: " + s3_output_path  )

S3_INPUT_PATH:  s3://wri-projects/Aqueduct30/finalData/Floods 
ec2_input_path:  /volumes/data/Y2019M04D11_RH_GA_RFR_Post_Process_V01/input_V01 
ec2_output_path:  /volumes/data/Y2019M04D11_RH_GA_RFR_Post_Process_V01/output_V01 
s3_output_path: s3://wri-projects/Aqueduct30/processData/Y2019M04D11_RH_GA_RFR_Post_Process_V01/output_V01/


In [2]:
import time, datetime, sys
dateString = time.strftime("Y%YM%mD%d")
timeString = time.strftime("UTC %H:%M")
start = datetime.datetime.now()
print(dateString,timeString)
sys.version

Y2019M04D11 UTC 15:36


'3.5.4 |Anaconda, Inc.| (default, Nov 20 2017, 18:44:38) \n[GCC 7.2.0]'

In [3]:
!rm -r {ec2_input_path}
!rm -r {ec2_output_path}
!mkdir -p {ec2_input_path}
!mkdir -p {ec2_output_path}

In [4]:
!aws s3 cp {S3_INPUT_PATH} {ec2_input_path} --recursive --exclude 'inundationMaps/*'

download: s3://wri-projects/Aqueduct30/finalData/Floods/README.txt to ../../../../data/Y2019M04D11_RH_GA_RFR_Post_Process_V01/input_V01/README.txt
download: s3://wri-projects/Aqueduct30/finalData/Floods/flood_State_results.CPG to ../../../../data/Y2019M04D11_RH_GA_RFR_Post_Process_V01/input_V01/flood_State_results.CPG
download: s3://wri-projects/Aqueduct30/finalData/Floods/flood_State_results.shp.xml to ../../../../data/Y2019M04D11_RH_GA_RFR_Post_Process_V01/input_V01/flood_State_results.shp.xml
download: s3://wri-projects/Aqueduct30/finalData/Floods/flood_State_results.sbx to ../../../../data/Y2019M04D11_RH_GA_RFR_Post_Process_V01/input_V01/flood_State_results.sbx
download: s3://wri-projects/Aqueduct30/finalData/Floods/flood_State_results.prj to ../../../../data/Y2019M04D11_RH_GA_RFR_Post_Process_V01/input_V01/flood_State_results.prj
download: s3://wri-projects/Aqueduct30/finalData/Floods/flood_results.CPG to ../../../../data/Y2019M04D11_RH_GA_RFR_Post_Process_V01/input_V01/flood_resu

In [5]:
import os
import numpy as np
import pandas as pd
from google.cloud import bigquery

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/.google.json"
os.environ["GOOGLE_CLOUD_PROJECT"] = "aqueduct30"
client = bigquery.Client(project=BQ_PROJECT_ID)

## Labels

In [6]:
sql = """
SELECT
  indicator,
  AVG(cat) AS cat,
  label
FROM
  `{}.{}.{}`
GROUP BY
  label, indicator
ORDER BY
  indicator, cat
""".format(BQ_PROJECT_ID,BQ_DATASET_NAME,BQ_INPUT_TABLE_NAME_LABEL)

In [7]:
df_gadm_1 = pd.read_gbq(query=sql,
                        project_id =BQ_PROJECT_ID,
                        dialect="standard")

## GADM Level 1 names

In [8]:
sql = """
SELECT
  gid_1,
  gid_0,
  name_1,
  name_0
FROM
  `{}.{}.{}`
ORDER BY
  gid_1
""".format(BQ_PROJECT_ID,BQ_DATASET_NAME,BQ_INPUT_TABLE_NAME_GADM)

In [9]:
df_gadm_1 = pd.read_gbq(query=sql,
                        project_id =BQ_PROJECT_ID,
                        dialect="standard")

## GADM Level 0 names

In [10]:
sql = """
SELECT
  name_0,
  ANY_VALUE(gid_0) as gid_0
FROM
  `{}.{}.{}`
GROUP BY
  name_0
ORDER BY
  name_0
""".format(BQ_PROJECT_ID,BQ_DATASET_NAME,BQ_INPUT_TABLE_NAME_GADM)

In [11]:
df_gadm_0 = pd.read_gbq(query=sql,
                       project_id =BQ_PROJECT_ID,
                       dialect="standard")

Process rfr data, similar to Y2018M12D04_RH_RFR_CFR_BQ_V01

In [12]:
files = os.listdir(ec2_input_path)

In [13]:
input_path = "{}/{}".format(ec2_input_path,INPUT_FILE_NAME)

In [42]:
df = pd.read_csv(input_path)

In [43]:
df_out = df.rename(columns={"PFAF_ID":"pfaf_id",
                            "RVR_raw":"rfr_raw",
                            "CST_raw":"cfr_raw",
                            "RVR_s":"rfr_score",
                            "CST_s":"cfr_score",
                            "RVR_cat":"rfr_label",
                            "CST_cat":"cfr_label"})

In [44]:
df_out

Unnamed: 0,gid_1,River_pop_impacted,Coast_pop_impacted,pop_total,rfr_raw,cfr_raw,rfr_score,cfr_score,rfr_label,cfr_label
0,AFG.10_1,15448.517737,0.000000,7.899676e+05,0.019556,0.000000e+00,4.013803,0.000000,Extremely High (more than 1 in 100),"Low (0 to 3 in 1,000,000)"
1,AFG.11_1,39737.448058,0.000000,1.097044e+06,0.036222,0.000000e+00,4.085621,0.000000,Extremely High (more than 1 in 100),"Low (0 to 3 in 1,000,000)"
2,AFG.12_1,27325.224355,0.000000,2.210070e+06,0.012364,0.000000e+00,3.573244,0.000000,"High (7 in 1,000 to 1 in 100)","Low (0 to 3 in 1,000,000)"
3,AFG.13_1,31245.573814,0.000000,6.612397e+05,0.047253,0.000000e+00,4.133154,0.000000,Extremely High (more than 1 in 100),"Low (0 to 3 in 1,000,000)"
4,AFG.14_1,235595.627560,0.000000,4.708566e+06,0.050036,0.000000e+00,4.145144,0.000000,Extremely High (more than 1 in 100),"Low (0 to 3 in 1,000,000)"
5,AFG.15_1,27739.272494,0.000000,1.499761e+06,0.018496,0.000000e+00,4.009235,0.000000,Extremely High (more than 1 in 100),"Low (0 to 3 in 1,000,000)"
6,AFG.16_1,22177.158232,0.000000,5.233148e+05,0.042378,0.000000e+00,4.112148,0.000000,Extremely High (more than 1 in 100),"Low (0 to 3 in 1,000,000)"
7,AFG.17_1,20689.393227,0.000000,6.506962e+05,0.031796,0.000000e+00,4.066546,0.000000,Extremely High (more than 1 in 100),"Low (0 to 3 in 1,000,000)"
8,AFG.18_1,39662.723129,0.000000,5.426908e+05,0.073085,0.000000e+00,4.244469,0.000000,Extremely High (more than 1 in 100),"Low (0 to 3 in 1,000,000)"
9,AFG.19_1,25586.088036,0.000000,1.181331e+06,0.021659,0.000000e+00,4.022864,0.000000,Extremely High (more than 1 in 100),"Low (0 to 3 in 1,000,000)"


In [45]:
df_out.drop(columns=["Coast_pop_impacted","cfr_raw","cfr_score","cfr_label","rfr_raw"],inplace=True)

In [46]:
df_out["rfr_label"].unique()

array(['Extremely High (more than 1 in 100)',
       'High (7 in 1,000 to 1 in 100)', 'Low (0 to 1 in 1,000)',
       'Low to medium (1 in 1,000 to 3 in 1,000)',
       'Medium to high (3 in 1,000 to 7 in 1,000)'], dtype=object)

In [47]:
def update_labels_rfr(label):
    # update labels to be consistent with rest of framework
    if label == "Low (0 to 1 in 1,000)":
        new_label = "Low (0 to 1 in 1,000)"
    elif label == "Low to medium (1 in 1,000 to 3 in 1,000)":
        new_label = "Low - Medium (1 in 1,000 to 3 in 1,000)"
    elif label == "Medium to high (3 in 1,000 to 7 in 1,000)":
        new_label = "Medium - High (3 in 1,000 to 7 in 1,000)"
    elif label == "High (7 in 1,000 to 1 in 100)":
        new_label = "High (7 in 1,000 to 1 in 100)"
    elif label == "Extremely High (more than 1 in 100)":
        new_label = "Extremely High (more than 1 in 100)"
    else:
        new_label = "error, check script"
    return new_label

def category_from_labels_rfr(label):
    if label == "Low (0 to 1 in 1,000)":
        cat = 0
    elif label == "Low to medium (1 in 1,000 to 3 in 1,000)":
        cat = 1
    elif label == "Medium to high (3 in 1,000 to 7 in 1,000)":
        cat = 2
    elif label == "High (7 in 1,000 to 1 in 100)":
        cat =3
    elif label == "Extremely High (more than 1 in 100)":
        cat = 4
    else:
        cat = -9999
    return cat

In [48]:
df_out["rfr_cat"] = df_out["rfr_label"].apply(category_from_labels_rfr)
df_out["rfr_label"] = df_out["rfr_label"].apply(update_labels_rfr)

In [49]:
df_out["rfr_label"].unique()

array(['Extremely High (more than 1 in 100)',
       'High (7 in 1,000 to 1 in 100)', 'Low (0 to 1 in 1,000)',
       'Low - Medium (1 in 1,000 to 3 in 1,000)',
       'Medium - High (3 in 1,000 to 7 in 1,000)'], dtype=object)

In [50]:
df_out["rfr_cat"].unique()

array([4, 3, 0, 1, 2])

In [51]:
df_out["indicator_name"] = "rfr"
df_out["weight"] = "pop"

In [52]:
df_out.head()

Unnamed: 0,gid_1,River_pop_impacted,pop_total,rfr_score,rfr_label,rfr_cat,indicator_name,weight
0,AFG.10_1,15448.517737,789967.6,4.013803,Extremely High (more than 1 in 100),4,rfr,pop
1,AFG.11_1,39737.448058,1097044.0,4.085621,Extremely High (more than 1 in 100),4,rfr,pop
2,AFG.12_1,27325.224355,2210070.0,3.573244,"High (7 in 1,000 to 1 in 100)",3,rfr,pop
3,AFG.13_1,31245.573814,661239.7,4.133154,Extremely High (more than 1 in 100),4,rfr,pop
4,AFG.14_1,235595.62756,4708566.0,4.145144,Extremely High (more than 1 in 100),4,rfr,pop


In [56]:
df_out2 = df_out.rename(columns={"River_pop_impacted":"weight",
                                 "pop_total":"sum_weights"})

In [58]:
df_out2.head()

Unnamed: 0,gid_1,weight,sum_weights,rfr_score,rfr_label,rfr_cat,indicator_name,weight.1
0,AFG.10_1,15448.517737,789967.6,4.013803,Extremely High (more than 1 in 100),4,rfr,pop
1,AFG.11_1,39737.448058,1097044.0,4.085621,Extremely High (more than 1 in 100),4,rfr,pop
2,AFG.12_1,27325.224355,2210070.0,3.573244,"High (7 in 1,000 to 1 in 100)",3,rfr,pop
3,AFG.13_1,31245.573814,661239.7,4.133154,Extremely High (more than 1 in 100),4,rfr,pop
4,AFG.14_1,235595.62756,4708566.0,4.145144,Extremely High (more than 1 in 100),4,rfr,pop


In [59]:
df_gid_1 = pd.merge(left=df_out2,
                    right=df_gadm_1,
                    how="left",
                    left_on="gid_1",
                    right_on="gid_1")

In [60]:
df_gid_1.head()

Unnamed: 0,gid_1,weight,sum_weights,rfr_score,rfr_label,rfr_cat,indicator_name,weight.1,gid_0,name_1,name_0
0,AFG.10_1,15448.517737,789967.6,4.013803,Extremely High (more than 1 in 100),4,rfr,pop,AFG,Ghor,Afghanistan
1,AFG.11_1,39737.448058,1097044.0,4.085621,Extremely High (more than 1 in 100),4,rfr,pop,AFG,Hilmand,Afghanistan
2,AFG.12_1,27325.224355,2210070.0,3.573244,"High (7 in 1,000 to 1 in 100)",3,rfr,pop,AFG,Hirat,Afghanistan
3,AFG.13_1,31245.573814,661239.7,4.133154,Extremely High (more than 1 in 100),4,rfr,pop,AFG,Jawzjan,Afghanistan
4,AFG.14_1,235595.62756,4708566.0,4.145144,Extremely High (more than 1 in 100),4,rfr,pop,AFG,Kabul,Afghanistan


In [61]:
def province_to_country(df):
    """ Convert province level dataframe to country level
    DataFrame
    
    
    """   
    df["gid_0"] = df["gid_1"].apply(lambda x:  x.split(".")[0])
    
    grouped = df.groupby('gid_0')
    df_country = df.groupby(by="gid_0",as_index=False).sum()
    return df_country

In [62]:
df_gid_0 = province_to_country(df_gid_1)

In [63]:
df_gid_0

Unnamed: 0,gid_0,weight,sum_weights,rfr_score,rfr_cat
0,AFG,1.002293e+06,3.150257e+07,137.306549,133
1,AGO,3.415051e+04,1.909378e+07,26.083760,16
2,ALA,0.000000e+00,0.000000e+00,0.000000,0
3,ALB,1.980385e+04,3.212938e+06,30.073746,25
4,AND,0.000000e+00,6.054270e+02,0.000000,0
5,ARE,8.910554e+04,7.451208e+06,20.928628,18
6,ARG,1.578279e+05,4.047814e+07,50.759432,40
7,ARM,1.686951e+04,3.112434e+06,23.812027,21
8,ATF,0.000000e+00,2.144749e+06,0.000000,0
9,ATG,0.000000e+00,0.000000e+00,0.000000,0


Use population to weight states to get to country`