In [1]:
"""
Inspect Overall Water Risk per sector.

WARNING: This script is quick and dirty and contains known inefficiencies.

Nomenclature (indicator short codes, indicator full names, industry short codes, industry full names):

['BWS',
 'BWD',
 'GTD',
 'IAV',
 'SEV',
 'DRR',
 'RFR',
 'CFR',
 'UCW',
 'CEP',
 'UDW',
 'USA',
 'RRI']

['Baseline water stress',
 'Baseline water depletion ',
 'Groundwater table decline ',
 'Interannual variability ',
 'Seasonal variability',
 'Drought risk',
 'Riverine flood risk ',
 'Coastal flood risk ',
 'Untreated collected wastewater',
 'Coastal eutrophication potential',
 'Unimproved/no drinking water ',
 'Unimproved/no sanitation',
 'RepRisk Index (RRI)']

['DEF', 'AGR', 'FNB', 'CHE', 'ELP', 'SMC', 'ONG', 'MIN', 'CON', 'TEX']

['Default',
 'Agriculture',
 'Food & Beverage',
 'Chemicals',
 'Electric Power',
 'Semiconductor',
 'Oil & Gas',
 'Mining',
 'Construction Materials',
 'Textile']

"""


# User Input
# INDUSTRY is interpolated into the queried column names
# (e.g. "<indicator>_<INDUSTRY>_weight", "owr_<INDUSTRY>_score").
# The codes are listed in upper case below, but the value is given in lower case here.
INDUSTRY = "def" # one of ['DEF', 'AGR', 'FNB', 'CHE', 'ELP', 'SMC', 'ONG', 'MIN', 'CON', 'TEX']
# STRING_ID selects the row(s) to inspect via the string_id column of the input table.
STRING_ID = '453750-IND.31_1-3383' # Use Shapefile to find string_id

In [3]:
# Name of this script and the version number appended to its output table name.
SCRIPT_NAME = "Y2018M11D11_RH_QA_OWR_Inspector_Tool_V01"
OUTPUT_VERSION = 2

# BigQuery project, dataset and input table read by this notebook.
BQ_PROJECT_ID, BQ_OUTPUT_DATASET_NAME = "aqueduct30", "aqueduct30v01"
BQ_IN = "y2018m12d11_rh_master_weights_gpd_v01_v02"

# Lower-cased "<script name>_v<zero-padded two-digit version>".
BQ_OUTPUT_TABLE_NAME = str.lower(
    "{}_v{:02.0f}".format(SCRIPT_NAME, OUTPUT_VERSION)
)

In [4]:
import time, datetime, sys

# time.strftime() formats the *local* time when no time tuple is passed, which
# made the "UTC" label wrong on any machine not running in UTC. Format an
# explicit UTC struct_time instead, and use the same instant for both strings.
utc_now = time.gmtime()
dateString = time.strftime("Y%YM%mD%d", utc_now)
timeString = time.strftime("UTC %H:%M", utc_now)
# Start time of the run (naive local datetime, unchanged from the original).
start = datetime.datetime.now()
print(dateString,timeString)
# Last expression of the cell: shows the interpreter version in the output.
sys.version

Y2019M06D25 UTC 12:58


'3.5.4 |Anaconda, Inc.| (default, Nov 20 2017, 18:44:38) \n[GCC 7.2.0]'

In [6]:
import os
import collections
import pandas as pd
import numpy as np
from google.cloud import bigquery

import plotly.plotly as py
import plotly.graph_objs as go


os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/.google.json"
os.environ["GOOGLE_CLOUD_PROJECT"] = "aqueduct30"
client = bigquery.Client(project=BQ_PROJECT_ID)

pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)
%matplotlib inline

In [8]:
# Query selecting every column of the weights table, ordered by id.
# The result supplies the indicator/industry short codes and full names used below.
# NOTE(review): project and dataset are hard-coded here rather than taken from
# BQ_PROJECT_ID / BQ_OUTPUT_DATASET_NAME — keep them in sync manually.
sql_nomenclature = """
SELECT
  *
FROM
  `aqueduct30.aqueduct30v01.y2018m12d06_rh_process_weights_bq_v01_v01`
ORDER BY
  id
"""

In [9]:
# Run the nomenclature query (standard SQL dialect) and load the result as a DataFrame.
df_nomenclature = pd.read_gbq(query=sql_nomenclature,dialect="standard")

In [10]:
# Display the nomenclature/weights table.
df_nomenclature

Unnamed: 0,id,group_full,group_short,indicator_full,indicator_short,industry_full,industry_short,weight_abs,weight_label,weight_interpretation,weight_fraction
0,1,Physical Risk Quantity,QAN,Baseline water stress,BWS,Default,DEF,4.0,Very High,Represents very high risk to the industry,0.163265
1,2,Physical Risk Quantity,QAN,Baseline water depletion,BWD,Default,DEF,4.0,Very High,Represents very high risk to the industry,0.163265
2,3,Physical Risk Quantity,QAN,Groundwater table decline,GTD,Default,DEF,4.0,Very High,Represents very high risk to the industry,0.163265
3,4,Physical Risk Quantity,QAN,Interannual variability,IAV,Default,DEF,0.5,Low,Represents low risk to the industry,0.020408
4,5,Physical Risk Quantity,QAN,Seasonal variability,SEV,Default,DEF,0.5,Low,Represents low risk to the industry,0.020408
5,6,Physical Risk Quantity,QAN,Drought risk,DRR,Default,DEF,2.0,High,Represents low risk to the industry,0.081633
6,7,Physical Risk Quantity,QAN,Riverine flood risk,RFR,Default,DEF,1.0,Medium,Represents medium risk to the industry,0.040816
7,8,Physical Risk Quantity,QAN,Coastal flood risk,CFR,Default,DEF,1.0,Medium,Represents medium risk to the industry,0.040816
8,9,Physical Risk Quality,QAL,Untreated collected wastewater,UCW,Default,DEF,2.0,High,Represents high risk to the industry,0.081633
9,10,Physical Risk Quality,QAL,Coastal eutrophication potential,CEP,Default,DEF,1.0,Medium,Represents medium risk to the industry,0.040816


In [12]:
# Distinct short codes and full names, each as a plain list in the order
# pandas' unique() returns them for the corresponding nomenclature column.
indicators, industries, indicators_full, industries_full = (
    list(df_nomenclature[column_name].unique())
    for column_name in (
        "indicator_short",
        "industry_short",
        "indicator_full",
        "industry_full",
    )
)

In [13]:
# Display the indicator short codes.
indicators

['BWS',
 'BWD',
 'GTD',
 'IAV',
 'SEV',
 'DRR',
 'RFR',
 'CFR',
 'UCW',
 'CEP',
 'UDW',
 'USA',
 'RRI']

In [14]:
# Display the indicator full names.
indicators_full

['Baseline water stress',
 'Baseline water depletion ',
 'Groundwater table decline ',
 'Interannual variability ',
 'Seasonal variability',
 'Drought risk',
 'Riverine flood risk ',
 'Coastal flood risk ',
 'Untreated collected wastewater',
 'Coastal eutrophication potential',
 'Unimproved/no drinking water ',
 'Unimproved/no sanitation',
 'RepRisk Index (RRI)']

In [15]:
# Display the industry short codes.
industries

['DEF', 'AGR', 'FNB', 'CHE', 'ELP', 'SMC', 'ONG', 'MIN', 'CON', 'TEX']

In [16]:
# Display the industry full names.
industries_full

['Default',
 'Agriculture',
 'Food & Beverage',
 'Chemicals',
 'Electric Power',
 'Semiconductor',
 'Oil & Gas',
 'Mining',
 'Construction Materials',
 'Textile']

In [18]:
def get_hex_color(score):
    """Return the hex colour string for a numeric score.

    Scores below 1, 2, 3 and 4 map to successive colours; scores from 4 up to
    and including 5 map to "#990000". Anything else (values above 5, or a NaN,
    for which every comparison is false) maps to the grey "#4E4E4E".
    """
    # (exclusive upper bound, colour) pairs, checked in ascending order.
    bands = (
        (1, "#FFFF99"),
        (2, "#FFE600"),
        (3, "#FF9900"),
        (4, "#FF1900"),
    )
    for upper_bound, hex_code in bands:
        if score < upper_bound:
            return hex_code
    # The top band is inclusive of 5; everything else falls through to grey.
    return "#990000" if score <= 5 else "#4E4E4E"
    

def build_query(indicators,industry,string_id):
    """Build the SQL that selects scores and weights for one string_id.

    Previously the `industry` and `string_id` arguments were ignored and the
    module-level INDUSTRY / STRING_ID were used instead; the arguments are now
    honoured, so the function returns what its caller asked for.

    Args:
        indicators: iterable of indicator short codes; for each one the
            columns "<indicator>_score", "<indicator>_<industry>_weight" and
            "<indicator>_<industry>_weightedscore" are selected.
        industry: industry short code inserted into the column names.
        string_id: value compared against the string_id column.

    Returns:
        The query text. The table is taken from the module-level
        BQ_PROJECT_ID, BQ_OUTPUT_DATASET_NAME and BQ_IN.
    """
    sql = """
    SELECT
      aq30_id,
      string_id,
      pfaf_id,
      gid_1,
      aqid,
      area_km2,
      name_1,
      gid_0,
      name_0,
      delta_id,
    """
    for indicator in indicators:
        sql += "{}_score,".format(indicator)
        sql += "{}_{}_weight,".format(indicator,industry)
        sql += "{}_{}_weightedscore,".format(indicator,industry)

    sql += "{}_weight_sum,".format(industry)
    sql += "{}_weightedscore_sum,".format(industry)
    sql += "owr_{}_score".format(industry)

    # NOTE(review): string_id is interpolated into the SQL text unescaped; only
    # pass trusted values.
    sql += """
    FROM
      `{}.{}.{}`
    WHERE string_id = '{}'
    """.format(BQ_PROJECT_ID,BQ_OUTPUT_DATASET_NAME,BQ_IN,string_id)
    return sql

# Build and run the query for the selected industry and string_id.
sql = build_query(indicators,INDUSTRY,STRING_ID)
print(sql)
df_in = pd.read_gbq(query=sql,dialect="standard")

# A STRING_ID that matches nothing yields zero rows; report that explicitly
# instead of failing below with an opaque IndexError from .iloc[0].
if df_in.empty:
    raise ValueError("Query returned no rows for string_id '{}'".format(STRING_ID))

# Only the first returned row is inspected.
first_row = df_in.iloc[0]

# Per-indicator label, weight and score, in the order of `indicators`.
xs = ["{}".format(indicator) for indicator in indicators]
ws = [first_row["{}_{}_weight".format(indicator,INDUSTRY)] for indicator in indicators]
ss = [first_row["{}_score".format(indicator)] for indicator in indicators]

df = pd.DataFrame({"x":xs,"weight":ws,"score":ss})
# Indicators with a missing weight or score are left out of the chart.
df = df.dropna()
# Bars are laid side by side along the x axis: each bar is as wide as its
# weight and is centred on the midpoint of its slice of the cumulative weight.
df["cumweight"] = df["weight"].cumsum(axis=0)
df["offset"] = df["cumweight"] - (df["weight"]*0.5)
df["color"] = df["score"].apply(get_hex_color)

bar = go.Bar(name="Risk per indicator",
             x=df['offset'], # bar centres (see "offset" above), not the indicator labels
             y=df['score'],
             width=df["weight"],
             text = df["x"], # indicator short code attached to each bar
             marker = {"color":list(df["color"])}
             )

owr_score = first_row["owr_{}_score".format(INDUSTRY)]

# Horizontal dotted line at the overall score, spanning the full width of the bars.
line = go.Scatter(name = 'OWR {}'.format(INDUSTRY),
                  x = [0,df["cumweight"].max()],
                  y = [owr_score,owr_score],
                  line = {"dash":"dot",
                          "color":get_hex_color(owr_score)})
layout = go.Layout(
    barmode='stack',
    autosize=True,
    title='Risk Overview for {}'.format(STRING_ID),
    xaxis= {"title":"Weight for {}".format(INDUSTRY)},
    yaxis= {"title":"Score [0-5]"}
)
data = [bar, line]
fig = go.Figure(data=data, layout=layout)


    SELECT
      aq30_id,
      string_id,
      pfaf_id,
      gid_1,
      aqid,
      area_km2,
      name_1,
      gid_0,
      name_0,
      delta_id,
    BWS_score,BWS_def_weight,BWS_def_weightedscore,BWD_score,BWD_def_weight,BWD_def_weightedscore,GTD_score,GTD_def_weight,GTD_def_weightedscore,IAV_score,IAV_def_weight,IAV_def_weightedscore,SEV_score,SEV_def_weight,SEV_def_weightedscore,DRR_score,DRR_def_weight,DRR_def_weightedscore,RFR_score,RFR_def_weight,RFR_def_weightedscore,CFR_score,CFR_def_weight,CFR_def_weightedscore,UCW_score,UCW_def_weight,UCW_def_weightedscore,CEP_score,CEP_def_weight,CEP_def_weightedscore,UDW_score,UDW_def_weight,UDW_def_weightedscore,USA_score,USA_def_weight,USA_def_weightedscore,RRI_score,RRI_def_weight,RRI_def_weightedscore,def_weight_sum,def_weightedscore_sum,owr_def_score
    FROM
      `aqueduct30.aqueduct30v01.y2018m12d11_rh_master_weights_gpd_v02_v10`
    WHERE string_id = '453750-IND.31_1-3383'
    


GenericGBQException: Reason: 400 Unrecognized name: aq30_id at [3:7]

In [13]:
# Plot online (py.plot returns the URL of the hosted figure)

In [14]:
# `py` is plotly.plotly; keep the value returned by py.plot (a URL, per the output below).
url = py.plot(fig, filename='pandas-bar-chart-layout')

In [15]:
# Display the URL returned by py.plot.
url

'https://plot.ly/~rutgerhofste/4'

In [16]:
# Plot inline in the notebook (NOTE: py is plotly.plotly, the online API, not plotly.offline)

In [17]:
# Presumably renders the figure in the notebook output; uses the same filename
# as the py.plot call above — TODO confirm whether that overwrites the hosted plot.
py.iplot(fig, filename='pandas-bar-chart-layout')