# Thresholds WaterStress 

* Purpose of script: Double check the threshold setting for the water stress score of Aqueduct 30
* Author: Rutger Hofste
* Kernel used: python35
* Date created: 20170808

In [1]:
import numpy as np
import pandas as pd
import math
import sys
import boto3
import s3fs

In [2]:
INPUTPATH = "s3://wri-projects/Aqueduct30/processData/Y2017M08D08_RH_Thresholds_WaterStress_V01/input/calculatedWS03.csv"

In [3]:
OUTPUTPATH = "s3://wri-projects/Aqueduct30/processData/Y2017M08D08_RH_Thresholds_WaterStress_V01/output/Y2017M08D08_RH_Thresholds_WaterStress_V01_output.csv"

In [4]:
TEMP_STORAGE_PATH = '/volumes/data/temp/Y2017M08D08_RH_Thresholds_WaterStress_V01_output.csv'

In [5]:
sys.version

'3.5.3 |Continuum Analytics, Inc.| (default, Mar  6 2017, 11:58:13) \n[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]'

In [6]:
df = pd.read_csv(INPUTPATH)

In [7]:
df.head()

Unnamed: 0.2,PFAF_ID,Unnamed: 0,Unnamed: 0.1,HYBAS_ID,NEXT_DOWN,NEXT_SINK,MAIN_BAS,DIST_SINK,DIST_MAIN,SUB_AREA,...,upstream_sum_volumem3_TotWW_monthY2014M11,upstream_sum_volumem3_TotWN_monthY2014M11,local_sum_volumem3_Runoff_monthY2014M11,ws_monthY2014M11,local_sum_volumem3_TotWW_monthY2014M12,local_sum_volumem3_TotWN_monthY2014M12,upstream_sum_volumem3_TotWW_monthY2014M12,upstream_sum_volumem3_TotWN_monthY2014M12,local_sum_volumem3_Runoff_monthY2014M12,ws_monthY2014M12
0,111011,0,0,1060000010,0,1060000000.0,1060000000.0,0.0,0.0,1890.8,...,0.0,0.0,69477.380838,73.17308,5062878.0,2354598.0,0.0,0.0,81085.620721,62.438667
1,111012,1,1,1060000100,0,1060000000.0,1060000000.0,0.0,0.0,2925.9,...,0.0,0.0,29361.957127,23.24595,682084.9,281276.6,0.0,0.0,35478.575166,19.225262
2,111013,2,2,1060000110,0,1060000000.0,1060000000.0,0.0,0.0,893.5,...,0.0,0.0,8503.375061,74.06315,629445.6,258427.9,0.0,0.0,9910.605142,63.512326
3,111014,3,3,1060000150,0,1060000000.0,1060000000.0,0.0,0.0,4217.3,...,0.0,0.0,0.015992,5432920.0,86533.64,41244.1,0.0,0.0,24135.710823,3.585295
4,111015,4,4,1060000160,0,1060000000.0,1060000000.0,0.0,0.0,16638.1,...,0.0,0.0,249.185016,37320.86,9289624.0,3870654.0,0.0,0.0,7520.378455,1235.260158


In [8]:
df.shape

(16397, 623)

In [9]:
print(df.columns.values)

['PFAF_ID' 'Unnamed: 0' 'Unnamed: 0.1' 'HYBAS_ID' 'NEXT_DOWN' 'NEXT_SINK'
 'MAIN_BAS' 'DIST_SINK' 'DIST_MAIN' 'SUB_AREA' 'UP_AREA' 'ENDO' 'COAST'
 'ORDER_' 'SORT' 'Upstream_HYBAS_IDs' 'Upstream_PFAF_IDs' 'PfafIDarea30sm2'
 'countarea30sm2' 'meanarea30sm2' 'PfafIDDomWN_monthY2014M01'
 'countDomWN_monthY2014M01' 'meanDomWN_monthY2014M01'
 'PfafIDDomWN_monthY2014M02' 'countDomWN_monthY2014M02'
 'meanDomWN_monthY2014M02' 'PfafIDDomWN_monthY2014M03'
 'countDomWN_monthY2014M03' 'meanDomWN_monthY2014M03'
 'PfafIDDomWN_monthY2014M04' 'countDomWN_monthY2014M04'
 'meanDomWN_monthY2014M04' 'PfafIDDomWN_monthY2014M05'
 'countDomWN_monthY2014M05' 'meanDomWN_monthY2014M05'
 'PfafIDDomWN_monthY2014M06' 'countDomWN_monthY2014M06'
 'meanDomWN_monthY2014M06' 'PfafIDDomWN_monthY2014M07'
 'countDomWN_monthY2014M07' 'meanDomWN_monthY2014M07'
 'PfafIDDomWN_monthY2014M08' 'countDomWN_monthY2014M08'
 'meanDomWN_monthY2014M08' 'PfafIDDomWN_monthY2014M09'
 'countDomWN_monthY2014M09' 'meanDomWN_monthY2014M09'
 '

Calculate catchment (local) area in [$m^2$]  
(You could also just use SUB_AREA but I wanted to double check and this is more precise)

In [10]:
df['area_m2']= df['meanarea30sm2']*df['countarea30sm2']

## Low water use

Low water use: Water Withdrawal (WW) < 0.012 $m/(m^2*year)$

local_sum_volumem3_TotWW_yearY2014 < 0.012 $m/(m^2*year)$

In [11]:
df['local_sum_m_TotWW_yearY2014'] =df['local_sum_volumem3_TotWW_yearY2014']/df['area_m2']

In [12]:
df['arid'] = df['local_sum_m_TotWW_yearY2014'] < 0.012

In [13]:
df.arid = df.arid.astype(int)

## Arid

Available Blue Water <0.03 m/(m^2*year)  

Available blue water = upstream runoff – upstream consumption (WN) + local runoff

In [14]:
dftemp = pd.DataFrame()

In [15]:
df['AvailableBlueWaterm3'] = df['upstream_sum_volumem3runoff_annua']- \
df['upstream_sum_volumem3_TotWN_yearY2014']+ \
df['local_sum_volumem3_Runoff_yearY2014']

Convert Volume to flux 

In [16]:
dftemp['AvailableBlueWaterm'] = df['AvailableBlueWaterm3'] / df['area_m2']

In [17]:
df['lowWaterUse'] = dftemp['AvailableBlueWaterm'] < 0.03

In [18]:
df.lowWaterUse = df.lowWaterUse.astype(int)

## Arid AND Low water use

In [19]:
df['aridAndLowWaterUse'] = df['lowWaterUse']&df['arid']

## Baseline Water Stress Categories

Baseline water stress raw value to category: $y = max(0,min \big(5,\frac{ln([rawValue])-ln(0.1)}{ln(2)}\big)+1) $

In [20]:
def categorizeBWS(rawValue):
    if rawValue ==0:
        catValue= 0
    elif rawValue < 0:
        catValue= -9999
    else: 
        catValue= max(0,min(5,((math.log(rawValue)-math.log(0.1))/(math.log(2)))))
    return catValue

In [21]:
df['BWS_s_excl_AridAndLow'] = df['ws_yearY2014'].apply(categorizeBWS)

Arid AND Low Water Use areas are considered category 5

In [22]:
df['BWS_s'] = df['BWS_s_excl_AridAndLow']

In [23]:
df['BWS_s'] = np.where(df['aridAndLowWaterUse'],5,df['BWS_s'])

# Negative Available Blue water

in the dat from Utrecht University it is possible to have negative local runoff values, leading to a negative available blue water value. These areas are water stressed and should have a categroy 5. This will affect 278 basins that have negative water, 486 basins with 0 water availabel and hence 764 basins in total (<=0)


In [24]:
df['BWS_s'] = np.where(dftemp['AvailableBlueWaterm'] <= 0 ,5,df['BWS_s'])

This results in a column with unrounded categorized scores, i.e. 1.2 instead of 2. In order to find the binned score you need to apply a ceiling function. 1.1 -> category 2, 3.2 -> category 4 etc. There is one exception, 0.0 becomes category 1, similar to Aqueduct 2.1

In [25]:
!mkdir /volumes/data/temp/

mkdir: cannot create directory '/volumes/data/temp/': File exists


In [26]:
df.to_csv(TEMP_STORAGE_PATH)

In [27]:
!aws s3 cp {TEMP_STORAGE_PATH} {OUTPUTPATH} --acl public-read

upload: ../../../../data/temp/Y2017M08D08_RH_Thresholds_WaterStress_V01_output.csv to s3://wri-projects/Aqueduct30/processData/Y2017M08D08_RH_Thresholds_WaterStress_V01/output/Y2017M08D08_RH_Thresholds_WaterStress_V01_output.csv


In [28]:
df.head()

Unnamed: 0.2,PFAF_ID,Unnamed: 0,Unnamed: 0.1,HYBAS_ID,NEXT_DOWN,NEXT_SINK,MAIN_BAS,DIST_SINK,DIST_MAIN,SUB_AREA,...,local_sum_volumem3_Runoff_monthY2014M12,ws_monthY2014M12,area_m2,local_sum_m_TotWW_yearY2014,arid,AvailableBlueWaterm3,lowWaterUse,aridAndLowWaterUse,BWS_s_excl_AridAndLow,BWS_s
0,111011,0,0,1060000010,0,1060000000.0,1060000000.0,0.0,0.0,1890.8,...,81085.620721,62.438667,1885917000.0,0.033141,0,701131.264059,1,0,5.0,5.0
1,111012,1,1,1060000100,0,1060000000.0,1060000000.0,0.0,0.0,2925.9,...,35478.575166,19.225262,2925797000.0,0.002801,1,301446.409818,1,1,5.0,5.0
2,111013,2,2,1060000110,0,1060000000.0,1060000000.0,0.0,0.0,893.5,...,9910.605142,63.512326,892422900.0,0.008471,1,87464.967484,1,1,5.0,5.0
3,111014,3,3,1060000150,0,1060000000.0,1060000000.0,0.0,0.0,4217.3,...,24135.710823,3.585295,4206268000.0,0.000248,1,161168.503704,1,1,5.0,5.0
4,111015,4,4,1060000160,0,1060000000.0,1060000000.0,0.0,0.0,16638.1,...,7520.378455,1235.260158,16597060000.0,0.006728,1,103208.167471,1,1,5.0,5.0


In [29]:
df.tail()

Unnamed: 0.2,PFAF_ID,Unnamed: 0,Unnamed: 0.1,HYBAS_ID,NEXT_DOWN,NEXT_SINK,MAIN_BAS,DIST_SINK,DIST_MAIN,SUB_AREA,...,local_sum_volumem3_Runoff_monthY2014M12,ws_monthY2014M12,area_m2,local_sum_m_TotWW_yearY2014,arid,AvailableBlueWaterm3,lowWaterUse,aridAndLowWaterUse,BWS_s_excl_AridAndLow,BWS_s
16392,312042,16392,16392,3060025460,3060001840,3060025000.0,3060002000.0,0.0,1.0,2489.4,...,648908.9,6.7218,2500363000.0,0.024376,0,204264600.0,0,0,1.577146,1.577146
16393,312016,16393,16393,3060025569,3060001840,3060026000.0,3060002000.0,0.0,1.0,5471.0,...,783368.9,2.190467,5492660000.0,0.005427,1,420112500.0,0,0,0.0,0.0
16394,312804,16394,16394,3060026330,3060496590,3060026000.0,3060002000.0,0.0,2229.1,539.7,...,359087.7,0.099818,542030700.0,0.000828,1,6849176.0,1,1,0.0,5.0
16395,322660,16395,16395,3060026650,3060607050,3060027000.0,3060005000.0,0.0,2549.4,445.5,...,1153308.0,0.025158,443674900.0,0.00087,1,23443360.0,0,0,0.0,0.0
16396,312670,16396,16396,3060027750,3060530840,3060028000.0,3060002000.0,0.0,2245.4,480.7,...,1254081.0,0.360008,482812000.0,0.011287,1,49946760.0,0,0,0.125738,0.125738


You can find the result on S3 in the location OUTPUTPATH

In [30]:
print(OUTPUTPATH)

s3://wri-projects/Aqueduct30/processData/Y2017M08D08_RH_Thresholds_WaterStress_V01/output/Y2017M08D08_RH_Thresholds_WaterStress_V01_output.csv


I made the output public and you should be able to download it using the following [URL](https://s3.amazonaws.com/wri-projects/Aqueduct30/processData/Y2017M08D08_RH_Thresholds_WaterStress_V01/output/Y2017M08D08_RH_Thresholds_WaterStress_V01_output.csv)