In [1]:
""" Create table with result, raw ma10 and ols with geometries.
-------------------------------------------------------------------------------

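Create postGIS table for selected basins with all ma_10 indicators.

Note (review): the cells visible in this notebook read INPUT_TABLE_NAME from
the database, export the 2014 annual and monthly records as CSV files (copied
to S3) and plot water stress columns for QA; no table is written here.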

Author: Rutger Hofste
Date: 20180622
Kernel: python35
Docker: rutgerhofste/gisdocker:ubuntu16.04

"""

# Run switches.
TESTING = 1
OVERWRITE_OUTPUT = 1

# Script identity; both values are embedded in the output table name and paths.
SCRIPT_NAME = 'Y2018M06D22_RH_QA_result_PostGIS_V01'
OUTPUT_VERSION = 3

# Database connection target.
DATABASE_ENDPOINT = "aqueduct30v05.cgpnumwmfcqc.eu-central-1.rds.amazonaws.com"
DATABASE_NAME = "database01"

# Table names.
INPUT_TABLE_NAME = 'y2018m06d28_rh_ws_full_range_ols_postgis_30spfaf06_v01_v02'
GEOM_TABLE = 'hybas06_v04'
OUTPUT_SCHEMA = "test"
OUTPUT_TABLE_NAME = "".join([SCRIPT_NAME.lower(), "_v{:02.0f}".format(OUTPUT_VERSION)])

# Local (ec2) and S3 output locations, versioned by OUTPUT_VERSION.
ec2_output_path = "/volumes/data/{}/output_V{:02.0f}".format(SCRIPT_NAME,OUTPUT_VERSION)
s3_output_path = "s3://wri-projects/Aqueduct30/qaData/{}/output_V{:02.0f}/".format(SCRIPT_NAME,OUTPUT_VERSION)

# Echo both locations, separated by a single space as print() would do.
print(" ".join(["\nOutput ec2: " + ec2_output_path,
                "\nOutput s3: " + s3_output_path]))



Output ec2: /volumes/data/Y2018M06D22_RH_QA_result_PostGIS_V01/output_V03 
Output s3: s3://wri-projects/Aqueduct30/qaData/Y2018M06D22_RH_QA_result_PostGIS_V01/output_V03/


In [2]:
import time, datetime, sys

# Format both stamps from a single UTC reading. time.strftime() without an
# explicit time tuple formats *local* time, which would contradict the "UTC"
# label whenever the kernel's timezone is not UTC.
utc_now = time.gmtime()
dateString = time.strftime("Y%YM%mD%d", utc_now)
timeString = time.strftime("UTC %H:%M", utc_now)
# Timestamp taken when this cell runs (naive local datetime).
start = datetime.datetime.now()
print(dateString,timeString)
# Last expression: displays the interpreter version as the cell output.
sys.version

Y2018M06D28 UTC 16:15


'3.5.4 |Anaconda, Inc.| (default, Nov 20 2017, 18:44:38) \n[GCC 7.2.0]'

In [3]:
if OVERWRITE_OUTPUT:
    !rm -r {ec2_output_path}
    !mkdir -p {ec2_output_path}
else:
    !mkdir -p {ec2_output_path}
    

rm: cannot remove '/volumes/data/Y2018M06D22_RH_QA_result_PostGIS_V01/output_V03': No such file or directory


In [4]:
# imports
# Render matplotlib figures inline in the notebook.
%matplotlib inline
import re
import os
import random
import numpy as np
import pandas as pd
import bokeh.palettes
from datetime import timedelta
# NOTE(review): wildcard import; create_engine appears to be the only
# sqlalchemy name used in the cells visible here. A wildcard can shadow names
# imported earlier, so the order of these statements matters.
from sqlalchemy import *
from bokeh.plotting import figure 
from bokeh.io import output_notebook, show
from bokeh.models import HoverTool

# Allow up to 500 columns when a DataFrame is displayed.
pd.set_option('display.max_columns', 500)

In [5]:
# Read the database password from the first line of /.password. The context
# manager closes the file even when reading raises.
with open("/.password","r") as password_file:
    password = password_file.read().splitlines()[0]

# NOTE(review): the password is interpolated into the URL unescaped; if it can
# contain characters such as '@' or '/', it would need URL-encoding — confirm.
engine = create_engine("postgresql://rutgerhofste:{}@{}:5432/{}".format(password,DATABASE_ENDPOINT,DATABASE_NAME))

In [6]:
# What to compare
# Annual rows with pfafid_30spfaf06 = 261492, capped at 100000 rows.

sql = "".join([
    "SELECT *",
    " FROM {}".format(INPUT_TABLE_NAME),
    " WHERE pfafid_30spfaf06 = 261492",
    " AND temporal_resolution = 'year'",
    " LIMIT 100000",
])
print(sql)
df = pd.read_sql(sql,engine)



SELECT * FROM y2018m06d28_rh_ws_full_range_ols_postgis_30spfaf06_v01_v02 WHERE pfafid_30spfaf06 = 261492 AND temporal_resolution = 'year' LIMIT 100000


In [7]:
# Select 2014 for annual and monthly ols full range water stress

temporal_resolutions = ["year","month"]

year = 2014 

dfs = {}


def _build_select_sql(temporal_resolution, year, month=None):
    """Return the SELECT statement for one year and temporal resolution.

    Args:
        temporal_resolution: value matched against the temporal_resolution
            column ('year' or 'month').
        year: year to filter on.
        month: optional month to filter on; no month filter when None.

    Returns:
        The SQL string, limited to 100000 rows.
    """
    sql = "SELECT *"
    sql +=" FROM {}".format(INPUT_TABLE_NAME)
    sql +=" WHERE"
    # filter
    sql +=" year = {:04.0f}".format(year)
    sql +=" AND temporal_resolution = '{}'".format(temporal_resolution)
    if month is not None:
        sql +=" AND month = {}".format(month)
    sql +=" LIMIT 100000"
    return sql


def _export_to_csv(temporal_resolution, year, month=None):
    """Query one slice, replace missing values by -9999 and write it to CSV.

    The file is written into ec2_output_path and its path is printed.

    Args:
        temporal_resolution: 'year' or 'month'.
        year: year to export.
        month: month to export; None for the annual file.

    Returns:
        Tuple (output_file_name, DataFrame as written to disk).
    """
    result = pd.read_sql(_build_select_sql(temporal_resolution, year, month), engine)

    if month is None:
        output_file_name = "full_range_ols_ws_{}_Y{:04.0f}.csv".format(temporal_resolution,year)
    else:
        output_file_name = "full_range_ols_ws_{}_Y{:04.0f}M{:02.0f}.csv".format(temporal_resolution,year,month)
    output_file_path = ec2_output_path + "/" + output_file_name
    print(output_file_path)

    # -9999 marks values that were NULL/NaN in the query result.
    result = result.fillna(-9999)
    result.to_csv(output_file_path)
    return output_file_name, result


for temporal_resolution in temporal_resolutions:
    if temporal_resolution == 'year':
        months = [None]          # one annual file, no month filter
    elif temporal_resolution == 'month':
        months = range(1,13)     # one file per calendar month
    else:
        # Unknown resolution: stop processing, as the original loop did.
        break

    for month in months:
        if month is not None:
            print(month)
        # df is rebound on every iteration (as before); dfs keeps every frame
        # under its CSV file name.
        output_file_name, df = _export_to_csv(temporal_resolution, year, month)
        dfs[output_file_name] = df





/volumes/data/Y2018M06D22_RH_QA_result_PostGIS_V01/output_V03/full_range_ols_ws_year_Y2014.csv
1
/volumes/data/Y2018M06D22_RH_QA_result_PostGIS_V01/output_V03/full_range_ols_ws_month_Y2014M01.csv
2
/volumes/data/Y2018M06D22_RH_QA_result_PostGIS_V01/output_V03/full_range_ols_ws_month_Y2014M02.csv
3
/volumes/data/Y2018M06D22_RH_QA_result_PostGIS_V01/output_V03/full_range_ols_ws_month_Y2014M03.csv
4
/volumes/data/Y2018M06D22_RH_QA_result_PostGIS_V01/output_V03/full_range_ols_ws_month_Y2014M04.csv
5
/volumes/data/Y2018M06D22_RH_QA_result_PostGIS_V01/output_V03/full_range_ols_ws_month_Y2014M05.csv
6
/volumes/data/Y2018M06D22_RH_QA_result_PostGIS_V01/output_V03/full_range_ols_ws_month_Y2014M06.csv
7
/volumes/data/Y2018M06D22_RH_QA_result_PostGIS_V01/output_V03/full_range_ols_ws_month_Y2014M07.csv
8
/volumes/data/Y2018M06D22_RH_QA_result_PostGIS_V01/output_V03/full_range_ols_ws_month_Y2014M08.csv
9
/volumes/data/Y2018M06D22_RH_QA_result_PostGIS_V01/output_V03/full_range_ols_ws_month_Y2014M09.

In [8]:
# Recursively copy the local output directory to the S3 output location.
!aws s3 cp {ec2_output_path} {s3_output_path} --recursive

upload: ../../../../data/Y2018M06D22_RH_QA_result_PostGIS_V01/output_V03/full_range_ols_ws_month_Y2014M01.csv to s3://wri-projects/Aqueduct30/qaData/Y2018M06D22_RH_QA_result_PostGIS_V01/output_V03/full_range_ols_ws_month_Y2014M01.csv
upload: ../../../../data/Y2018M06D22_RH_QA_result_PostGIS_V01/output_V03/full_range_ols_ws_month_Y2014M02.csv to s3://wri-projects/Aqueduct30/qaData/Y2018M06D22_RH_QA_result_PostGIS_V01/output_V03/full_range_ols_ws_month_Y2014M02.csv
upload: ../../../../data/Y2018M06D22_RH_QA_result_PostGIS_V01/output_V03/full_range_ols_ws_month_Y2014M03.csv to s3://wri-projects/Aqueduct30/qaData/Y2018M06D22_RH_QA_result_PostGIS_V01/output_V03/full_range_ols_ws_month_Y2014M03.csv
upload: ../../../../data/Y2018M06D22_RH_QA_result_PostGIS_V01/output_V03/full_range_ols_ws_month_Y2014M05.csv to s3://wri-projects/Aqueduct30/qaData/Y2018M06D22_RH_QA_result_PostGIS_V01/output_V03/full_range_ols_ws_month_Y2014M05.csv
upload: ../../../../data/Y2018M06D22_RH_QA_result_PostGIS_V01/ou

In [None]:
# Pick the annual 2014 frame stored in dfs for the inspection and plotting cells below.
# NOTE(review): this frame was queried with year = 2014 only, so plots against
# "year" will have a single x value — confirm this is the intended frame.
df = dfs["full_range_ols_ws_year_Y2014.csv"]

In [None]:
# Display the column dtypes of df.
df.dtypes

In [None]:
# Rows whose ols_ols10 water stress column equals -9999 (the value used to
# fill missing entries before the CSV export).
is_filled = df["ols_ols10_waterstress_dimensionless_30spfaf06"] == -9999
df2 = df.loc[is_filled]

In [None]:
# Display the column dtypes of the -9999 subset.
df2.dtypes

In [None]:
# List the CSV file names under which the exported frames are stored.
dfs.keys()

In [None]:
# Scatter of waterstress_dimensionless_30spfaf06 against year, with the y-axis
# limits set to the column's minimum and maximum.
y_column = "waterstress_dimensionless_30spfaf06"
ax1 = df.plot.scatter("year", y_column)
ax1.set_ylim(df[y_column].min(), df[y_column].max())

In [None]:
# Scatter of ols10_waterstress_dimensionless_30spfaf06 against year, with the
# y-axis limits set to the column's minimum and maximum.
y_column = "ols10_waterstress_dimensionless_30spfaf06"
ax1 = df.plot.scatter("year", y_column)
ax1.set_ylim(df[y_column].min(), df[y_column].max())

In [None]:
# Scatter of ols_ols10_waterstress_dimensionless_30spfaf06 against year, with
# the y-axis limits set to the column's minimum and maximum.
y_column = "ols_ols10_waterstress_dimensionless_30spfaf06"
ax1 = df.plot.scatter("year", y_column)
ax1.set_ylim(df[y_column].min(), df[y_column].max())

In [None]:
# Category20 palette from bokeh; not referenced by the cells visible here.
palette = bokeh.palettes.Category20

In [None]:
# Configure bokeh (bokeh.io.output_notebook) to display plots in the notebook.
output_notebook()

In [None]:
# Interactive line chart of the water stress columns against year.
# NOTE(review): if df is the annual 2014 frame taken from dfs, every row has
# the same year — confirm the frame plotted here is the intended one.
p = figure(width=900, height=800)

# One (column, colour) pair per line. The legend entry is the column name
# itself, so a label cannot drift from the data it describes (the first series
# was previously labelled "10_waterstress_dimensionless_30spfaf06", which
# matches no column).
series = [
    ("waterstress_dimensionless_30spfaf06", "black"),
    ("ma10_waterstress_dimensionless_30spfaf06", "blue"),
    ("ols10_waterstress_dimensionless_30spfaf06", "red"),
]
for column_name, line_color in series:
    p.line(x = df["year"], y = df[column_name], color=line_color, legend=column_name)

p.legend.location = "top_left"
# Clicking a legend entry toggles the visibility of its line.
p.legend.click_policy="hide"
# Tooltip fields refer to the x / y data passed to p.line.
hover = HoverTool(tooltips = [('year', '@x'),
                             ('value',  '@y')])
p.add_tools(hover)

show(p)