In [1]:
""" Merge and simplify master table and annual scores based on months for deltas.
-------------------------------------------------------------------------------

Y2020M02D06 Update output 2-3 input left 2-3 input right 2-3

Author: Rutger Hofste
Date: 20180727
Kernel: python35
Docker: rutgerhofste/gisdocker:ubuntu16.04

Args:
    TESTING (Boolean) : Toggle testing case.
    SCRIPT_NAME (string) : Script name.
    OUTPUT_VERSION (integer) : output version.
    DATABASE_ENDPOINT (string) : RDS or postGreSQL endpoint.
    DATABASE_NAME (string) : Database name.
    OVERWRITE_OUTPUT (Boolean) : Drop the output table, if it exists, before
        it is recreated.
    INPUT_TABLE_NAME_LEFT (string) : Table that supplies the master columns.
        Must exist on same database as used in rest of script.
    INPUT_TABLE_NAME_RIGHT (string) : Table that supplies the annual scores
        from months. Joined to the left table on delta_id and year.

"""

# Run switches. TESTING is not read anywhere else in the visible cells.
TESTING = 0
# Truthy: the output table is dropped (if it exists) before it is recreated.
OVERWRITE_OUTPUT = 1
SCRIPT_NAME = 'Y2018M07D27_RH_Deltas_Merge_Simplify_Tables_V01'
OUTPUT_VERSION = 3

# PostgreSQL server and database used for both input tables and the output table.
DATABASE_ENDPOINT = "aqueduct30v05.cgpnumwmfcqc.eu-central-1.rds.amazonaws.com"
DATABASE_NAME = "database01"

# The left table supplies the bulk of the columns; the right table supplies the
# avg1y_* annual-score columns that are joined onto it.
INPUT_TABLE_NAME_LEFT = "y2018m07d27_rh_deltas_apply_aridlowonce_mask_v01_v03"
INPUT_TABLE_NAME_RIGHT = "y2018m07d27_rh_deltas_annual_scores_from_months_v01_v03"
# Lower-cased script name plus a zero-padded version suffix (e.g. "_v03").
OUTPUT_TABLE_NAME = SCRIPT_NAME.lower() + "_v{:02.0f}".format(OUTPUT_VERSION)

# Echo the resolved table names so they are recorded in the cell output.
print("Input Table Left: " , INPUT_TABLE_NAME_LEFT, 
      "Input Table Right: " , INPUT_TABLE_NAME_RIGHT, 
      "\nOutput Table: " , OUTPUT_TABLE_NAME)

Input Table Left:  y2018m07d27_rh_deltas_apply_aridlowonce_mask_v01_v03 Input Table Right:  y2018m07d27_rh_deltas_annual_scores_from_months_v01_v03 
Output Table:  y2018m07d27_rh_deltas_merge_simplify_tables_v01_v03


In [2]:
# Record when this run started.
# The strings are labelled "UTC", so they are formatted from time.gmtime();
# time.strftime() without a time tuple would format the machine's local time.
import time, datetime, sys
utc_now = time.gmtime()
dateString = time.strftime("Y%YM%mD%d", utc_now)
timeString = time.strftime("UTC %H:%M", utc_now)
# Start of the run; the last cell subtracts this to report the elapsed time.
start = datetime.datetime.now()
print(dateString,timeString)
sys.version

Y2020M02D19 UTC 11:11


'3.5.4 |Anaconda, Inc.| (default, Nov 20 2017, 18:44:38) \n[GCC 7.2.0]'

In [3]:
# imports
import re
import os
import numpy as np
import pandas as pd
import aqueduct3
from datetime import timedelta
from sqlalchemy import *
pd.set_option('display.max_columns', 500)

In [4]:
# Read the database password from the first line of /.password. The context
# manager closes the file even if reading or indexing raises.
with open("/.password","r") as password_file:
    password = password_file.read().splitlines()[0]

# One engine is shared by every query in this notebook.
engine = create_engine("postgresql://rutgerhofste:{}@{}:5432/{}".format(password,DATABASE_ENDPOINT,DATABASE_NAME))

# Optionally remove a previous version of the output table so that the
# CREATE TABLE statement further down does not fail on an existing table.
if OVERWRITE_OUTPUT:
    sql = "DROP TABLE IF EXISTS {};".format(OUTPUT_TABLE_NAME)
    print(sql)
    result = engine.execute(sql)

DROP TABLE IF EXISTS y2018m07d27_rh_deltas_merge_simplify_tables_v01_v03;


In [5]:
# Identifier, time and area columns. The cells that follow append the
# indicator columns to this same list.
columns_to_keep_left = [
    "delta_id",
    "temporal_resolution",
    "year",
    "month",
    "area_m2_30spfaf06",
    "area_count_30spfaf06",
]

In [6]:
# Placeholder; the right-table columns are assigned later, just before the
# CREATE TABLE statement is built.
columns_to_keep_right = []

## Raw Data and Decadal Statistics

In [7]:
# Sector prefixes and use-type suffixes; they are combined pairwise into
# indicator names.
sectors = ["ptot", "pdom", "pind", "pirr", "pliv"]
use_types = ["ww", "wn"]

In [8]:
# Every sector paired with every use type, sector varying slowest
# (ptotww, ptotwn, pdomww, ...).
decadal_indicators = ["{}{}".format(sector_name, use_type_name)
                      for sector_name in sectors
                      for use_type_name in use_types]

In [9]:
# riverdischarge is added as-is: unlike the other entries it is not built
# from a sector/use-type pair.
decadal_indicators.append("riverdischarge")

In [10]:
# Column-name prefixes of the decadal statistics. The empty string selects
# the raw, unprefixed column.
decadal_statistics = [
    "",
    "ma10_",
    "min10_",
    "max10_",
    "slope10_",
    "intercept10_",
    "ols10_",
    "capped_ols10_",
]

In [11]:
# Raw and decadal columns: one name per (statistic prefix, indicator) pair,
# statistic varying slowest. Each name is printed and added to the left list.
decadal_columns = ["{}{}_m_30spfaf06".format(statistic_prefix, indicator_name)
                   for statistic_prefix in decadal_statistics
                   for indicator_name in decadal_indicators]
for column_name in decadal_columns:
    print(column_name)
columns_to_keep_left.extend(decadal_columns)

ptotww_m_30spfaf06
ptotwn_m_30spfaf06
pdomww_m_30spfaf06
pdomwn_m_30spfaf06
pindww_m_30spfaf06
pindwn_m_30spfaf06
pirrww_m_30spfaf06
pirrwn_m_30spfaf06
plivww_m_30spfaf06
plivwn_m_30spfaf06
riverdischarge_m_30spfaf06
ma10_ptotww_m_30spfaf06
ma10_ptotwn_m_30spfaf06
ma10_pdomww_m_30spfaf06
ma10_pdomwn_m_30spfaf06
ma10_pindww_m_30spfaf06
ma10_pindwn_m_30spfaf06
ma10_pirrww_m_30spfaf06
ma10_pirrwn_m_30spfaf06
ma10_plivww_m_30spfaf06
ma10_plivwn_m_30spfaf06
ma10_riverdischarge_m_30spfaf06
min10_ptotww_m_30spfaf06
min10_ptotwn_m_30spfaf06
min10_pdomww_m_30spfaf06
min10_pdomwn_m_30spfaf06
min10_pindww_m_30spfaf06
min10_pindwn_m_30spfaf06
min10_pirrww_m_30spfaf06
min10_pirrwn_m_30spfaf06
min10_plivww_m_30spfaf06
min10_plivwn_m_30spfaf06
min10_riverdischarge_m_30spfaf06
max10_ptotww_m_30spfaf06
max10_ptotwn_m_30spfaf06
max10_pdomww_m_30spfaf06
max10_pdomwn_m_30spfaf06
max10_pindww_m_30spfaf06
max10_pindwn_m_30spfaf06
max10_pirrww_m_30spfaf06
max10_pirrwn_m_30spfaf06
max10_plivww_m_30spfaf06
max

## Statistics on Decadal Statistics

In [12]:
tier2_decadal_indicators = ["ptotww", "ptotwn", "riverdischarge"]

# Outer prefix (statistic over the series) followed by inner prefix
# (the decadal statistic it is computed on).
tier2_decadal_statistics_0 = ["ols_","avg_","min_","max_","slope_","intercept_"]
tier2_decadal_statistics_1 = ["ma10_","ols10_","capped_ols10_"]

# Indicator varies slowest, then outer prefix, then inner prefix.
tier2_columns = ["{}{}{}_m_30spfaf06".format(outer_prefix, inner_prefix, indicator_name)
                 for indicator_name in tier2_decadal_indicators
                 for outer_prefix in tier2_decadal_statistics_0
                 for inner_prefix in tier2_decadal_statistics_1]
for column_name in tier2_columns:
    print(column_name)
columns_to_keep_left.extend(tier2_columns)
            

ols_ma10_ptotww_m_30spfaf06
ols_ols10_ptotww_m_30spfaf06
ols_capped_ols10_ptotww_m_30spfaf06
avg_ma10_ptotww_m_30spfaf06
avg_ols10_ptotww_m_30spfaf06
avg_capped_ols10_ptotww_m_30spfaf06
min_ma10_ptotww_m_30spfaf06
min_ols10_ptotww_m_30spfaf06
min_capped_ols10_ptotww_m_30spfaf06
max_ma10_ptotww_m_30spfaf06
max_ols10_ptotww_m_30spfaf06
max_capped_ols10_ptotww_m_30spfaf06
slope_ma10_ptotww_m_30spfaf06
slope_ols10_ptotww_m_30spfaf06
slope_capped_ols10_ptotww_m_30spfaf06
intercept_ma10_ptotww_m_30spfaf06
intercept_ols10_ptotww_m_30spfaf06
intercept_capped_ols10_ptotww_m_30spfaf06
ols_ma10_ptotwn_m_30spfaf06
ols_ols10_ptotwn_m_30spfaf06
ols_capped_ols10_ptotwn_m_30spfaf06
avg_ma10_ptotwn_m_30spfaf06
avg_ols10_ptotwn_m_30spfaf06
avg_capped_ols10_ptotwn_m_30spfaf06
min_ma10_ptotwn_m_30spfaf06
min_ols10_ptotwn_m_30spfaf06
min_capped_ols10_ptotwn_m_30spfaf06
max_ma10_ptotwn_m_30spfaf06
max_ols10_ptotwn_m_30spfaf06
max_capped_ols10_ptotwn_m_30spfaf06
slope_ma10_ptotwn_m_30spfaf06
slope_ols10_ptot

## Complete TimeSeries Statistics

In [13]:
# Statistics based on the full time series exist only for ptotww, ptotwn and
# riverdischarge.
complete_timeseries_statistics = ["avg_", "min_", "max_", "slope_", "intercept_", "ols_"]

complete_timeseries_indicators = ["ptotww", "ptotwn", "riverdischarge"]


In [14]:
# Full-time-series columns, statistic varying slowest (avg_* for all three
# indicators, then min_*, ...). Each name is printed and added to the left list.
complete_timeseries_columns = [
    "{}{}_m_30spfaf06".format(statistic_prefix, indicator_name)
    for statistic_prefix in complete_timeseries_statistics
    for indicator_name in complete_timeseries_indicators
]
for column_name in complete_timeseries_columns:
    print(column_name)
columns_to_keep_left.extend(complete_timeseries_columns)

avg_ptotww_m_30spfaf06
avg_ptotwn_m_30spfaf06
avg_riverdischarge_m_30spfaf06
min_ptotww_m_30spfaf06
min_ptotwn_m_30spfaf06
min_riverdischarge_m_30spfaf06
max_ptotww_m_30spfaf06
max_ptotwn_m_30spfaf06
max_riverdischarge_m_30spfaf06
slope_ptotww_m_30spfaf06
slope_ptotwn_m_30spfaf06
slope_riverdischarge_m_30spfaf06
intercept_ptotww_m_30spfaf06
intercept_ptotwn_m_30spfaf06
intercept_riverdischarge_m_30spfaf06
ols_ptotww_m_30spfaf06
ols_ptotwn_m_30spfaf06
ols_riverdischarge_m_30spfaf06


## Raw and Decadal Arid and Lowwater Use Columns

In [15]:
arid_lowwateruse_indicators = ["arid", "lowwateruse", "aridandlowwateruse"]

# "" selects the raw boolean column; the others are its decadal variants.
arid_lowwateruse_statistics_tier0 = ["", "ma10_", "ols10_"]

# Indicator varies slowest; each name is printed and added to the left list.
arid_lowwateruse_columns = [
    "{}{}_boolean_30spfaf06".format(statistic_prefix, indicator_name)
    for indicator_name in arid_lowwateruse_indicators
    for statistic_prefix in arid_lowwateruse_statistics_tier0
]
for column_name in arid_lowwateruse_columns:
    print(column_name)
columns_to_keep_left.extend(arid_lowwateruse_columns)

arid_boolean_30spfaf06
ma10_arid_boolean_30spfaf06
ols10_arid_boolean_30spfaf06
lowwateruse_boolean_30spfaf06
ma10_lowwateruse_boolean_30spfaf06
ols10_lowwateruse_boolean_30spfaf06
aridandlowwateruse_boolean_30spfaf06
ma10_aridandlowwateruse_boolean_30spfaf06
ols10_aridandlowwateruse_boolean_30spfaf06


## Statistics on Decadal Statistics Arid and Lowwater Use

In [16]:
lowarid_tier2_decadal_indicators = ["arid", "lowwateruse", "aridandlowwateruse"]

# Only one outer/inner prefix combination is kept for the boolean columns.
lowarid_tier2_decadal_statistics_0 = ["ols_"]
lowarid_tier2_decadal_statistics_1 = ["ols10_"]

# Indicator varies slowest, then outer prefix, then inner prefix.
lowarid_tier2_columns = [
    "{}{}{}_boolean_30spfaf06".format(outer_prefix, inner_prefix, indicator_name)
    for indicator_name in lowarid_tier2_decadal_indicators
    for outer_prefix in lowarid_tier2_decadal_statistics_0
    for inner_prefix in lowarid_tier2_decadal_statistics_1
]
for column_name in lowarid_tier2_columns:
    print(column_name)
columns_to_keep_left.extend(lowarid_tier2_columns)

ols_ols10_arid_boolean_30spfaf06
ols_ols10_lowwateruse_boolean_30spfaf06
ols_ols10_aridandlowwateruse_boolean_30spfaf06


## Water Stress Decadal

In [17]:
waterstress_decadal_indicators = ["waterstress","waterdepletion"]

# "" selects the raw column; the others are its decadal variants.
waterstress_decadal_statistics = ["", "ma10_", "ols10_", "capped_ols10_"]

# Indicator varies slowest; each name is printed and added to the left list.
waterstress_decadal_columns = [
    "{}{}_dimensionless_30spfaf06".format(statistic_prefix, indicator_name)
    for indicator_name in waterstress_decadal_indicators
    for statistic_prefix in waterstress_decadal_statistics
]
for column_name in waterstress_decadal_columns:
    print(column_name)
columns_to_keep_left.extend(waterstress_decadal_columns)

waterstress_dimensionless_30spfaf06
ma10_waterstress_dimensionless_30spfaf06
ols10_waterstress_dimensionless_30spfaf06
capped_ols10_waterstress_dimensionless_30spfaf06
waterdepletion_dimensionless_30spfaf06
ma10_waterdepletion_dimensionless_30spfaf06
ols10_waterdepletion_dimensionless_30spfaf06
capped_ols10_waterdepletion_dimensionless_30spfaf06


## Statistics on Decadal Statistics Water Stress


In [18]:
waterstress_tier2_decadal_indicators = ["waterstress","waterdepletion"]

# Outer prefix (statistic over the series) followed by inner prefix
# (the decadal statistic it is computed on).
waterstress_tier2_decadal_statistics_0 = ["avg_","min_","max_","slope_","intercept_","ols_"]
waterstress_tier2_decadal_statistics_1 = ["ols10_","ma10_","capped_ols10_"]

# Indicator varies slowest, then outer prefix, then inner prefix.
waterstress_tier2_columns = [
    "{}{}{}_dimensionless_30spfaf06".format(outer_prefix, inner_prefix, indicator_name)
    for indicator_name in waterstress_tier2_decadal_indicators
    for outer_prefix in waterstress_tier2_decadal_statistics_0
    for inner_prefix in waterstress_tier2_decadal_statistics_1
]
for column_name in waterstress_tier2_columns:
    print(column_name)
columns_to_keep_left.extend(waterstress_tier2_columns)

avg_ols10_waterstress_dimensionless_30spfaf06
avg_ma10_waterstress_dimensionless_30spfaf06
avg_capped_ols10_waterstress_dimensionless_30spfaf06
min_ols10_waterstress_dimensionless_30spfaf06
min_ma10_waterstress_dimensionless_30spfaf06
min_capped_ols10_waterstress_dimensionless_30spfaf06
max_ols10_waterstress_dimensionless_30spfaf06
max_ma10_waterstress_dimensionless_30spfaf06
max_capped_ols10_waterstress_dimensionless_30spfaf06
slope_ols10_waterstress_dimensionless_30spfaf06
slope_ma10_waterstress_dimensionless_30spfaf06
slope_capped_ols10_waterstress_dimensionless_30spfaf06
intercept_ols10_waterstress_dimensionless_30spfaf06
intercept_ma10_waterstress_dimensionless_30spfaf06
intercept_capped_ols10_waterstress_dimensionless_30spfaf06
ols_ols10_waterstress_dimensionless_30spfaf06
ols_ma10_waterstress_dimensionless_30spfaf06
ols_capped_ols10_waterstress_dimensionless_30spfaf06
avg_ols10_waterdepletion_dimensionless_30spfaf06
avg_ma10_waterdepletion_dimensionless_30spfaf06
avg_capped_ols1

## Water Stress Complete Timeseries

In [19]:
waterstress_complete_indicators = ["waterstress","waterdepletion"]

waterstress_complete_statistics = ["min_", "max_", "avg_", "slope_", "intercept_", "ols_"]

# Indicator varies slowest; each name is printed and added to the left list.
waterstress_complete_columns = [
    "{}{}_dimensionless_30spfaf06".format(statistic_prefix, indicator_name)
    for indicator_name in waterstress_complete_indicators
    for statistic_prefix in waterstress_complete_statistics
]
for column_name in waterstress_complete_columns:
    print(column_name)
columns_to_keep_left.extend(waterstress_complete_columns)

min_waterstress_dimensionless_30spfaf06
max_waterstress_dimensionless_30spfaf06
avg_waterstress_dimensionless_30spfaf06
slope_waterstress_dimensionless_30spfaf06
intercept_waterstress_dimensionless_30spfaf06
ols_waterstress_dimensionless_30spfaf06
min_waterdepletion_dimensionless_30spfaf06
max_waterdepletion_dimensionless_30spfaf06
avg_waterdepletion_dimensionless_30spfaf06
slope_waterdepletion_dimensionless_30spfaf06
intercept_waterdepletion_dimensionless_30spfaf06
ols_waterdepletion_dimensionless_30spfaf06


In [20]:
# Preview query: every kept column of the left input table, limited to 100 rows.
# The column list is assembled with str.join. Appending "col," per column and
# slicing off the last character would cut into the SELECT keyword when the
# list is empty.
sql = "SELECT {} FROM {} LIMIT 100".format(", ".join(columns_to_keep_left),
                                           INPUT_TABLE_NAME_LEFT)

In [21]:
# Display the preview query. It is shown for inspection only: the next cell
# replaces `sql` before anything is read from the database.
sql

'SELECT delta_id, temporal_resolution, year, month, area_m2_30spfaf06, area_count_30spfaf06, ptotww_m_30spfaf06, ptotwn_m_30spfaf06, pdomww_m_30spfaf06, pdomwn_m_30spfaf06, pindww_m_30spfaf06, pindwn_m_30spfaf06, pirrww_m_30spfaf06, pirrwn_m_30spfaf06, plivww_m_30spfaf06, plivwn_m_30spfaf06, riverdischarge_m_30spfaf06, ma10_ptotww_m_30spfaf06, ma10_ptotwn_m_30spfaf06, ma10_pdomww_m_30spfaf06, ma10_pdomwn_m_30spfaf06, ma10_pindww_m_30spfaf06, ma10_pindwn_m_30spfaf06, ma10_pirrww_m_30spfaf06, ma10_pirrwn_m_30spfaf06, ma10_plivww_m_30spfaf06, ma10_plivwn_m_30spfaf06, ma10_riverdischarge_m_30spfaf06, min10_ptotww_m_30spfaf06, min10_ptotwn_m_30spfaf06, min10_pdomww_m_30spfaf06, min10_pdomwn_m_30spfaf06, min10_pindww_m_30spfaf06, min10_pindwn_m_30spfaf06, min10_pirrww_m_30spfaf06, min10_pirrwn_m_30spfaf06, min10_plivww_m_30spfaf06, min10_plivwn_m_30spfaf06, min10_riverdischarge_m_30spfaf06, max10_ptotww_m_30spfaf06, max10_ptotwn_m_30spfaf06, max10_pdomww_m_30spfaf06, max10_pdomwn_m_30spfaf06

In [22]:
# Sample query with every column of the left table (SELECT *); this replaces
# the preview query previously held in `sql`.
sql = "SELECT * FROM {} LIMIT 10".format(INPUT_TABLE_NAME_LEFT)

In [23]:
# NOTE: despite the name, df_complete holds only the rows returned by the
# LIMIT query in `sql`; it is complete in its columns, not in its rows.
df_complete = pd.read_sql(sql,engine)

In [24]:
# Show the first rows of the sample to inspect the columns that are available.
df_complete.head()

Unnamed: 0,delta_id,temporal_resolution,year,month,area_m2_30spfaf06,area_count_30spfaf06,pdomww_m_30spfaf06,pdomwn_m_30spfaf06,pindww_m_30spfaf06,pindwn_m_30spfaf06,pirrww_m_30spfaf06,pirrwn_m_30spfaf06,plivww_m_30spfaf06,plivwn_m_30spfaf06,ptotww_m_30spfaf06,ptotwn_m_30spfaf06,riverdischarge_m_30spfaf06,ma10_pdomww_m_30spfaf06,min10_pdomww_m_30spfaf06,max10_pdomww_m_30spfaf06,slope10_pdomww_m_30spfaf06,intercept10_pdomww_m_30spfaf06,ols10_pdomww_m_30spfaf06,ma10_pdomwn_m_30spfaf06,min10_pdomwn_m_30spfaf06,max10_pdomwn_m_30spfaf06,slope10_pdomwn_m_30spfaf06,intercept10_pdomwn_m_30spfaf06,ols10_pdomwn_m_30spfaf06,ma10_pindww_m_30spfaf06,min10_pindww_m_30spfaf06,max10_pindww_m_30spfaf06,slope10_pindww_m_30spfaf06,intercept10_pindww_m_30spfaf06,ols10_pindww_m_30spfaf06,ma10_pindwn_m_30spfaf06,min10_pindwn_m_30spfaf06,max10_pindwn_m_30spfaf06,slope10_pindwn_m_30spfaf06,intercept10_pindwn_m_30spfaf06,ols10_pindwn_m_30spfaf06,ma10_pirrww_m_30spfaf06,min10_pirrww_m_30spfaf06,max10_pirrww_m_30spfaf06,slope10_pirrww_m_30spfaf06,intercept10_pirrww_m_30spfaf06,ols10_pirrww_m_30spfaf06,ma10_pirrwn_m_30spfaf06,min10_pirrwn_m_30spfaf06,max10_pirrwn_m_30spfaf06,slope10_pirrwn_m_30spfaf06,intercept10_pirrwn_m_30spfaf06,ols10_pirrwn_m_30spfaf06,ma10_plivww_m_30spfaf06,min10_plivww_m_30spfaf06,max10_plivww_m_30spfaf06,slope10_plivww_m_30spfaf06,intercept10_plivww_m_30spfaf06,ols10_plivww_m_30spfaf06,ma10_plivwn_m_30spfaf06,min10_plivwn_m_30spfaf06,max10_plivwn_m_30spfaf06,slope10_plivwn_m_30spfaf06,intercept10_plivwn_m_30spfaf06,ols10_plivwn_m_30spfaf06,ma10_ptotww_m_30spfaf06,min10_ptotww_m_30spfaf06,max10_ptotww_m_30spfaf06,slope10_ptotww_m_30spfaf06,intercept10_ptotww_m_30spfaf06,ols10_ptotww_m_30spfaf06,ma10_ptotwn_m_30spfaf06,min10_ptotwn_m_30spfaf06,max10_ptotwn_m_30spfaf06,slope10_ptotwn_m_30spfaf06,intercept10_ptotwn_m_30spfaf06,ols10_ptotwn_m_30spfaf06,ma10_riverdischarge_m_30spfaf06,min10_riverdischarge_m_30spfaf06,max10_riverdischarge_m_30spfaf06,slope10_riverdischarge_m_
30spfaf06,intercept10_riverdischarge_m_30spfaf06,ols10_riverdischarge_m_30spfaf06,capped_ols10_pdomww_m_30spfaf06,capped_ols10_pdomwn_m_30spfaf06,capped_ols10_pindww_m_30spfaf06,capped_ols10_pindwn_m_30spfaf06,capped_ols10_pirrww_m_30spfaf06,capped_ols10_pirrwn_m_30spfaf06,capped_ols10_plivww_m_30spfaf06,capped_ols10_plivwn_m_30spfaf06,capped_ols10_ptotww_m_30spfaf06,capped_ols10_ptotwn_m_30spfaf06,capped_ols10_riverdischarge_m_30spfaf06,arid_boolean_30spfaf06,ma10_arid_boolean_30spfaf06,ols10_arid_boolean_30spfaf06,lowwateruse_boolean_30spfaf06,ma10_lowwateruse_boolean_30spfaf06,ols10_lowwateruse_boolean_30spfaf06,aridandlowwateruse_boolean_30spfaf06,ma10_aridandlowwateruse_boolean_30spfaf06,ols10_aridandlowwateruse_boolean_30spfaf06,waterstress_dimensionless_30spfaf06,waterdepletion_dimensionless_30spfaf06,ma10_waterstress_dimensionless_30spfaf06,ma10_waterdepletion_dimensionless_30spfaf06,ols10_waterstress_dimensionless_30spfaf06,ols10_waterdepletion_dimensionless_30spfaf06,capped_ols10_waterstress_dimensionless_30spfaf06,capped_ols10_waterdepletion_dimensionless_30spfaf06,avg_waterstress_dimensionless_30spfaf06,min_waterstress_dimensionless_30spfaf06,max_waterstress_dimensionless_30spfaf06,slope_waterstress_dimensionless_30spfaf06,intercept_waterstress_dimensionless_30spfaf06,ols_waterstress_dimensionless_30spfaf06,avg_waterdepletion_dimensionless_30spfaf06,min_waterdepletion_dimensionless_30spfaf06,max_waterdepletion_dimensionless_30spfaf06,slope_waterdepletion_dimensionless_30spfaf06,intercept_waterdepletion_dimensionless_30spfaf06,ols_waterdepletion_dimensionless_30spfaf06,avg_riverdischarge_m_30spfaf06,min_riverdischarge_m_30spfaf06,max_riverdischarge_m_30spfaf06,slope_riverdischarge_m_30spfaf06,intercept_riverdischarge_m_30spfaf06,ols_riverdischarge_m_30spfaf06,avg_ptotww_m_30spfaf06,min_ptotww_m_30spfaf06,max_ptotww_m_30spfaf06,slope_ptotww_m_30spfaf06,intercept_ptotww_m_30spfaf06,ols_ptotww_m_30spfaf06,avg_ptotwn_m_30spfaf06,min_ptotwn_m_30spfaf06,max_pto
twn_m_30spfaf06,slope_ptotwn_m_30spfaf06,intercept_ptotwn_m_30spfaf06,ols_ptotwn_m_30spfaf06,avg_ma10_waterstress_dimensionless_30spfaf06,min_ma10_waterstress_dimensionless_30spfaf06,max_ma10_waterstress_dimensionless_30spfaf06,slope_ma10_waterstress_dimensionless_30spfaf06,intercept_ma10_waterstress_dimensionless_30spfaf06,ols_ma10_waterstress_dimensionless_30spfaf06,avg_ma10_waterdepletion_dimensionless_30spfaf06,min_ma10_waterdepletion_dimensionless_30spfaf06,max_ma10_waterdepletion_dimensionless_30spfaf06,slope_ma10_waterdepletion_dimensionless_30spfaf06,intercept_ma10_waterdepletion_dimensionless_30spfaf06,ols_ma10_waterdepletion_dimensionless_30spfaf06,avg_ma10_riverdischarge_m_30spfaf06,min_ma10_riverdischarge_m_30spfaf06,max_ma10_riverdischarge_m_30spfaf06,slope_ma10_riverdischarge_m_30spfaf06,intercept_ma10_riverdischarge_m_30spfaf06,ols_ma10_riverdischarge_m_30spfaf06,avg_ma10_ptotww_m_30spfaf06,min_ma10_ptotww_m_30spfaf06,max_ma10_ptotww_m_30spfaf06,slope_ma10_ptotww_m_30spfaf06,intercept_ma10_ptotww_m_30spfaf06,ols_ma10_ptotww_m_30spfaf06,avg_ma10_ptotwn_m_30spfaf06,min_ma10_ptotwn_m_30spfaf06,max_ma10_ptotwn_m_30spfaf06,slope_ma10_ptotwn_m_30spfaf06,intercept_ma10_ptotwn_m_30spfaf06,ols_ma10_ptotwn_m_30spfaf06,avg_ols10_waterstress_dimensionless_30spfaf06,min_ols10_waterstress_dimensionless_30spfaf06,max_ols10_waterstress_dimensionless_30spfaf06,slope_ols10_waterstress_dimensionless_30spfaf06,intercept_ols10_waterstress_dimensionless_30spfaf06,ols_ols10_waterstress_dimensionless_30spfaf06,avg_ols10_waterdepletion_dimensionless_30spfaf06,min_ols10_waterdepletion_dimensionless_30spfaf06,max_ols10_waterdepletion_dimensionless_30spfaf06,slope_ols10_waterdepletion_dimensionless_30spfaf06,intercept_ols10_waterdepletion_dimensionless_30spfaf06,ols_ols10_waterdepletion_dimensionless_30spfaf06,avg_ols10_riverdischarge_m_30spfaf06,min_ols10_riverdischarge_m_30spfaf06,max_ols10_riverdischarge_m_30spfaf06,slope_ols10_riverdischarge_m_30spfaf06,intercept_ols10_river
discharge_m_30spfaf06,ols_ols10_riverdischarge_m_30spfaf06,avg_ols10_ptotww_m_30spfaf06,min_ols10_ptotww_m_30spfaf06,max_ols10_ptotww_m_30spfaf06,slope_ols10_ptotww_m_30spfaf06,intercept_ols10_ptotww_m_30spfaf06,ols_ols10_ptotww_m_30spfaf06,avg_ols10_ptotwn_m_30spfaf06,min_ols10_ptotwn_m_30spfaf06,max_ols10_ptotwn_m_30spfaf06,slope_ols10_ptotwn_m_30spfaf06,intercept_ols10_ptotwn_m_30spfaf06,ols_ols10_ptotwn_m_30spfaf06,avg_capped_ols10_waterstress_dimensionless_30spfaf06,min_capped_ols10_waterstress_dimensionless_30spfaf06,max_capped_ols10_waterstress_dimensionless_30spfaf06,slope_capped_ols10_waterstress_dimensionless_30spfaf06,intercept_capped_ols10_waterstress_dimensionless_30spfaf06,ols_capped_ols10_waterstress_dimensionless_30spfaf06,avg_capped_ols10_waterdepletion_dimensionless_30spfaf06,min_capped_ols10_waterdepletion_dimensionless_30spfaf06,max_capped_ols10_waterdepletion_dimensionless_30spfaf06,slope_capped_ols10_waterdepletion_dimensionless_30spfaf06,intercept_capped_ols10_waterdepletion_dimensionless_30spfaf06,ols_capped_ols10_waterdepletion_dimensionless_30spfaf06,avg_capped_ols10_riverdischarge_m_30spfaf06,min_capped_ols10_riverdischarge_m_30spfaf06,max_capped_ols10_riverdischarge_m_30spfaf06,slope_capped_ols10_riverdischarge_m_30spfaf06,intercept_capped_ols10_riverdischarge_m_30spfaf06,ols_capped_ols10_riverdischarge_m_30spfaf06,avg_capped_ols10_ptotww_m_30spfaf06,min_capped_ols10_ptotww_m_30spfaf06,max_capped_ols10_ptotww_m_30spfaf06,slope_capped_ols10_ptotww_m_30spfaf06,intercept_capped_ols10_ptotww_m_30spfaf06,ols_capped_ols10_ptotww_m_30spfaf06,avg_capped_ols10_ptotwn_m_30spfaf06,min_capped_ols10_ptotwn_m_30spfaf06,max_capped_ols10_ptotwn_m_30spfaf06,slope_capped_ols10_ptotwn_m_30spfaf06,intercept_capped_ols10_ptotwn_m_30spfaf06,ols_capped_ols10_ptotwn_m_30spfaf06,ols_ols10_arid_boolean_30spfaf06,ols_ols10_lowwateruse_boolean_30spfaf06,ols_ols10_aridandlowwateruse_boolean_30spfaf06
0,1.0,month,1960,1,63941820000.0,78273.0,4e-05,3.5e-05,4.2e-05,1.7e-05,0.006501,0.00273,9.185213e-07,9.185213e-07,0.006584,0.002783,0.183502,4e-05,4e-05,4e-05,,,,3.5e-05,3.5e-05,3.5e-05,,,,4.2e-05,4.2e-05,4.2e-05,,,,1.7e-05,1.7e-05,1.7e-05,,,,0.006501,0.006501,0.006501,,,,0.00273,0.00273,0.00273,,,,9.185213e-07,9.185213e-07,9.185213e-07,,,,9.185213e-07,9.185213e-07,9.185213e-07,,,,0.006584,0.006584,0.006584,,,,0.002783,0.002783,0.002783,,,,0.183502,0.183502,0.183502,,,,,,,,,,,,,,,0,0,0,0,0,0,0,0,0,0.035878,0.015167,0.035878,0.015167,,,,,0.035878,0.035878,0.035878,,,2.0,0.015167,0.015167,0.015167,,,2.0,0.183502,0.183502,0.183502,,,2.0,0.006584,0.006584,0.006584,,,2.0,0.002783,0.002783,0.002783,,,2.0,0.035878,0.035878,0.035878,,,2.0,0.015167,0.015167,0.015167,,,2.0,0.183502,0.183502,0.183502,,,2.0,0.006584,0.006584,0.006584,,,2.0,0.002783,0.002783,0.002783,,,2.0,,,,,,2.0,,,,,,2.0,,,,,,2.0,,,,,,2.0,,,,,,2.0,,,,,,2.0,,,,,,2.0,,,,,,2.0,,,,,,2.0,,,,,,2.0,0,0,0
1,1.0,month,1961,1,63941820000.0,78273.0,3.9e-05,3.4e-05,4.1e-05,1.6e-05,0.006217,0.002611,9.2392e-07,9.2392e-07,0.006298,0.002662,0.138037,3.9e-05,3.9e-05,4e-05,-8.347077e-07,0.001676,3.9e-05,3.4e-05,3.4e-05,3.5e-05,-8.749372e-07,0.00175,3.4e-05,4.2e-05,4.1e-05,4.2e-05,-7.673602e-07,0.001546,4.1e-05,1.7e-05,1.6e-05,1.7e-05,-3.06944e-07,0.000618,1.6e-05,0.006359,0.006217,0.006501,-0.000285,0.564311,0.006217,0.002671,0.002611,0.00273,-0.00012,0.237011,0.002611,9.212206e-07,9.185213e-07,9.2392e-07,5.398709e-09,-1e-05,9.2392e-07,9.212206e-07,9.185213e-07,9.2392e-07,5.398709e-09,-1e-05,9.2392e-07,0.006441,0.006298,0.006584,-0.000286,0.567523,0.006298,0.002723,0.002662,0.002783,-0.000121,0.239369,0.002662,0.160769,0.138037,0.183502,-0.045466,89.296043,0.138037,3.9e-05,3.4e-05,4.1e-05,1.6e-05,0.006217,0.002611,9.2392e-07,9.2392e-07,0.006298,0.002662,0.138037,0,0,0,0,0,0,0,0,0,0.045622,0.019288,0.040062,0.016936,0.045622,0.019288,0.045622,0.019288,0.04075,0.035878,0.045622,0.009744,-19.062497,0.045622,0.017227,0.015167,0.019288,0.004121,-8.062032,0.019288,0.160769,0.138037,0.183502,-0.045466,89.296043,0.138037,0.006441,0.006298,0.006584,-0.000286,0.567523,0.006298,0.002723,0.002662,0.002783,-0.000121,0.239369,0.002662,0.03797,0.035878,0.040062,0.004183,-8.163054,0.040062,0.016051,0.015167,0.016936,0.001769,-3.452375,0.016936,0.172136,0.160769,0.183502,-0.022733,44.739773,0.160769,0.006512,0.006441,0.006584,-0.000143,0.287054,0.006441,0.002753,0.002723,0.002783,-6e-05,0.121076,0.002723,0.045622,0.045622,0.045622,,,2.0,0.019288,0.019288,0.019288,,,2.0,0.138037,0.138037,0.138037,,,2.0,0.006298,0.006298,0.006298,,,2.0,0.002662,0.002662,0.002662,,,2.0,0.045622,0.045622,0.045622,,,2.0,0.019288,0.019288,0.019288,,,2.0,0.138037,0.138037,0.138037,,,2.0,0.006298,0.006298,0.006298,,,2.0,0.002662,0.002662,0.002662,,,2.0,0,0,0
2,1.0,month,1962,1,63941820000.0,78273.0,3.9e-05,3.4e-05,4.1e-05,1.6e-05,0.006707,0.002817,9.318297e-07,9.318297e-07,0.006789,0.002869,0.159697,3.9e-05,3.9e-05,4e-05,-1.807257e-07,0.000394,3.9e-05,3.4e-05,3.4e-05,3.5e-05,-2.306404e-07,0.000487,3.4e-05,4.1e-05,4.1e-05,4.2e-05,-5.785319e-07,0.001176,4.1e-05,1.7e-05,1.6e-05,1.7e-05,-2.314127e-07,0.00047,1.6e-05,0.006475,0.006217,0.006707,0.000103,-0.195775,0.006578,0.00272,0.002611,0.002817,4.3e-05,-0.082226,0.002763,9.24757e-07,9.185213e-07,9.318297e-07,6.654189e-09,-1.2e-05,9.314112e-07,9.24757e-07,9.185213e-07,9.318297e-07,6.654189e-09,-1.2e-05,9.314112e-07,0.006557,0.006298,0.006789,0.000102,-0.194218,0.006659,0.002771,0.002662,0.002869,4.3e-05,-0.081281,0.002814,0.160412,0.138037,0.183502,-0.011902,23.501096,0.14851,3.9e-05,3.4e-05,4.1e-05,1.6e-05,0.006578,0.002763,9.314112e-07,9.314112e-07,0.006659,0.002814,0.14851,0,0,0,0,0,0,0,0,0,0.042509,0.017964,0.040874,0.017277,0.044839,0.01895,0.044839,0.01895,0.041337,0.035878,0.045622,0.003315,-6.459717,0.044652,0.017473,0.015167,0.019288,0.001399,-2.725537,0.018872,0.160412,0.138037,0.183502,-0.011902,23.501096,0.14851,0.006557,0.006298,0.006789,0.000102,-0.194218,0.006659,0.002771,0.002662,0.002869,4.3e-05,-0.081281,0.002814,0.038938,0.035878,0.040874,0.002498,-4.858888,0.041435,0.01646,0.015167,0.017277,0.001055,-2.052816,0.017515,0.168228,0.160412,0.183502,-0.011545,22.808124,0.156683,0.006527,0.006441,0.006584,-1.4e-05,0.03314,0.006513,0.002759,0.002723,0.002783,-6e-06,0.014193,0.002753,0.045231,0.044839,0.045622,-0.000784,1.582209,0.044839,0.019119,0.01895,0.019288,-0.000337,0.680718,0.01895,0.143273,0.138037,0.14851,0.010473,-20.399494,0.14851,0.006478,0.006298,0.006659,0.000361,-0.702477,0.006659,0.002738,0.002662,0.002814,0.000152,-0.295229,0.002814,0.045231,0.044839,0.045622,-0.000784,1.582209,0.044839,0.019119,0.01895,0.019288,-0.000337,0.680718,0.01895,0.143273,0.138037,0.14851,0.010473,-20.399494,0.14851,0.006478,0.006298,0.006659,0.000361,-0.702477,0.00665
9,0.002738,0.002662,0.002814,0.000152,-0.295229,0.002814,0,0,0
3,1.0,month,1963,1,63941820000.0,78273.0,4.1e-05,3.6e-05,4.2e-05,1.7e-05,0.009005,0.003782,9.986882e-07,9.986882e-07,0.009089,0.003836,0.125454,4e-05,3.9e-05,4.1e-05,4.609633e-07,-0.000864,4e-05,3.5e-05,3.4e-05,3.6e-05,3.597074e-07,-0.000671,3.5e-05,4.1e-05,4.1e-05,4.2e-05,-1.194921e-07,0.000276,4.1e-05,1.7e-05,1.6e-05,1.7e-05,-4.77968e-08,0.00011,1.7e-05,0.007108,0.006217,0.009005,0.0008,-1.56265,0.008308,0.002985,0.002611,0.003782,0.000336,-0.656313,0.003489,9.432398e-07,9.185213e-07,9.986882e-07,2.484106e-08,-4.8e-05,9.805014e-07,9.432398e-07,9.185213e-07,9.986882e-07,2.484106e-08,-4.8e-05,9.805014e-07,0.00719,0.006298,0.009089,0.000801,-1.563286,0.008391,0.003038,0.002662,0.003836,0.000336,-0.656921,0.003542,0.151673,0.125454,0.183502,-0.015248,30.061265,0.1288,4e-05,3.5e-05,4.1e-05,1.7e-05,0.008308,0.003489,9.805014e-07,9.805014e-07,0.008391,0.003542,0.1288,0,0,0,0,0,0,0,0,0,0.072448,0.030575,0.047403,0.020027,0.065145,0.027502,0.065145,0.027502,0.049114,0.035878,0.072448,0.01066,-20.859702,0.065104,0.020748,0.015167,0.030575,0.00449,-8.786978,0.027484,0.151673,0.125454,0.183502,-0.015248,30.061265,0.1288,0.00719,0.006298,0.009089,0.000801,-1.563286,0.008391,0.003038,0.002662,0.003836,0.000336,-0.656921,0.003542,0.041054,0.035878,0.047403,0.003539,-6.899769,0.046362,0.017352,0.015167,0.020027,0.001492,-2.909652,0.01959,0.164089,0.151673,0.183502,-0.009585,18.964321,0.149712,0.006693,0.006441,0.00719,0.000193,-0.372616,0.006983,0.002829,0.002723,0.003038,8.1e-05,-0.156443,0.002951,0.051869,0.044839,0.065145,0.009761,-19.09972,0.06163,0.021913,0.01895,0.027502,0.004107,-8.036097,0.02602,0.138449,0.1288,0.14851,-0.004618,9.199475,0.13383,0.007116,0.006298,0.008391,0.001047,-2.046223,0.008162,0.003006,0.002662,0.003542,0.00044,-0.8601,0.003446,0.051869,0.044839,0.065145,0.009761,-19.09972,0.06163,0.021913,0.01895,0.027502,0.004107,-8.036097,0.02602,0.138449,0.1288,0.14851,-0.004618,9.199475,0.13383,0.007116,0.006298,0.008391,0.001047,-2.046223,0.008162,0.003006,0.0
02662,0.003542,0.00044,-0.8601,0.003446,0,0,0
4,1.0,month,1964,1,63941820000.0,78273.0,4.3e-05,3.8e-05,4.3e-05,1.7e-05,0.007903,0.003319,1.071288e-06,1.071288e-06,0.00799,0.003376,0.151351,4.1e-05,3.9e-05,4.3e-05,9.716947e-07,-0.001866,4.2e-05,3.5e-05,3.4e-05,3.8e-05,8.210459e-07,-0.001575,3.7e-05,4.2e-05,4.1e-05,4.3e-05,2.861403e-07,-0.00052,4.2e-05,1.7e-05,1.6e-05,1.7e-05,1.144561e-07,-0.000208,1.7e-05,0.007267,0.006217,0.009005,0.000559,-1.089829,0.008385,0.003052,0.002611,0.003782,0.000235,-0.457728,0.003522,9.688495e-07,9.185213e-07,1.071288e-06,3.803019e-08,-7.4e-05,1.04491e-06,9.688495e-07,9.185213e-07,1.071288e-06,3.803019e-08,-7.4e-05,1.04491e-06,0.00735,0.006298,0.009089,0.00056,-1.092289,0.008471,0.003105,0.002662,0.003836,0.000236,-0.459585,0.003577,0.151608,0.125454,0.183502,-0.007688,15.236411,0.136231,4.2e-05,3.7e-05,4.2e-05,1.7e-05,0.008385,0.003522,1.04491e-06,1.04491e-06,0.008471,0.003577,0.136231,0,0,0,0,0,0,0,0,0,0.052794,0.022303,0.048479,0.020481,0.062179,0.026255,0.062179,0.026255,0.04985,0.035878,0.072448,0.006066,-11.851051,0.061982,0.021059,0.015167,0.030575,0.002556,-4.993788,0.026171,0.151608,0.125454,0.183502,-0.007688,15.236411,0.136231,0.00735,0.006298,0.009089,0.00056,-1.092289,0.008471,0.003105,0.002662,0.003836,0.000236,-0.459585,0.003577,0.042539,0.035878,0.048479,0.003254,-6.34237,0.049048,0.017978,0.015167,0.020481,0.001372,-2.674,0.020722,0.161593,0.151608,0.183502,-0.007288,14.461568,0.147016,0.006824,0.006441,0.00735,0.000228,-0.440747,0.00728,0.002884,0.002723,0.003105,9.6e-05,-0.185241,0.003076,0.054446,0.044839,0.065145,0.006998,-13.678531,0.064943,0.022999,0.01895,0.027502,0.002945,-5.757399,0.027417,0.137894,0.1288,0.14851,-0.002513,5.068764,0.134126,0.007455,0.006298,0.008471,0.000825,-1.611864,0.008692,0.003149,0.002662,0.003577,0.000347,-0.678051,0.00367,0.054446,0.044839,0.065145,0.006998,-13.678531,0.064943,0.022999,0.01895,0.027502,0.002945,-5.757399,0.027417,0.137894,0.1288,0.14851,-0.002513,5.068764,0.134126,0.007455,0.006298,0.008471,0.000825,-1.611864,0.008
692,0.003149,0.002662,0.003577,0.000347,-0.678051,0.00367,0,0,0


In [25]:
# Column labels of the sampled left table, in table order.
all_columns = list(df_complete.columns)

In [26]:
# Number of columns in the left input table, taken from the sample.
len(all_columns)

234

In [27]:
# Number of columns selected for the output; compare with the column count
# of the left table.
len(columns_to_keep_left)

234

In [28]:
# What Columns are excluded?
# Columns present in the sampled left table but not selected for the output.
missing_columns = set(all_columns) - set(columns_to_keep_left)
# Show the answer as the cell output instead of discarding it.
missing_columns

In [29]:
# Annual-score columns taken from the right table.
columns_to_keep_right = [
    "avg1y_ols_capped_ols10_waterstress_dimensionless_30spfaf06",
    "avg1y_ols_capped_ols10_weighted_waterstress_dimensionless_30spfaf06",
    "avg1y_ols_capped_ols10_waterdepletion_dimensionless_30spfaf06",
    "avg1y_ols_capped_ols10_weighted_waterdepletion_dimensionless_30spfaf06",
]

In [30]:
# Build the CREATE TABLE ... AS SELECT statement for the output table.
# Left columns are qualified with alias l, right columns with alias r.
select_items = [" l.{},".format(column_name) for column_name in columns_to_keep_left]
select_items += [" r.{},".format(column_name) for column_name in columns_to_keep_right]

sql = "".join(["CREATE TABLE {} AS".format(OUTPUT_TABLE_NAME), " SELECT "] + select_items)
sql = sql[:-1]  # drop the comma left behind by the last select item
# Rows are matched on the concatenation of delta_id and year.
# NOTE(review): a plain "l.delta_id = r.delta_id AND l.year = r.year" may be
# equivalent and index-friendly; confirm the column types before changing it.
sql = "".join([sql,
               " FROM {} l".format(INPUT_TABLE_NAME_LEFT),
               " INNER JOIN {} r ON".format(INPUT_TABLE_NAME_RIGHT),
               " CONCAT(l.delta_id,l.year) = CONCAT(r.delta_id,r.year)"])

In [31]:
# Display the CREATE TABLE statement for inspection before it is executed.
sql

'CREATE TABLE y2018m07d27_rh_deltas_merge_simplify_tables_v01_v03 AS SELECT  l.delta_id, l.temporal_resolution, l.year, l.month, l.area_m2_30spfaf06, l.area_count_30spfaf06, l.ptotww_m_30spfaf06, l.ptotwn_m_30spfaf06, l.pdomww_m_30spfaf06, l.pdomwn_m_30spfaf06, l.pindww_m_30spfaf06, l.pindwn_m_30spfaf06, l.pirrww_m_30spfaf06, l.pirrwn_m_30spfaf06, l.plivww_m_30spfaf06, l.plivwn_m_30spfaf06, l.riverdischarge_m_30spfaf06, l.ma10_ptotww_m_30spfaf06, l.ma10_ptotwn_m_30spfaf06, l.ma10_pdomww_m_30spfaf06, l.ma10_pdomwn_m_30spfaf06, l.ma10_pindww_m_30spfaf06, l.ma10_pindwn_m_30spfaf06, l.ma10_pirrww_m_30spfaf06, l.ma10_pirrwn_m_30spfaf06, l.ma10_plivww_m_30spfaf06, l.ma10_plivwn_m_30spfaf06, l.ma10_riverdischarge_m_30spfaf06, l.min10_ptotww_m_30spfaf06, l.min10_ptotwn_m_30spfaf06, l.min10_pdomww_m_30spfaf06, l.min10_pdomwn_m_30spfaf06, l.min10_pindww_m_30spfaf06, l.min10_pindwn_m_30spfaf06, l.min10_pirrww_m_30spfaf06, l.min10_pirrwn_m_30spfaf06, l.min10_plivww_m_30spfaf06, l.min10_plivwn_m_30

In [32]:
# Run the statement held in `sql`, which creates the output table from the
# joined input tables.
result = engine.execute(sql)

In [33]:
# Index on delta_id; the index name is the output table name with "delta_id" appended.
sql_index = "CREATE INDEX {}delta_id ON {} ({})".format(OUTPUT_TABLE_NAME,OUTPUT_TABLE_NAME,"delta_id")

In [34]:
# Create the delta_id index on the output table.
result = engine.execute(sql_index)

In [35]:
# Index on year; the index name is the output table name with "year" appended.
sql_index2 = "CREATE INDEX {}year ON {} ({})".format(OUTPUT_TABLE_NAME,OUTPUT_TABLE_NAME,"year")

In [36]:
# Create the year index on the output table.
result = engine.execute(sql_index2)

In [37]:
# Index on month; the index name is the output table name with "month" appended.
sql_index3 = "CREATE INDEX {}month ON {} ({})".format(OUTPUT_TABLE_NAME,OUTPUT_TABLE_NAME,"month")

In [38]:
# Create the month index on the output table.
result = engine.execute(sql_index3)

In [39]:
# All database work is done; release the engine's pooled connections.
engine.dispose()

In [40]:
# Report the wall-clock duration of the run, measured from `start`, which was
# set in the timestamp cell near the top of the notebook.
end = datetime.datetime.now()
elapsed = end - start
print(elapsed)

0:00:02.945952


Previous runs:  
0:00:02.447159  
0:00:02.080728  
0:00:02.926066  
0:00:02.884164
