In [1]:
""" Merge and simplify master table and annual scores based on months.
-------------------------------------------------------------------------------

Selects an explicit list of columns from the left input table, joins the
annual score columns of the right input table on pfafid_30spfaf06 and year,
stores the result in a new table and creates indexes on that table.

Author: Rutger Hofste
Date: 20180712
Kernel: python35
Docker: rutgerhofste/gisdocker:ubuntu16.04

Args:
    TESTING (Boolean) : Toggle testing case. Not read by the cells visible
        in this notebook.
    OVERWRITE_OUTPUT (Boolean) : Drop the output table first if it exists.
    SCRIPT_NAME (string) : Script name.
    OUTPUT_VERSION (integer) : output version.
    DATABASE_ENDPOINT (string) : RDS or PostgreSQL endpoint.
    DATABASE_NAME (string) : Database name.
    INPUT_TABLE_NAME_LEFT (string) : Table that supplies the bulk of the
        columns. Must exist on the database named above.
    INPUT_TABLE_NAME_RIGHT (string) : Table that supplies the annual score
        columns. Must exist on the database named above.

"""

TESTING = 0
OVERWRITE_OUTPUT = 1
SCRIPT_NAME = 'Y2018M07D12_RH_Merge_Simplify_Tables_PostGIS_V01'
OUTPUT_VERSION = 6

DATABASE_ENDPOINT = "aqueduct30v05.cgpnumwmfcqc.eu-central-1.rds.amazonaws.com"
DATABASE_NAME = "database01"

INPUT_TABLE_NAME_LEFT = "y2018m07d09_rh_apply_aridlowonce_mask_postgis_v01_v02"
INPUT_TABLE_NAME_RIGHT = "y2018m07d12_rh_annual_scores_from_months_postgis_v01_v05"
# The version is an integer, so format it as one; a non-integer value now
# raises instead of being silently rounded into the table name.
OUTPUT_TABLE_NAME = SCRIPT_NAME.lower() + "_v{:02d}".format(OUTPUT_VERSION)

print("Input Table Left: " , INPUT_TABLE_NAME_LEFT, 
      "Input Table Right: " , INPUT_TABLE_NAME_RIGHT, 
      "\nOutput Table: " , OUTPUT_TABLE_NAME)

Input Table Left:  y2018m07d09_rh_apply_aridlowonce_mask_postgis_v01_v02 Input Table Right:  y2018m07d12_rh_annual_scores_from_months_postgis_v01_v05 
Output Table:  y2018m07d12_rh_merge_simplify_tables_postgis_v01_v06


In [2]:
import time, datetime, sys

# The time stamp is labelled "UTC", so format both stamps from gmtime().
# Without an explicit time tuple, strftime() formats the machine's local time.
utc_now = time.gmtime()
dateString = time.strftime("Y%YM%mD%d", utc_now)
timeString = time.strftime("UTC %H:%M", utc_now)
# Reference point for the elapsed-time report at the end of the notebook.
start = datetime.datetime.now()
print(dateString,timeString)
sys.version

Y2018M08D22 UTC 09:41


'3.5.4 |Anaconda, Inc.| (default, Nov 20 2017, 18:44:38) \n[GCC 7.2.0]'

In [3]:
# Imports. Of these, the cells visible in this notebook reference only `pd`
# and `create_engine` (the latter arrives through the sqlalchemy wildcard).
import re
import os
import numpy as np
import pandas as pd
import aqueduct3
from datetime import timedelta
# NOTE(review): the wildcard import hides where `create_engine` comes from and
# can shadow other names; an explicit import would be clearer. Left unchanged
# because other cells may rely on further sqlalchemy names.
from sqlalchemy import *
# Show up to 500 columns when a DataFrame is displayed.
pd.set_option('display.max_columns', 500)

In [4]:
# Read the database password from the first line of the password file.
# The context manager closes the file even when reading or indexing fails.
with open("/.password","r") as password_file:
    password = password_file.read().splitlines()[0]

# NOTE(review): the password is inserted into the URL verbatim; characters
# such as "@" or "/" would need URL-escaping. Confirm the stored password
# does not contain any.
engine = create_engine("postgresql://rutgerhofste:{}@{}:5432/{}".format(password,DATABASE_ENDPOINT,DATABASE_NAME))

if OVERWRITE_OUTPUT:
    # Remove an earlier output table so the CREATE TABLE statement executed
    # later in the notebook does not collide with it.
    sql = "DROP TABLE IF EXISTS {};".format(OUTPUT_TABLE_NAME)
    print(sql)
    result = engine.execute(sql)

DROP TABLE IF EXISTS y2018m07d12_rh_merge_simplify_tables_postgis_v01_v06;


In [5]:
# Identifier, time and area columns. The indicator columns are appended to
# this list by the cells that follow.
columns_to_keep_left = [
    "pfafid_30spfaf06",
    "temporal_resolution",
    "year",
    "month",
    "area_m2_30spfaf06",
    "area_count_30spfaf06",
]

In [6]:
# Placeholder only: the list is reassigned with the annual score columns
# before the join statement is built.
columns_to_keep_right = []

## Raw Data and Decadal Statistics

In [7]:
# Sector prefixes and use-type suffixes that are combined into indicator names.
# NOTE(review): presumably total/domestic/industrial/irrigation/livestock and
# gross withdrawal (ww) / net withdrawal (wn) - confirm against the data docs.
sectors = ["ptot", "pdom", "pind", "pirr", "pliv"]
use_types = ["ww", "wn"]

In [8]:
decadal_indicators = []
for sector in sectors:
    for use_type in use_types:
        decadal_indicators.append("{}{}".format(sector,use_type))

In [9]:
# riverdischarge is not a sector/use-type combination, so it is added by name.
# The membership test keeps the list unchanged when this cell is run again.
if "riverdischarge" not in decadal_indicators:
    decadal_indicators.append("riverdischarge")

In [10]:
# Column-name prefixes for the decadal statistics. The empty prefix selects
# the column without any statistic prefix.
decadal_statistics = [
    "",
    "ma10_",
    "min10_",
    "max10_",
    "slope10_",
    "intercept10_",
    "ols10_",
    "capped_ols10_",
]

In [11]:
# Select "<statistic><indicator>_m_30spfaf06" for every statistic/indicator pair.
for decadal_statistic in decadal_statistics:
    for decadal_indicator in decadal_indicators:
        indicator = "{}{}_m_30spfaf06".format(decadal_statistic,decadal_indicator)
        print(indicator)
        # Skip names that are already selected, so running this cell again
        # does not put the same column into the SELECT list twice.
        if indicator not in columns_to_keep_left:
            columns_to_keep_left.append(indicator)

ptotww_m_30spfaf06
ptotwn_m_30spfaf06
pdomww_m_30spfaf06
pdomwn_m_30spfaf06
pindww_m_30spfaf06
pindwn_m_30spfaf06
pirrww_m_30spfaf06
pirrwn_m_30spfaf06
plivww_m_30spfaf06
plivwn_m_30spfaf06
riverdischarge_m_30spfaf06
ma10_ptotww_m_30spfaf06
ma10_ptotwn_m_30spfaf06
ma10_pdomww_m_30spfaf06
ma10_pdomwn_m_30spfaf06
ma10_pindww_m_30spfaf06
ma10_pindwn_m_30spfaf06
ma10_pirrww_m_30spfaf06
ma10_pirrwn_m_30spfaf06
ma10_plivww_m_30spfaf06
ma10_plivwn_m_30spfaf06
ma10_riverdischarge_m_30spfaf06
min10_ptotww_m_30spfaf06
min10_ptotwn_m_30spfaf06
min10_pdomww_m_30spfaf06
min10_pdomwn_m_30spfaf06
min10_pindww_m_30spfaf06
min10_pindwn_m_30spfaf06
min10_pirrww_m_30spfaf06
min10_pirrwn_m_30spfaf06
min10_plivww_m_30spfaf06
min10_plivwn_m_30spfaf06
min10_riverdischarge_m_30spfaf06
max10_ptotww_m_30spfaf06
max10_ptotwn_m_30spfaf06
max10_pdomww_m_30spfaf06
max10_pdomwn_m_30spfaf06
max10_pindww_m_30spfaf06
max10_pindwn_m_30spfaf06
max10_pirrww_m_30spfaf06
max10_pirrwn_m_30spfaf06
max10_plivww_m_30spfaf06
max

## Statistics on Decadal Statistics

In [12]:
# Statistics computed on top of decadal statistics. Column names are
# "<outer statistic><decadal statistic><indicator>_m_30spfaf06".
tier2_decadal_indicators = ["ptotww",
                            "ptotwn",
                            "riverdischarge"]

tier2_decadal_statistics_0 = ["ols_","avg_","min_","max_","slope_","intercept_"]
tier2_decadal_statistics_1 = ["ma10_","ols10_"]

for tier2_decadal_indicator in tier2_decadal_indicators:
    for tier2_decadal_statistic_0 in tier2_decadal_statistics_0:
        for tier2_decadal_statistic_1 in tier2_decadal_statistics_1:
            indicator = "{}{}{}_m_30spfaf06".format(tier2_decadal_statistic_0,tier2_decadal_statistic_1,tier2_decadal_indicator)
            print(indicator)
            # Skip names that are already selected, so running this cell
            # again does not put the same column into the SELECT list twice.
            if indicator not in columns_to_keep_left:
                columns_to_keep_left.append(indicator)
            
    


ols_ma10_ptotww_m_30spfaf06
ols_ols10_ptotww_m_30spfaf06
avg_ma10_ptotww_m_30spfaf06
avg_ols10_ptotww_m_30spfaf06
min_ma10_ptotww_m_30spfaf06
min_ols10_ptotww_m_30spfaf06
max_ma10_ptotww_m_30spfaf06
max_ols10_ptotww_m_30spfaf06
slope_ma10_ptotww_m_30spfaf06
slope_ols10_ptotww_m_30spfaf06
intercept_ma10_ptotww_m_30spfaf06
intercept_ols10_ptotww_m_30spfaf06
ols_ma10_ptotwn_m_30spfaf06
ols_ols10_ptotwn_m_30spfaf06
avg_ma10_ptotwn_m_30spfaf06
avg_ols10_ptotwn_m_30spfaf06
min_ma10_ptotwn_m_30spfaf06
min_ols10_ptotwn_m_30spfaf06
max_ma10_ptotwn_m_30spfaf06
max_ols10_ptotwn_m_30spfaf06
slope_ma10_ptotwn_m_30spfaf06
slope_ols10_ptotwn_m_30spfaf06
intercept_ma10_ptotwn_m_30spfaf06
intercept_ols10_ptotwn_m_30spfaf06
ols_ma10_riverdischarge_m_30spfaf06
ols_ols10_riverdischarge_m_30spfaf06
avg_ma10_riverdischarge_m_30spfaf06
avg_ols10_riverdischarge_m_30spfaf06
min_ma10_riverdischarge_m_30spfaf06
min_ols10_riverdischarge_m_30spfaf06
max_ma10_riverdischarge_m_30spfaf06
max_ols10_riverdischarge_m_30

## Complete TimeSeries Statistics

In [13]:
# Statistics over the full time series exist only for ptotww, ptotwn and
# riverdischarge.
complete_timeseries_statistics = ["avg_", "min_", "max_", "slope_", "intercept_", "ols_"]

complete_timeseries_indicators = ["ptotww", "ptotwn", "riverdischarge"]


In [14]:
# Select "<statistic><indicator>_m_30spfaf06" for the full time-series statistics.
for complete_timeseries_statistic in complete_timeseries_statistics:
    for complete_timeseries_indicator in complete_timeseries_indicators:
        indicator = "{}{}_m_30spfaf06".format(complete_timeseries_statistic,complete_timeseries_indicator)
        print(indicator)
        # Skip names that are already selected, so running this cell again
        # does not put the same column into the SELECT list twice.
        if indicator not in columns_to_keep_left:
            columns_to_keep_left.append(indicator)

avg_ptotww_m_30spfaf06
avg_ptotwn_m_30spfaf06
avg_riverdischarge_m_30spfaf06
min_ptotww_m_30spfaf06
min_ptotwn_m_30spfaf06
min_riverdischarge_m_30spfaf06
max_ptotww_m_30spfaf06
max_ptotwn_m_30spfaf06
max_riverdischarge_m_30spfaf06
slope_ptotww_m_30spfaf06
slope_ptotwn_m_30spfaf06
slope_riverdischarge_m_30spfaf06
intercept_ptotww_m_30spfaf06
intercept_ptotwn_m_30spfaf06
intercept_riverdischarge_m_30spfaf06
ols_ptotww_m_30spfaf06
ols_ptotwn_m_30spfaf06
ols_riverdischarge_m_30spfaf06


## Raw and Decadal Arid and Lowwater Use Columns

In [15]:
# Boolean mask columns, named "<statistic><indicator>_boolean_30spfaf06".
# The empty prefix selects the column without a statistic prefix.
arid_lowwateruse_indicators = ["arid",
                               "lowwateruse",
                               "aridandlowwateruse"]

arid_lowwateruse_statistics_tier0 = ["",
                                     "ma10_",
                                     "ols10_"]

for arid_lowwateruse_indicator in arid_lowwateruse_indicators:
    for arid_lowwateruse_statistic_tier0 in arid_lowwateruse_statistics_tier0:
        indicator = "{}{}_boolean_30spfaf06".format(arid_lowwateruse_statistic_tier0,arid_lowwateruse_indicator)
        print(indicator)
        # Skip names that are already selected, so running this cell again
        # does not put the same column into the SELECT list twice.
        if indicator not in columns_to_keep_left:
            columns_to_keep_left.append(indicator)



arid_boolean_30spfaf06
ma10_arid_boolean_30spfaf06
ols10_arid_boolean_30spfaf06
lowwateruse_boolean_30spfaf06
ma10_lowwateruse_boolean_30spfaf06
ols10_lowwateruse_boolean_30spfaf06
aridandlowwateruse_boolean_30spfaf06
ma10_aridandlowwateruse_boolean_30spfaf06
ols10_aridandlowwateruse_boolean_30spfaf06


## Statistics on Decadal Statistics Arid and Lowwater Use

In [16]:
# Statistics on the decadal boolean mask columns, named
# "<outer statistic><decadal statistic><indicator>_boolean_30spfaf06".
lowarid_tier2_decadal_indicators = ["arid",
                                    "lowwateruse",
                                    "aridandlowwateruse"]

lowarid_tier2_decadal_statistics_0 = ["ols_"]
lowarid_tier2_decadal_statistics_1 = ["ols10_"]

for lowarid_tier2_decadal_indicator in lowarid_tier2_decadal_indicators:
    for lowarid_tier2_decadal_statistic_0 in lowarid_tier2_decadal_statistics_0:
        for lowarid_tier2_decadal_statistic_1 in lowarid_tier2_decadal_statistics_1:
            indicator = "{}{}{}_boolean_30spfaf06".format(lowarid_tier2_decadal_statistic_0,lowarid_tier2_decadal_statistic_1,lowarid_tier2_decadal_indicator)
            print(indicator)
            # Skip names that are already selected, so running this cell
            # again does not put the same column into the SELECT list twice.
            if indicator not in columns_to_keep_left:
                columns_to_keep_left.append(indicator)

ols_ols10_arid_boolean_30spfaf06
ols_ols10_lowwateruse_boolean_30spfaf06
ols_ols10_aridandlowwateruse_boolean_30spfaf06


## Water Stress Decadal

In [17]:
# Water stress and water depletion columns, named
# "<statistic><indicator>_dimensionless_30spfaf06". The empty prefix selects
# the column without a statistic prefix.
waterstress_decadal_indicators = ["waterstress","waterdepletion"]

waterstress_decadal_statistics = ["",
                                  "ma10_",
                                  "ols10_"]

for waterstress_decadal_indicator in waterstress_decadal_indicators:
    for waterstress_decadal_statistic in waterstress_decadal_statistics:
        indicator = "{}{}_dimensionless_30spfaf06".format(waterstress_decadal_statistic,waterstress_decadal_indicator)
        print(indicator)
        # Skip names that are already selected, so running this cell again
        # does not put the same column into the SELECT list twice.
        if indicator not in columns_to_keep_left:
            columns_to_keep_left.append(indicator)


waterstress_dimensionless_30spfaf06
ma10_waterstress_dimensionless_30spfaf06
ols10_waterstress_dimensionless_30spfaf06
waterdepletion_dimensionless_30spfaf06
ma10_waterdepletion_dimensionless_30spfaf06
ols10_waterdepletion_dimensionless_30spfaf06


## Statistics on Decadal Statistics Water Stress

In [18]:
# Statistics on the decadal water stress / depletion columns, named
# "<outer statistic><decadal statistic><indicator>_dimensionless_30spfaf06".
waterstress_tier2_decadal_indicators = ["waterstress","waterdepletion"]

waterstress_tier2_decadal_statistics_0 = ["avg_","min_","max_","slope_","intercept_","ols_"]
waterstress_tier2_decadal_statistics_1 = ["ols10_","ma10_"]


for waterstress_tier2_decadal_indicator in waterstress_tier2_decadal_indicators:
    for waterstress_tier2_decadal_statistic_0 in waterstress_tier2_decadal_statistics_0:
        for waterstress_tier2_decadal_statistic_1 in waterstress_tier2_decadal_statistics_1:
            indicator = "{}{}{}_dimensionless_30spfaf06".format(waterstress_tier2_decadal_statistic_0,waterstress_tier2_decadal_statistic_1,waterstress_tier2_decadal_indicator)
            print(indicator)
            # Skip names that are already selected, so running this cell
            # again does not put the same column into the SELECT list twice.
            if indicator not in columns_to_keep_left:
                columns_to_keep_left.append(indicator)

avg_ols10_waterstress_dimensionless_30spfaf06
avg_ma10_waterstress_dimensionless_30spfaf06
min_ols10_waterstress_dimensionless_30spfaf06
min_ma10_waterstress_dimensionless_30spfaf06
max_ols10_waterstress_dimensionless_30spfaf06
max_ma10_waterstress_dimensionless_30spfaf06
slope_ols10_waterstress_dimensionless_30spfaf06
slope_ma10_waterstress_dimensionless_30spfaf06
intercept_ols10_waterstress_dimensionless_30spfaf06
intercept_ma10_waterstress_dimensionless_30spfaf06
ols_ols10_waterstress_dimensionless_30spfaf06
ols_ma10_waterstress_dimensionless_30spfaf06
avg_ols10_waterdepletion_dimensionless_30spfaf06
avg_ma10_waterdepletion_dimensionless_30spfaf06
min_ols10_waterdepletion_dimensionless_30spfaf06
min_ma10_waterdepletion_dimensionless_30spfaf06
max_ols10_waterdepletion_dimensionless_30spfaf06
max_ma10_waterdepletion_dimensionless_30spfaf06
slope_ols10_waterdepletion_dimensionless_30spfaf06
slope_ma10_waterdepletion_dimensionless_30spfaf06
intercept_ols10_waterdepletion_dimensionless_3

## Water Stress Complete Timeseries

In [19]:
# Full time-series statistics for water stress and water depletion, named
# "<statistic><indicator>_dimensionless_30spfaf06".
waterstress_complete_indicators = ["waterstress","waterdepletion"]

waterstress_complete_statistics = ["min_",
                                   "max_",
                                   "avg_",
                                   "slope_",
                                   "intercept_",
                                   "ols_"]

for waterstress_complete_indicator in waterstress_complete_indicators:
    for waterstress_complete_statistic in waterstress_complete_statistics:
        indicator = "{}{}_dimensionless_30spfaf06".format(waterstress_complete_statistic,waterstress_complete_indicator)
        print(indicator)
        # Skip names that are already selected, so running this cell again
        # does not put the same column into the SELECT list twice.
        if indicator not in columns_to_keep_left:
            columns_to_keep_left.append(indicator)



min_waterstress_dimensionless_30spfaf06
max_waterstress_dimensionless_30spfaf06
avg_waterstress_dimensionless_30spfaf06
slope_waterstress_dimensionless_30spfaf06
intercept_waterstress_dimensionless_30spfaf06
ols_waterstress_dimensionless_30spfaf06
min_waterdepletion_dimensionless_30spfaf06
max_waterdepletion_dimensionless_30spfaf06
avg_waterdepletion_dimensionless_30spfaf06
slope_waterdepletion_dimensionless_30spfaf06
intercept_waterdepletion_dimensionless_30spfaf06
ols_waterdepletion_dimensionless_30spfaf06


In [20]:
# Preview statement: the selected columns of the left table, first 100 rows.
# str.join places the commas between the names, so no trailing comma has to be
# sliced off afterwards (slicing would cut into the keyword for an empty list).
# NOTE(review): this statement is only displayed; `sql` is reassigned before
# anything is executed.
sql = "SELECT {}".format(", ".join(columns_to_keep_left))
sql += " FROM {}".format(INPUT_TABLE_NAME_LEFT)
sql += " LIMIT 100"

In [21]:
# Display the generated SELECT statement for visual inspection.
sql

'SELECT pfafid_30spfaf06, temporal_resolution, year, month, area_m2_30spfaf06, area_count_30spfaf06, ptotww_m_30spfaf06, ptotwn_m_30spfaf06, pdomww_m_30spfaf06, pdomwn_m_30spfaf06, pindww_m_30spfaf06, pindwn_m_30spfaf06, pirrww_m_30spfaf06, pirrwn_m_30spfaf06, plivww_m_30spfaf06, plivwn_m_30spfaf06, riverdischarge_m_30spfaf06, ma10_ptotww_m_30spfaf06, ma10_ptotwn_m_30spfaf06, ma10_pdomww_m_30spfaf06, ma10_pdomwn_m_30spfaf06, ma10_pindww_m_30spfaf06, ma10_pindwn_m_30spfaf06, ma10_pirrww_m_30spfaf06, ma10_pirrwn_m_30spfaf06, ma10_plivww_m_30spfaf06, ma10_plivwn_m_30spfaf06, ma10_riverdischarge_m_30spfaf06, min10_ptotww_m_30spfaf06, min10_ptotwn_m_30spfaf06, min10_pdomww_m_30spfaf06, min10_pdomwn_m_30spfaf06, min10_pindww_m_30spfaf06, min10_pindwn_m_30spfaf06, min10_pirrww_m_30spfaf06, min10_pirrwn_m_30spfaf06, min10_plivww_m_30spfaf06, min10_plivwn_m_30spfaf06, min10_riverdischarge_m_30spfaf06, max10_ptotww_m_30spfaf06, max10_ptotwn_m_30spfaf06, max10_pdomww_m_30spfaf06, max10_pdomwn_m_3

In [22]:
# Overwrite `sql` with a query for a 10-row sample of every column of the left table.
sql = "SELECT * FROM {} LIMIT 10".format(INPUT_TABLE_NAME_LEFT)

In [23]:
# Run the statement currently held in `sql` and load the result into a DataFrame.
# NOTE(review): when `sql` is the LIMIT 10 sample query, this frame holds only
# those rows; "complete" then refers to the column set, not the row count.
df_complete = pd.read_sql(sql,engine)

In [24]:
# Show the first rows of the loaded frame.
df_complete.head()

Unnamed: 0,pfafid_30spfaf06,temporal_resolution,year,month,area_m2_30spfaf06,area_count_30spfaf06,pdomww_m_30spfaf06,pdomwn_m_30spfaf06,pindww_m_30spfaf06,pindwn_m_30spfaf06,pirrww_m_30spfaf06,pirrwn_m_30spfaf06,plivww_m_30spfaf06,plivwn_m_30spfaf06,ptotww_m_30spfaf06,ptotwn_m_30spfaf06,riverdischarge_m_30spfaf06,ma10_pdomww_m_30spfaf06,min10_pdomww_m_30spfaf06,max10_pdomww_m_30spfaf06,slope10_pdomww_m_30spfaf06,intercept10_pdomww_m_30spfaf06,ols10_pdomww_m_30spfaf06,ma10_pdomwn_m_30spfaf06,min10_pdomwn_m_30spfaf06,max10_pdomwn_m_30spfaf06,slope10_pdomwn_m_30spfaf06,intercept10_pdomwn_m_30spfaf06,ols10_pdomwn_m_30spfaf06,ma10_pindww_m_30spfaf06,min10_pindww_m_30spfaf06,max10_pindww_m_30spfaf06,slope10_pindww_m_30spfaf06,intercept10_pindww_m_30spfaf06,ols10_pindww_m_30spfaf06,ma10_pindwn_m_30spfaf06,min10_pindwn_m_30spfaf06,max10_pindwn_m_30spfaf06,slope10_pindwn_m_30spfaf06,intercept10_pindwn_m_30spfaf06,ols10_pindwn_m_30spfaf06,ma10_pirrww_m_30spfaf06,min10_pirrww_m_30spfaf06,max10_pirrww_m_30spfaf06,slope10_pirrww_m_30spfaf06,intercept10_pirrww_m_30spfaf06,ols10_pirrww_m_30spfaf06,ma10_pirrwn_m_30spfaf06,min10_pirrwn_m_30spfaf06,max10_pirrwn_m_30spfaf06,slope10_pirrwn_m_30spfaf06,intercept10_pirrwn_m_30spfaf06,ols10_pirrwn_m_30spfaf06,ma10_plivww_m_30spfaf06,min10_plivww_m_30spfaf06,max10_plivww_m_30spfaf06,slope10_plivww_m_30spfaf06,intercept10_plivww_m_30spfaf06,ols10_plivww_m_30spfaf06,ma10_plivwn_m_30spfaf06,min10_plivwn_m_30spfaf06,max10_plivwn_m_30spfaf06,slope10_plivwn_m_30spfaf06,intercept10_plivwn_m_30spfaf06,ols10_plivwn_m_30spfaf06,ma10_ptotww_m_30spfaf06,min10_ptotww_m_30spfaf06,max10_ptotww_m_30spfaf06,slope10_ptotww_m_30spfaf06,intercept10_ptotww_m_30spfaf06,ols10_ptotww_m_30spfaf06,ma10_ptotwn_m_30spfaf06,min10_ptotwn_m_30spfaf06,max10_ptotwn_m_30spfaf06,slope10_ptotwn_m_30spfaf06,intercept10_ptotwn_m_30spfaf06,ols10_ptotwn_m_30spfaf06,ma10_riverdischarge_m_30spfaf06,min10_riverdischarge_m_30spfaf06,max10_riverdischarge_m_30spfaf06,slope10_riverdisc
harge_m_30spfaf06,intercept10_riverdischarge_m_30spfaf06,ols10_riverdischarge_m_30spfaf06,capped_ols10_pdomww_m_30spfaf06,capped_ols10_pdomwn_m_30spfaf06,capped_ols10_pindww_m_30spfaf06,capped_ols10_pindwn_m_30spfaf06,capped_ols10_pirrww_m_30spfaf06,capped_ols10_pirrwn_m_30spfaf06,capped_ols10_plivww_m_30spfaf06,capped_ols10_plivwn_m_30spfaf06,capped_ols10_ptotww_m_30spfaf06,capped_ols10_ptotwn_m_30spfaf06,capped_ols10_riverdischarge_m_30spfaf06,arid_boolean_30spfaf06,ma10_arid_boolean_30spfaf06,ols10_arid_boolean_30spfaf06,lowwateruse_boolean_30spfaf06,ma10_lowwateruse_boolean_30spfaf06,ols10_lowwateruse_boolean_30spfaf06,aridandlowwateruse_boolean_30spfaf06,ma10_aridandlowwateruse_boolean_30spfaf06,ols10_aridandlowwateruse_boolean_30spfaf06,waterstress_dimensionless_30spfaf06,waterdepletion_dimensionless_30spfaf06,ma10_waterstress_dimensionless_30spfaf06,ma10_waterdepletion_dimensionless_30spfaf06,ols10_waterstress_dimensionless_30spfaf06,ols10_waterdepletion_dimensionless_30spfaf06,avg_waterstress_dimensionless_30spfaf06,min_waterstress_dimensionless_30spfaf06,max_waterstress_dimensionless_30spfaf06,slope_waterstress_dimensionless_30spfaf06,intercept_waterstress_dimensionless_30spfaf06,ols_waterstress_dimensionless_30spfaf06,avg_waterdepletion_dimensionless_30spfaf06,min_waterdepletion_dimensionless_30spfaf06,max_waterdepletion_dimensionless_30spfaf06,slope_waterdepletion_dimensionless_30spfaf06,intercept_waterdepletion_dimensionless_30spfaf06,ols_waterdepletion_dimensionless_30spfaf06,avg_riverdischarge_m_30spfaf06,min_riverdischarge_m_30spfaf06,max_riverdischarge_m_30spfaf06,slope_riverdischarge_m_30spfaf06,intercept_riverdischarge_m_30spfaf06,ols_riverdischarge_m_30spfaf06,avg_ptotww_m_30spfaf06,min_ptotww_m_30spfaf06,max_ptotww_m_30spfaf06,slope_ptotww_m_30spfaf06,intercept_ptotww_m_30spfaf06,ols_ptotww_m_30spfaf06,avg_ptotwn_m_30spfaf06,min_ptotwn_m_30spfaf06,max_ptotwn_m_30spfaf06,slope_ptotwn_m_30spfaf06,intercept_ptotwn_m_30spfaf06,ols_ptotwn_m_30spfaf06,
avg_ma10_waterstress_dimensionless_30spfaf06,min_ma10_waterstress_dimensionless_30spfaf06,max_ma10_waterstress_dimensionless_30spfaf06,slope_ma10_waterstress_dimensionless_30spfaf06,intercept_ma10_waterstress_dimensionless_30spfaf06,ols_ma10_waterstress_dimensionless_30spfaf06,avg_ma10_waterdepletion_dimensionless_30spfaf06,min_ma10_waterdepletion_dimensionless_30spfaf06,max_ma10_waterdepletion_dimensionless_30spfaf06,slope_ma10_waterdepletion_dimensionless_30spfaf06,intercept_ma10_waterdepletion_dimensionless_30spfaf06,ols_ma10_waterdepletion_dimensionless_30spfaf06,avg_ma10_riverdischarge_m_30spfaf06,min_ma10_riverdischarge_m_30spfaf06,max_ma10_riverdischarge_m_30spfaf06,slope_ma10_riverdischarge_m_30spfaf06,intercept_ma10_riverdischarge_m_30spfaf06,ols_ma10_riverdischarge_m_30spfaf06,avg_ma10_ptotww_m_30spfaf06,min_ma10_ptotww_m_30spfaf06,max_ma10_ptotww_m_30spfaf06,slope_ma10_ptotww_m_30spfaf06,intercept_ma10_ptotww_m_30spfaf06,ols_ma10_ptotww_m_30spfaf06,avg_ma10_ptotwn_m_30spfaf06,min_ma10_ptotwn_m_30spfaf06,max_ma10_ptotwn_m_30spfaf06,slope_ma10_ptotwn_m_30spfaf06,intercept_ma10_ptotwn_m_30spfaf06,ols_ma10_ptotwn_m_30spfaf06,avg_ols10_waterstress_dimensionless_30spfaf06,min_ols10_waterstress_dimensionless_30spfaf06,max_ols10_waterstress_dimensionless_30spfaf06,slope_ols10_waterstress_dimensionless_30spfaf06,intercept_ols10_waterstress_dimensionless_30spfaf06,ols_ols10_waterstress_dimensionless_30spfaf06,avg_ols10_waterdepletion_dimensionless_30spfaf06,min_ols10_waterdepletion_dimensionless_30spfaf06,max_ols10_waterdepletion_dimensionless_30spfaf06,slope_ols10_waterdepletion_dimensionless_30spfaf06,intercept_ols10_waterdepletion_dimensionless_30spfaf06,ols_ols10_waterdepletion_dimensionless_30spfaf06,avg_ols10_riverdischarge_m_30spfaf06,min_ols10_riverdischarge_m_30spfaf06,max_ols10_riverdischarge_m_30spfaf06,slope_ols10_riverdischarge_m_30spfaf06,intercept_ols10_riverdischarge_m_30spfaf06,ols_ols10_riverdischarge_m_30spfaf06,avg_ols10_ptotww_m_30spfaf06,min_o
ls10_ptotww_m_30spfaf06,max_ols10_ptotww_m_30spfaf06,slope_ols10_ptotww_m_30spfaf06,intercept_ols10_ptotww_m_30spfaf06,ols_ols10_ptotww_m_30spfaf06,avg_ols10_ptotwn_m_30spfaf06,min_ols10_ptotwn_m_30spfaf06,max_ols10_ptotwn_m_30spfaf06,slope_ols10_ptotwn_m_30spfaf06,intercept_ols10_ptotwn_m_30spfaf06,ols_ols10_ptotwn_m_30spfaf06,ols_ols10_arid_boolean_30spfaf06,ols_ols10_lowwateruse_boolean_30spfaf06,ols_ols10_aridandlowwateruse_boolean_30spfaf06
0,111011,month,1985,2,1885917000.0,2536,0.000164,0.000126,0.001282,0.000513,2.3e-05,9e-06,5.698419e-08,5.698419e-08,0.001469,0.000648,2.718576e-05,9.9e-05,7.4e-05,0.000164,8e-06,-0.014779,0.000133,7.6e-05,5.7e-05,0.000126,6e-06,-0.011391,0.000102,0.001138,0.000999,0.001282,3.3e-05,-0.063289,0.001285,0.000455,0.000399,0.000513,1.3e-05,-0.025316,0.000514,2.8e-05,2e-05,4.1e-05,-1.188904e-06,0.002383,2.3e-05,1.1e-05,7e-06,1.5e-05,-4.494055e-07,0.000901,9e-06,4.219093e-08,3.361337e-08,5.698419e-08,2.735575e-09,-5e-06,5.450102e-08,4.219093e-08,3.361337e-08,5.698419e-08,2.735575e-09,-5e-06,5.450102e-08,0.001265,0.001108,0.001469,3.9e-05,-0.075691,0.00144,0.000542,0.00047,0.000648,1.8e-05,-0.035811,0.000625,3.3e-05,0.0,0.00018,3.243264e-06,-0.00639,4.8e-05,0.000133,0.000102,0.001282,0.000513,2.3e-05,9e-06,5.450102e-08,5.450102e-08,0.00144,0.000625,4.8e-05,1,1,1,0,0,0,0,0,0,2.175575,0.959731,2.198088,0.941941,2.140445,0.928638,1.754725,1.0,2.358479,0.060082,-116.756516,2.505747,0.974917,0.751106,1.0,-0.003164,7.215945,0.935367,2e-05,0.0,0.00018,1e-06,-0.002812,3.7e-05,0.001088,0.000892,0.001469,2e-05,-0.038549,0.00134,0.000469,0.000381,0.000648,9e-06,-0.016532,0.000577,1.568755,1.0,2.278043,0.069877,-136.262895,2.442213,0.982711,0.930549,1.0,-0.002735,6.376921,0.948527,1.5e-05,0.0,3.8e-05,6.289822e-07,-0.001225,2.3e-05,0.001016,0.000909,0.001265,1.3e-05,-0.023821,0.001173,0.000437,0.000388,0.000542,5e-06,-0.01039,0.000506,1.831499,1.0,2.320351,0.060161,-116.866441,2.553433,0.972026,0.881574,1.015501,-0.00442,9.693293,0.918982,2.2e-05,-7e-06,7e-05,2e-06,-0.003201,4.1e-05,0.001086,0.000895,0.00144,2e-05,-0.038998,0.00133,0.000468,0.000383,0.000625,8e-06,-0.016281,0.00057,1,0,0
1,111011,month,1986,2,1885917000.0,2536,0.000169,0.000131,0.001289,0.000515,2.5e-05,1e-05,5.837154e-08,5.837154e-08,0.001483,0.000656,2.598127e-05,0.000108,7.7e-05,0.000169,1e-05,-0.019058,0.000152,8.4e-05,6e-05,0.000131,7e-06,-0.014701,0.000117,0.001167,0.001023,0.001289,3.1e-05,-0.06112,0.001309,0.000467,0.000409,0.000515,1.3e-05,-0.024448,0.000523,2.7e-05,2e-05,4.1e-05,-8.41361e-07,0.001695,2.4e-05,1e-05,7e-06,1.5e-05,-3.180344e-07,0.000641,9e-06,4.443651e-08,3.361337e-08,5.837154e-08,3.199812e-09,-6e-06,5.883566e-08,4.443651e-08,3.361337e-08,5.837154e-08,3.199812e-09,-6e-06,5.883566e-08,0.001303,0.001129,0.001483,4e-05,-0.07849,0.001484,0.000561,0.00048,0.000656,2e-05,-0.038515,0.00065,3.6e-05,0.0,0.00018,6.088591e-07,-0.00117,3.9e-05,0.000152,0.000117,0.001289,0.000515,2.4e-05,9e-06,5.837154e-08,5.837154e-08,0.001483,0.00065,3.9e-05,1,1,1,0,0,0,0,0,0,2.175994,0.961886,2.182936,0.939647,2.156082,0.943688,1.770328,1.0,2.358479,0.056988,-110.666639,2.511169,0.974435,0.751106,1.0,-0.002928,6.752241,0.936365,2e-05,0.0,0.00018,1e-06,-0.002611,3.7e-05,0.001103,0.000892,0.001483,2.1e-05,-0.040481,0.001377,0.000476,0.000381,0.000656,9e-06,-0.017629,0.000595,1.591502,1.0,2.278043,0.067264,-131.120961,2.465938,0.981116,0.930549,1.0,-0.002783,6.472915,0.944931,1.6e-05,0.0,3.8e-05,7.261811e-07,-0.001417,2.5e-05,0.001027,0.000909,0.001303,1.4e-05,-0.025648,0.001202,0.000442,0.000388,0.000561,6e-06,-0.011162,0.000518,1.843983,1.0,2.320351,0.056251,-109.166932,2.547118,0.970936,0.881574,1.015501,-0.004171,9.203133,0.918794,2.2e-05,-7e-06,7e-05,2e-06,-0.00313,4.2e-05,0.001101,0.000895,0.001484,2.1e-05,-0.041255,0.00137,0.000475,0.000383,0.00065,9e-06,-0.017482,0.000589,1,0,0
2,111011,month,1987,2,1885917000.0,2536,0.000179,0.000139,0.00133,0.000532,3.8e-05,1.4e-05,5.987115e-08,5.987115e-08,0.001547,0.000685,8.635939e-08,0.000119,7.9e-05,0.000179,1.1e-05,-0.022626,0.00017,9.1e-05,6.1e-05,0.000139,9e-06,-0.017465,0.000131,0.001198,0.001023,0.00133,3.1e-05,-0.05982,0.001336,0.000479,0.000409,0.000532,1.2e-05,-0.023928,0.000534,2.7e-05,2e-05,3.8e-05,6.115063e-07,-0.001185,3e-05,1e-05,7e-06,1.4e-05,2.311493e-07,-0.000448,1.1e-05,4.688446e-08,3.361337e-08,5.987115e-08,3.438705e-09,-7e-06,6.235864e-08,4.688446e-08,3.361337e-08,5.987115e-08,3.438705e-09,-7e-06,6.235864e-08,0.001343,0.001129,0.001547,4.3e-05,-0.083637,0.001536,0.000581,0.00048,0.000685,2.1e-05,-0.041848,0.000677,3.6e-05,0.0,0.00018,-3.739116e-06,0.007449,1.9e-05,0.00017,0.000131,0.00133,0.000532,3e-05,1.1e-05,5.987115e-08,5.987115e-08,0.001536,0.000677,1.9e-05,1,1,1,0,0,0,0,0,0,2.25886,0.999874,2.177792,0.941622,2.206228,0.972448,1.787775,1.0,2.358479,0.054702,-106.167353,2.526257,0.975343,0.751106,1.0,-0.002438,5.785791,0.942437,1.9e-05,0.0,0.00018,1e-06,-0.002053,3.3e-05,0.001119,0.000892,0.001547,2.2e-05,-0.042645,0.001418,0.000483,0.000381,0.000685,1e-05,-0.018796,0.000615,1.612441,1.0,2.278043,0.064638,-125.950881,2.485056,0.979705,0.930549,1.0,-0.002787,6.480557,0.942076,1.7e-05,0.0,3.8e-05,7.985331e-07,-0.001559,2.8e-05,0.001038,0.000909,0.001343,1.4e-05,-0.027502,0.001233,0.000447,0.000388,0.000581,6e-06,-0.011985,0.000532,1.8574,1.0,2.320351,0.053099,-102.959768,2.547685,0.970992,0.881574,1.015501,-0.003712,8.299335,0.92273,2.2e-05,-7e-06,7e-05,1e-06,-0.002744,4e-05,0.001117,0.000895,0.001536,2.3e-05,-0.043527,0.001411,0.000482,0.000383,0.000677,1e-05,-0.018725,0.000609,1,0,0
3,111011,month,1988,2,1885917000.0,2536,0.000175,0.000136,0.001265,0.000506,2.1e-05,8e-06,6.160417e-08,6.160417e-08,0.001462,0.00065,6.158809e-05,0.000128,8.6e-05,0.000179,1.2e-05,-0.023523,0.000182,9.9e-05,6.6e-05,0.000139,9e-06,-0.01818,0.00014,0.001222,0.001084,0.00133,2.3e-05,-0.044034,0.001325,0.000489,0.000434,0.000532,9e-06,-0.017614,0.00053,2.7e-05,2e-05,3.8e-05,2.587162e-07,-0.000487,2.8e-05,1e-05,7e-06,1.4e-05,9.77947e-08,-0.000184,1e-05,4.955575e-08,3.361337e-08,6.160417e-08,3.442053e-09,-7e-06,6.504499e-08,4.955575e-08,3.361337e-08,6.160417e-08,3.442053e-09,-7e-06,6.504499e-08,0.001377,0.001197,0.001547,3.5e-05,-0.06805,0.001534,0.000598,0.00051,0.000685,1.8e-05,-0.035984,0.000681,4.2e-05,0.0,0.00018,-4.744792e-06,0.009453,2.1e-05,0.000179,0.000139,0.001325,0.00053,2.8e-05,1e-05,6.160417e-08,6.160417e-08,0.001534,0.000681,2.1e-05,1,1,1,0,0,0,0,0,0,2.055162,0.913398,2.151211,0.934104,2.186706,0.970327,1.796996,1.0,2.358479,0.051076,-99.027376,2.512062,0.973207,0.751106,1.0,-0.002621,6.147019,0.936514,2.1e-05,0.0,0.00018,1e-06,-0.002424,3.8e-05,0.001131,0.000892,0.001547,2.2e-05,-0.042931,0.001443,0.000489,0.000381,0.000685,1e-05,-0.019127,0.000628,1.631019,1.0,2.278043,0.06189,-120.539756,2.497479,0.978133,0.930549,1.0,-0.002823,6.550954,0.938609,1.8e-05,0.0,4.2e-05,8.938811e-07,-0.001747,3e-05,0.00105,0.000909,0.001377,1.5e-05,-0.029253,0.001265,0.000452,0.000388,0.000598,7e-06,-0.012793,0.000546,1.869161,1.0,2.320351,0.050039,-96.933203,2.54469,0.970968,0.881574,1.015501,-0.003333,7.552576,0.925969,2.2e-05,-7e-06,7e-05,1e-06,-0.002437,3.9e-05,0.001132,0.000895,0.001536,2.3e-05,-0.044986,0.001448,0.000489,0.000383,0.000681,1e-05,-0.01963,0.000627,1,0,0
4,111011,month,1989,2,1885917000.0,2536,0.000186,0.000144,0.001306,0.000522,2e-05,8e-06,5.720093e-08,5.720093e-08,0.001512,0.000674,2.113889e-05,0.000138,9.4e-05,0.000186,1.2e-05,-0.024111,0.000193,0.000107,7.2e-05,0.000144,9e-06,-0.018666,0.000149,0.001244,0.001159,0.00133,1.8e-05,-0.03493,0.001326,0.000498,0.000464,0.000532,7e-06,-0.013972,0.00053,2.6e-05,2e-05,3.8e-05,-3.41514e-08,9.4e-05,2.6e-05,1e-05,7e-06,1.4e-05,-1.290922e-08,3.5e-05,1e-05,5.185155e-08,3.361337e-08,6.160417e-08,2.838212e-09,-6e-06,6.46235e-08,5.185155e-08,3.361337e-08,6.160417e-08,2.838212e-09,-6e-06,6.46235e-08,0.001408,0.001274,0.001547,3e-05,-0.058953,0.001545,0.000614,0.000544,0.000685,1.7e-05,-0.032608,0.000689,4.3e-05,0.0,0.00018,-7.919316e-06,0.015759,7e-06,0.000186,0.000144,0.001326,0.00053,2.6e-05,1e-05,6.160417e-08,6.160417e-08,0.001545,0.000685,7e-06,1,1,1,0,0,0,0,0,0,2.175734,0.969577,2.142448,0.934411,2.216806,0.98928,1.80962,1.0,2.358479,0.048577,-94.105254,2.513984,0.973086,0.751106,1.0,-0.002391,5.693631,0.93842,2.1e-05,0.0,0.00018,1e-06,-0.002196,3.7e-05,0.001143,0.000892,0.001547,2.3e-05,-0.043519,0.001471,0.000495,0.000381,0.000685,1e-05,-0.019578,0.000643,1.648067,1.0,2.278043,0.0592,-115.242619,2.506469,0.976675,0.930549,1.0,-0.002832,6.568423,0.935612,1.8e-05,0.0,4.3e-05,9.716738e-07,-0.0019,3.3e-05,0.001062,0.000909,0.001408,1.6e-05,-0.030882,0.001296,0.000458,0.000388,0.000614,7e-06,-0.013573,0.000561,1.881148,1.0,2.320351,0.047433,-91.798669,2.545208,0.9716,0.881574,1.015501,-0.002874,6.647127,0.931368,2.2e-05,-7e-06,7e-05,1e-06,-0.001992,3.6e-05,0.001146,0.000895,0.001545,2.4e-05,-0.045992,0.001481,0.000496,0.000383,0.000689,1.1e-05,-0.020341,0.000644,1,0,0


In [25]:
# Column names of the left table, in table order.
all_columns = list(df_complete.columns)

In [26]:
# Number of columns in the loaded frame.
len(all_columns)

202

In [27]:
# Number of columns selected from the left table; compare with the count of all columns.
len(columns_to_keep_left)

202

In [28]:
# Which columns of the left table are not selected for the output?
missing_columns = set(all_columns) - set(columns_to_keep_left)
# Display the answer; an empty set means every column is selected.
missing_columns

In [29]:
# Annual score columns taken from the right table: plain and weighted
# variants for water stress and water depletion.
columns_to_keep_right = [
    "avg1y_ols_ols10_waterstress_dimensionless_30spfaf06",
    "avg1y_ols_ols10_weighted_waterstress_dimensionless_30spfaf06",
    "avg1y_ols_ols10_waterdepletion_dimensionless_30spfaf06",
    "avg1y_ols_ols10_weighted_waterdepletion_dimensionless_30spfaf06",
]

In [30]:
# Build the CREATE TABLE ... AS SELECT statement: the selected columns of the
# left table (alias l) plus the annual score columns of the right table
# (alias r), matched on pfafid_30spfaf06 and year.
# str.join places the commas between the names, so no trailing comma has to be
# sliced off afterwards.
selected_columns = ["l.{}".format(column) for column in columns_to_keep_left]
selected_columns += ["r.{}".format(column) for column in columns_to_keep_right]

sql =  "CREATE TABLE {} AS".format(OUTPUT_TABLE_NAME)
sql += " SELECT {}".format(", ".join(selected_columns))
sql += " FROM {} l".format(INPUT_TABLE_NAME_LEFT)
sql += " INNER JOIN {} r ON".format(INPUT_TABLE_NAME_RIGHT)
# NOTE(review): joining on CONCAT(...) compares text built per row, which
# presumably prevents the use of indexes on the key columns. If both tables
# store pfafid_30spfaf06 and year with comparable types, the condition
# "l.pfafid_30spfaf06 = r.pfafid_30spfaf06 AND l.year = r.year" should be
# equivalent and faster - confirm the column types before changing it.
sql += " CONCAT(l.pfafid_30spfaf06,l.year) = CONCAT(r.pfafid_30spfaf06,r.year)"
    

In [31]:
# Display the generated CREATE TABLE statement for visual inspection.
sql

'CREATE TABLE y2018m07d12_rh_merge_simplify_tables_postgis_v01_v06 AS SELECT  l.pfafid_30spfaf06, l.temporal_resolution, l.year, l.month, l.area_m2_30spfaf06, l.area_count_30spfaf06, l.ptotww_m_30spfaf06, l.ptotwn_m_30spfaf06, l.pdomww_m_30spfaf06, l.pdomwn_m_30spfaf06, l.pindww_m_30spfaf06, l.pindwn_m_30spfaf06, l.pirrww_m_30spfaf06, l.pirrwn_m_30spfaf06, l.plivww_m_30spfaf06, l.plivwn_m_30spfaf06, l.riverdischarge_m_30spfaf06, l.ma10_ptotww_m_30spfaf06, l.ma10_ptotwn_m_30spfaf06, l.ma10_pdomww_m_30spfaf06, l.ma10_pdomwn_m_30spfaf06, l.ma10_pindww_m_30spfaf06, l.ma10_pindwn_m_30spfaf06, l.ma10_pirrww_m_30spfaf06, l.ma10_pirrwn_m_30spfaf06, l.ma10_plivww_m_30spfaf06, l.ma10_plivwn_m_30spfaf06, l.ma10_riverdischarge_m_30spfaf06, l.min10_ptotww_m_30spfaf06, l.min10_ptotwn_m_30spfaf06, l.min10_pdomww_m_30spfaf06, l.min10_pdomwn_m_30spfaf06, l.min10_pindww_m_30spfaf06, l.min10_pindwn_m_30spfaf06, l.min10_pirrww_m_30spfaf06, l.min10_pirrwn_m_30spfaf06, l.min10_plivww_m_30spfaf06, l.min10_pl

In [32]:
# Execute the statement currently held in `sql` on the database.
result = engine.execute(sql)

In [33]:
# Index on the basin identifier column of the output table.
# NOTE(review): the index name is the table name directly followed by the
# column name. For the current table name that exceeds PostgreSQL's default
# 63-character identifier limit, so the server presumably truncates it - confirm.
index_column = "pfafid_30spfaf06"
sql_index = "CREATE INDEX {}pfafid_30spfaf06 ON {} ({})".format(
    OUTPUT_TABLE_NAME,
    OUTPUT_TABLE_NAME,
    index_column,
)

In [34]:
# Create the index on pfafid_30spfaf06.
result = engine.execute(sql_index)

In [35]:
# Index on the year column of the output table; the index name is the table
# name directly followed by "year".
index_column = "year"
sql_index2 = "CREATE INDEX {}year ON {} ({})".format(
    OUTPUT_TABLE_NAME,
    OUTPUT_TABLE_NAME,
    index_column,
)

In [36]:
# Create the index on year.
result = engine.execute(sql_index2)

In [37]:
# Index on the month column of the output table; the index name is the table
# name directly followed by "month".
index_column = "month"
sql_index3 = "CREATE INDEX {}month ON {} ({})".format(
    OUTPUT_TABLE_NAME,
    OUTPUT_TABLE_NAME,
    index_column,
)

In [38]:
# Create the index on month.
result = engine.execute(sql_index3)

In [39]:
# Release the engine's pooled database connections.
engine.dispose()

In [40]:
# Report the wall-clock time since `start` was recorded near the top of the notebook.
end = datetime.datetime.now()
elapsed = end - start
print(elapsed)

0:23:37.884430


Previous runs:  
0:21:34.564407  
0:23:37.884430