In [1]:
""" Merge and simplify master table and annual scores based on months.
-------------------------------------------------------------------------------

Author: Rutger Hofste
Date: 20180712
Kernel: python35
Docker: rutgerhofste/gisdocker:ubuntu16.04

Args:
    TESTING (Boolean) : Toggle testing case.
    OVERWRITE_OUTPUT (Boolean) : Drop the output table, if it exists, before
        it is recreated.
    SCRIPT_NAME (string) : Script name.
    OUTPUT_VERSION (integer) : Output version.
    DATABASE_ENDPOINT (string) : RDS or PostgreSQL endpoint.
    DATABASE_NAME (string) : Database name.
    INPUT_TABLE_NAME_LEFT (string) : Left-hand table of the join. Must exist
        on the database named above.
    INPUT_TABLE_NAME_RIGHT (string) : Right-hand table of the join. Must exist
        on the database named above.

"""

# Integer flags that are used as booleans.
TESTING = 0
OVERWRITE_OUTPUT = 1

# Script identity; the output table name is derived from these two values.
SCRIPT_NAME = 'Y2018M07D12_RH_Merge_Simplify_Tables_PostGIS_V01'
OUTPUT_VERSION = 4

# Database that holds both input tables and receives the output table.
DATABASE_ENDPOINT = "aqueduct30v05.cgpnumwmfcqc.eu-central-1.rds.amazonaws.com"
DATABASE_NAME = "database01"

# Tables joined further down (aliased "l" and "r" in the generated SQL).
INPUT_TABLE_NAME_LEFT = "y2018m07d09_rh_apply_aridlowonce_mask_postgis_v01_v01"
INPUT_TABLE_NAME_RIGHT = "y2018d07d12_rh_annual_scores_from_months_postgis_v01_v03"

# Lower-cased script name followed by a zero-padded, two-digit version suffix.
OUTPUT_TABLE_NAME = "{}_v{:02.0f}".format(SCRIPT_NAME.lower(), OUTPUT_VERSION)

print(
    "Input Table Left: ", INPUT_TABLE_NAME_LEFT,
    "Input Table Right: ", INPUT_TABLE_NAME_RIGHT,
    "\nOutput Table: ", OUTPUT_TABLE_NAME,
)

Input Table Left:  y2018m07d09_rh_apply_aridlowonce_mask_postgis_v01_v01 Input Table Right:  y2018d07d12_rh_annual_scores_from_months_postgis_v01_v03 
Output Table:  y2018m07d12_rh_merge_simplify_tables_postgis_v01_v04


In [2]:
import time, datetime, sys

# time.strftime() formats *local* time when no time tuple is passed, so the
# "UTC" label was only correct on hosts whose timezone is UTC. Format an
# explicit UTC time tuple instead; both strings come from the same instant.
utc_now = time.gmtime()
dateString = time.strftime("Y%YM%mD%d", utc_now)
timeString = time.strftime("UTC %H:%M", utc_now)

# Start of the run; the last cell subtracts this to report elapsed time.
start = datetime.datetime.now()
print(dateString,timeString)
sys.version

Y2018M07D13 UTC 07:32


'3.5.4 |Anaconda, Inc.| (default, Nov 20 2017, 18:44:38) \n[GCC 7.2.0]'

In [3]:
# imports
import re
import os
import numpy as np
import pandas as pd
import aqueduct3
from datetime import timedelta
# NOTE(review): wildcard import; create_engine is the only sqlalchemy name
# used by the cells visible in this notebook.
from sqlalchemy import *
# Allow up to 500 columns to be shown so wide tables are not elided on display.
pd.set_option('display.max_columns', 500)

In [4]:
# Read the database password from a local file; only the first line is used.
# The context manager closes the handle even when reading raises.
with open("/.password","r") as F:
    password = F.read().splitlines()[0]

# NOTE(review): the password is interpolated into the URL unescaped; if it can
# contain characters such as "@" or "/", it needs URL-quoting first.
engine = create_engine("postgresql://rutgerhofste:{}@{}:5432/{}".format(password,DATABASE_ENDPOINT,DATABASE_NAME))

# Start from a clean slate so the CREATE TABLE further down cannot collide
# with a table left behind by an earlier run.
if OVERWRITE_OUTPUT:
    sql = "DROP TABLE IF EXISTS {};".format(OUTPUT_TABLE_NAME)
    print(sql)
    result = engine.execute(sql)

DROP TABLE IF EXISTS y2018m07d12_rh_merge_simplify_tables_postgis_v01_v04;


In [5]:
# Seed list of left-table columns to carry over; the cells below append the
# generated indicator column names to this same list.
columns_to_keep_left = [
    # identifier and time columns
    "pfafid_30spfaf06",
    "temporal_resolution",
    "year",
    "month",
    # area columns
    "area_m2_30spfaf06",
    "area_count_30spfaf06",
]

In [6]:
# Placeholder: no right-table columns are chosen yet. The list is reassigned
# further down, just before the CREATE TABLE statement is assembled.
columns_to_keep_right = []

## Raw Data and Decadal Statistics

In [7]:
# Name fragments that are combined pairwise into indicator names such as
# "ptotww". NOTE(review): presumably sector codes (total, domestic, industrial,
# irrigation, livestock) and two water-use types — confirm against the data.
sectors = ["ptot", "pdom", "pind", "pirr", "pliv"]
use_types = ["ww", "wn"]

In [8]:
# Every sector/use-type combination, sector-major (ptotww, ptotwn, pdomww, ...).
decadal_indicators = [
    "{}{}".format(sector, use_type)
    for sector in sectors
    for use_type in use_types
]

In [9]:
# River discharge gets the same statistics as the demand indicators.
# Guarded so that re-running this cell does not append the name a second time
# (a duplicate here would duplicate every riverdischarge column downstream).
if "riverdischarge" not in decadal_indicators:
    decadal_indicators.append("riverdischarge")

In [10]:
# Column-name prefixes; the empty prefix selects the raw (unprefixed) column.
decadal_statistics = [
    "",
    "ma10_",
    "min10_",
    "max10_",
    "slope10_",
    "intercept10_",
    "ols10_",
    "capped_ols10_",
]

In [11]:
# One column per (statistic prefix, indicator) pair, statistic-major.
# The membership guard keeps the cell idempotent: re-running it no longer
# appends every name a second time, which would put duplicate columns in the
# SELECT list generated from columns_to_keep_left.
for decadal_statistic in decadal_statistics:
    for decadal_indicator in decadal_indicators:
        indicator = "{}{}_m_30spfaf06".format(decadal_statistic,decadal_indicator)
        print(indicator)
        if indicator not in columns_to_keep_left:
            columns_to_keep_left.append(indicator)

ptotww_m_30spfaf06
ptotwn_m_30spfaf06
pdomww_m_30spfaf06
pdomwn_m_30spfaf06
pindww_m_30spfaf06
pindwn_m_30spfaf06
pirrww_m_30spfaf06
pirrwn_m_30spfaf06
plivww_m_30spfaf06
plivwn_m_30spfaf06
riverdischarge_m_30spfaf06
ma10_ptotww_m_30spfaf06
ma10_ptotwn_m_30spfaf06
ma10_pdomww_m_30spfaf06
ma10_pdomwn_m_30spfaf06
ma10_pindww_m_30spfaf06
ma10_pindwn_m_30spfaf06
ma10_pirrww_m_30spfaf06
ma10_pirrwn_m_30spfaf06
ma10_plivww_m_30spfaf06
ma10_plivwn_m_30spfaf06
ma10_riverdischarge_m_30spfaf06
min10_ptotww_m_30spfaf06
min10_ptotwn_m_30spfaf06
min10_pdomww_m_30spfaf06
min10_pdomwn_m_30spfaf06
min10_pindww_m_30spfaf06
min10_pindwn_m_30spfaf06
min10_pirrww_m_30spfaf06
min10_pirrwn_m_30spfaf06
min10_plivww_m_30spfaf06
min10_plivwn_m_30spfaf06
min10_riverdischarge_m_30spfaf06
max10_ptotww_m_30spfaf06
max10_ptotwn_m_30spfaf06
max10_pdomww_m_30spfaf06
max10_pdomwn_m_30spfaf06
max10_pindww_m_30spfaf06
max10_pindwn_m_30spfaf06
max10_pirrww_m_30spfaf06
max10_pirrwn_m_30spfaf06
max10_plivww_m_30spfaf06
max

## Statistics on Decadal Statistics

In [12]:
tier2_decadal_indicators = ["ptotww",
                            "ptotwn",
                            "riverdischarge"]

# Column names are "<prefix 0><prefix 1><indicator>_m_30spfaf06": an outer
# statistic prefix placed in front of a decadal statistic prefix.
tier2_decadal_statistics_0 = ["ols_","avg_","min_","max_","slope_","intercept_"]
tier2_decadal_statistics_1 = ["ma10_","ols10_"]

for tier2_decadal_indicator in tier2_decadal_indicators:
    for tier2_decadal_statistic_0 in tier2_decadal_statistics_0:
        for tier2_decadal_statistic_1 in tier2_decadal_statistics_1:
            indicator = "{}{}{}_m_30spfaf06".format(tier2_decadal_statistic_0,tier2_decadal_statistic_1,tier2_decadal_indicator)
            print(indicator)
            # Skip names already collected so a re-run of this cell does not
            # add duplicate columns to the generated SELECT list.
            if indicator not in columns_to_keep_left:
                columns_to_keep_left.append(indicator)
            
    


ols_ma10_ptotww_m_30spfaf06
ols_ols10_ptotww_m_30spfaf06
avg_ma10_ptotww_m_30spfaf06
avg_ols10_ptotww_m_30spfaf06
min_ma10_ptotww_m_30spfaf06
min_ols10_ptotww_m_30spfaf06
max_ma10_ptotww_m_30spfaf06
max_ols10_ptotww_m_30spfaf06
slope_ma10_ptotww_m_30spfaf06
slope_ols10_ptotww_m_30spfaf06
intercept_ma10_ptotww_m_30spfaf06
intercept_ols10_ptotww_m_30spfaf06
ols_ma10_ptotwn_m_30spfaf06
ols_ols10_ptotwn_m_30spfaf06
avg_ma10_ptotwn_m_30spfaf06
avg_ols10_ptotwn_m_30spfaf06
min_ma10_ptotwn_m_30spfaf06
min_ols10_ptotwn_m_30spfaf06
max_ma10_ptotwn_m_30spfaf06
max_ols10_ptotwn_m_30spfaf06
slope_ma10_ptotwn_m_30spfaf06
slope_ols10_ptotwn_m_30spfaf06
intercept_ma10_ptotwn_m_30spfaf06
intercept_ols10_ptotwn_m_30spfaf06
ols_ma10_riverdischarge_m_30spfaf06
ols_ols10_riverdischarge_m_30spfaf06
avg_ma10_riverdischarge_m_30spfaf06
avg_ols10_riverdischarge_m_30spfaf06
min_ma10_riverdischarge_m_30spfaf06
min_ols10_riverdischarge_m_30spfaf06
max_ma10_riverdischarge_m_30spfaf06
max_ols10_riverdischarge_m_30

## Complete TimeSeries Statistics

In [13]:
# Statistics over the full time series exist only for ptotww, ptotwn and
# riverdischarge; these two lists are combined into column names below.
complete_timeseries_statistics = [
    "avg_",
    "min_",
    "max_",
    "slope_",
    "intercept_",
    "ols_",
]

complete_timeseries_indicators = [
    "ptotww",
    "ptotwn",
    "riverdischarge",
]


In [14]:
# One column per (statistic, indicator) pair, statistic-major.
# Names already present are skipped so that re-running the cell does not add
# duplicate columns to the generated SELECT list.
for complete_timeseries_statistic in complete_timeseries_statistics:
    for complete_timeseries_indicator in complete_timeseries_indicators:
        indicator = "{}{}_m_30spfaf06".format(complete_timeseries_statistic,complete_timeseries_indicator)
        print(indicator)
        if indicator not in columns_to_keep_left:
            columns_to_keep_left.append(indicator)

avg_ptotww_m_30spfaf06
avg_ptotwn_m_30spfaf06
avg_riverdischarge_m_30spfaf06
min_ptotww_m_30spfaf06
min_ptotwn_m_30spfaf06
min_riverdischarge_m_30spfaf06
max_ptotww_m_30spfaf06
max_ptotwn_m_30spfaf06
max_riverdischarge_m_30spfaf06
slope_ptotww_m_30spfaf06
slope_ptotwn_m_30spfaf06
slope_riverdischarge_m_30spfaf06
intercept_ptotww_m_30spfaf06
intercept_ptotwn_m_30spfaf06
intercept_riverdischarge_m_30spfaf06
ols_ptotww_m_30spfaf06
ols_ptotwn_m_30spfaf06
ols_riverdischarge_m_30spfaf06


## Raw and Decadal Arid and Lowwater Use Columns

In [15]:
arid_lowwateruse_indicators = ["arid",
                               "lowwateruse",
                               "aridandlowwateruse"]

# The empty prefix selects the raw boolean column; the others are its
# ma10_/ols10_ prefixed variants.
arid_lowwateruse_statistics_tier0 = ["",
                                     "ma10_",
                                     "ols10_"]

for arid_lowwateruse_indicator in arid_lowwateruse_indicators:
    for arid_lowwateruse_statistic_tier0 in arid_lowwateruse_statistics_tier0:
        indicator = "{}{}_boolean_30spfaf06".format(arid_lowwateruse_statistic_tier0,arid_lowwateruse_indicator)
        print(indicator)
        # Skip names already collected so a re-run of this cell does not add
        # duplicate columns to the generated SELECT list.
        if indicator not in columns_to_keep_left:
            columns_to_keep_left.append(indicator)



arid_boolean_30spfaf06
ma10_arid_boolean_30spfaf06
ols10_arid_boolean_30spfaf06
lowwateruse_boolean_30spfaf06
ma10_lowwateruse_boolean_30spfaf06
ols10_lowwateruse_boolean_30spfaf06
aridandlowwateruse_boolean_30spfaf06
ma10_aridandlowwateruse_boolean_30spfaf06
ols10_aridandlowwateruse_boolean_30spfaf06


## Statistics on Decadal Statistics Arid and Lowwater Use

In [16]:
lowarid_tier2_decadal_indicators = ["arid",
                                    "lowwateruse",
                                    "aridandlowwateruse"]

# Only the "ols_" + "ols10_" combination is kept for the boolean masks.
lowarid_tier2_decadal_statistics_0 = ["ols_"]
lowarid_tier2_decadal_statistics_1 = ["ols10_"]

for lowarid_tier2_decadal_indicator in lowarid_tier2_decadal_indicators:
    for lowarid_tier2_decadal_statistic_0 in lowarid_tier2_decadal_statistics_0:
        for lowarid_tier2_decadal_statistic_1 in lowarid_tier2_decadal_statistics_1:
            indicator = "{}{}{}_boolean_30spfaf06".format(lowarid_tier2_decadal_statistic_0,lowarid_tier2_decadal_statistic_1,lowarid_tier2_decadal_indicator)
            print(indicator)
            # Skip names already collected so a re-run of this cell does not
            # add duplicate columns to the generated SELECT list.
            if indicator not in columns_to_keep_left:
                columns_to_keep_left.append(indicator)

ols_ols10_arid_boolean_30spfaf06
ols_ols10_lowwateruse_boolean_30spfaf06
ols_ols10_aridandlowwateruse_boolean_30spfaf06


## Water Stress Decadal

In [17]:
waterstress_decadal_indicators = ["waterstress"]

# The empty prefix selects the raw water stress column.
waterstress_decadal_statistics = ["",
                                  "ma10_",
                                  "ols10_"]

for waterstress_decadal_indicator in waterstress_decadal_indicators:
    for waterstress_decadal_statistic in waterstress_decadal_statistics:
        indicator = "{}{}_dimensionless_30spfaf06".format(waterstress_decadal_statistic,waterstress_decadal_indicator)
        print(indicator)
        # Skip names already collected so a re-run of this cell does not add
        # duplicate columns to the generated SELECT list.
        if indicator not in columns_to_keep_left:
            columns_to_keep_left.append(indicator)


waterstress_dimensionless_30spfaf06
ma10_waterstress_dimensionless_30spfaf06
ols10_waterstress_dimensionless_30spfaf06


## Statistics on Decadal Statistics Water Stress

In [18]:
waterstress_tier2_decadal_indicators = ["waterstress"]

# Column names are "<prefix 0><prefix 1>waterstress_dimensionless_30spfaf06".
waterstress_tier2_decadal_statistics_0 = ["avg_","min_","max_","slope_","intercept_","ols_"]
waterstress_tier2_decadal_statistics_1 = ["ols10_","ma10_"]


for waterstress_tier2_decadal_indicator in waterstress_tier2_decadal_indicators:
    for waterstress_tier2_decadal_statistic_0 in waterstress_tier2_decadal_statistics_0:
        for waterstress_tier2_decadal_statistic_1 in waterstress_tier2_decadal_statistics_1:
            indicator = "{}{}{}_dimensionless_30spfaf06".format(waterstress_tier2_decadal_statistic_0,waterstress_tier2_decadal_statistic_1,waterstress_tier2_decadal_indicator)
            print(indicator)
            # Skip names already collected so a re-run of this cell does not
            # add duplicate columns to the generated SELECT list.
            if indicator not in columns_to_keep_left:
                columns_to_keep_left.append(indicator)

avg_ols10_waterstress_dimensionless_30spfaf06
avg_ma10_waterstress_dimensionless_30spfaf06
min_ols10_waterstress_dimensionless_30spfaf06
min_ma10_waterstress_dimensionless_30spfaf06
max_ols10_waterstress_dimensionless_30spfaf06
max_ma10_waterstress_dimensionless_30spfaf06
slope_ols10_waterstress_dimensionless_30spfaf06
slope_ma10_waterstress_dimensionless_30spfaf06
intercept_ols10_waterstress_dimensionless_30spfaf06
intercept_ma10_waterstress_dimensionless_30spfaf06
ols_ols10_waterstress_dimensionless_30spfaf06
ols_ma10_waterstress_dimensionless_30spfaf06


## Water Stress Complete Timeseries

In [19]:
waterstress_complete_indicators = ["waterstress"]

# Prefixes of the water stress statistics listed under the
# "Water Stress Complete Timeseries" heading.
waterstress_complete_statistics = ["min_",
                                   "max_",
                                   "avg_",
                                   "slope_",
                                   "intercept_",
                                   "ols_"]

for waterstress_complete_indicator in waterstress_complete_indicators:
    for waterstress_complete_statistic in waterstress_complete_statistics:
        indicator = "{}{}_dimensionless_30spfaf06".format(waterstress_complete_statistic,waterstress_complete_indicator)
        print(indicator)
        # Skip names already collected so a re-run of this cell does not add
        # duplicate columns to the generated SELECT list.
        if indicator not in columns_to_keep_left:
            columns_to_keep_left.append(indicator)



min_waterstress_dimensionless_30spfaf06
max_waterstress_dimensionless_30spfaf06
avg_waterstress_dimensionless_30spfaf06
slope_waterstress_dimensionless_30spfaf06
intercept_waterstress_dimensionless_30spfaf06
ols_waterstress_dimensionless_30spfaf06


In [20]:
# Preview query: the selected columns for the first 100 rows of the left table.
# str.join places the separators, so there is no trailing comma to slice off;
# the old slice clipped the SELECT keyword itself when the list was empty.
# For a non-empty list the resulting string is unchanged.
sql = "SELECT " + ", ".join(columns_to_keep_left)
sql += " FROM {}".format(INPUT_TABLE_NAME_LEFT)
sql += " LIMIT 100"

In [21]:
# Display the preview query assembled in the previous cell.
sql

'SELECT pfafid_30spfaf06, temporal_resolution, year, month, area_m2_30spfaf06, area_count_30spfaf06, ptotww_m_30spfaf06, ptotwn_m_30spfaf06, pdomww_m_30spfaf06, pdomwn_m_30spfaf06, pindww_m_30spfaf06, pindwn_m_30spfaf06, pirrww_m_30spfaf06, pirrwn_m_30spfaf06, plivww_m_30spfaf06, plivwn_m_30spfaf06, riverdischarge_m_30spfaf06, ma10_ptotww_m_30spfaf06, ma10_ptotwn_m_30spfaf06, ma10_pdomww_m_30spfaf06, ma10_pdomwn_m_30spfaf06, ma10_pindww_m_30spfaf06, ma10_pindwn_m_30spfaf06, ma10_pirrww_m_30spfaf06, ma10_pirrwn_m_30spfaf06, ma10_plivww_m_30spfaf06, ma10_plivwn_m_30spfaf06, ma10_riverdischarge_m_30spfaf06, min10_ptotww_m_30spfaf06, min10_ptotwn_m_30spfaf06, min10_pdomww_m_30spfaf06, min10_pdomwn_m_30spfaf06, min10_pindww_m_30spfaf06, min10_pindwn_m_30spfaf06, min10_pirrww_m_30spfaf06, min10_pirrwn_m_30spfaf06, min10_plivww_m_30spfaf06, min10_plivwn_m_30spfaf06, min10_riverdischarge_m_30spfaf06, max10_ptotww_m_30spfaf06, max10_ptotwn_m_30spfaf06, max10_pdomww_m_30spfaf06, max10_pdomwn_m_3

In [22]:
# Overwrites the column-list query above with a SELECT * sample (10 rows, all
# columns); the sample is used below to list the columns the table contains.
sql = "SELECT * FROM {} LIMIT 10".format(INPUT_TABLE_NAME_LEFT)

In [23]:
# Load the 10-row sample; it is only used to inspect the table's column names.
df_complete = pd.read_sql(sql,engine)

In [24]:
# Show the first rows of the sample.
df_complete.head()

Unnamed: 0,pfafid_30spfaf06,temporal_resolution,year,month,area_m2_30spfaf06,area_count_30spfaf06,pdomww_m_30spfaf06,pdomwn_m_30spfaf06,pindww_m_30spfaf06,pindwn_m_30spfaf06,pirrww_m_30spfaf06,pirrwn_m_30spfaf06,plivww_m_30spfaf06,plivwn_m_30spfaf06,ptotww_m_30spfaf06,ptotwn_m_30spfaf06,riverdischarge_m_30spfaf06,ma10_pdomww_m_30spfaf06,min10_pdomww_m_30spfaf06,max10_pdomww_m_30spfaf06,slope10_pdomww_m_30spfaf06,intercept10_pdomww_m_30spfaf06,ols10_pdomww_m_30spfaf06,ma10_pdomwn_m_30spfaf06,min10_pdomwn_m_30spfaf06,max10_pdomwn_m_30spfaf06,slope10_pdomwn_m_30spfaf06,intercept10_pdomwn_m_30spfaf06,ols10_pdomwn_m_30spfaf06,ma10_pindww_m_30spfaf06,min10_pindww_m_30spfaf06,max10_pindww_m_30spfaf06,slope10_pindww_m_30spfaf06,intercept10_pindww_m_30spfaf06,ols10_pindww_m_30spfaf06,ma10_pindwn_m_30spfaf06,min10_pindwn_m_30spfaf06,max10_pindwn_m_30spfaf06,slope10_pindwn_m_30spfaf06,intercept10_pindwn_m_30spfaf06,ols10_pindwn_m_30spfaf06,ma10_pirrww_m_30spfaf06,min10_pirrww_m_30spfaf06,max10_pirrww_m_30spfaf06,slope10_pirrww_m_30spfaf06,intercept10_pirrww_m_30spfaf06,ols10_pirrww_m_30spfaf06,ma10_pirrwn_m_30spfaf06,min10_pirrwn_m_30spfaf06,max10_pirrwn_m_30spfaf06,slope10_pirrwn_m_30spfaf06,intercept10_pirrwn_m_30spfaf06,ols10_pirrwn_m_30spfaf06,ma10_plivww_m_30spfaf06,min10_plivww_m_30spfaf06,max10_plivww_m_30spfaf06,slope10_plivww_m_30spfaf06,intercept10_plivww_m_30spfaf06,ols10_plivww_m_30spfaf06,ma10_plivwn_m_30spfaf06,min10_plivwn_m_30spfaf06,max10_plivwn_m_30spfaf06,slope10_plivwn_m_30spfaf06,intercept10_plivwn_m_30spfaf06,ols10_plivwn_m_30spfaf06,ma10_ptotww_m_30spfaf06,min10_ptotww_m_30spfaf06,max10_ptotww_m_30spfaf06,slope10_ptotww_m_30spfaf06,intercept10_ptotww_m_30spfaf06,ols10_ptotww_m_30spfaf06,ma10_ptotwn_m_30spfaf06,min10_ptotwn_m_30spfaf06,max10_ptotwn_m_30spfaf06,slope10_ptotwn_m_30spfaf06,intercept10_ptotwn_m_30spfaf06,ols10_ptotwn_m_30spfaf06,ma10_riverdischarge_m_30spfaf06,min10_riverdischarge_m_30spfaf06,max10_riverdischarge_m_30spfaf06,slope10_riverdisc
harge_m_30spfaf06,intercept10_riverdischarge_m_30spfaf06,ols10_riverdischarge_m_30spfaf06,capped_ols10_pdomww_m_30spfaf06,capped_ols10_pdomwn_m_30spfaf06,capped_ols10_pindww_m_30spfaf06,capped_ols10_pindwn_m_30spfaf06,capped_ols10_pirrww_m_30spfaf06,capped_ols10_pirrwn_m_30spfaf06,capped_ols10_plivww_m_30spfaf06,capped_ols10_plivwn_m_30spfaf06,capped_ols10_ptotww_m_30spfaf06,capped_ols10_ptotwn_m_30spfaf06,capped_ols10_riverdischarge_m_30spfaf06,arid_boolean_30spfaf06,ma10_arid_boolean_30spfaf06,ols10_arid_boolean_30spfaf06,lowwateruse_boolean_30spfaf06,ma10_lowwateruse_boolean_30spfaf06,ols10_lowwateruse_boolean_30spfaf06,aridandlowwateruse_boolean_30spfaf06,ma10_aridandlowwateruse_boolean_30spfaf06,ols10_aridandlowwateruse_boolean_30spfaf06,waterstress_dimensionless_30spfaf06,ma10_waterstress_dimensionless_30spfaf06,ols10_waterstress_dimensionless_30spfaf06,avg_waterstress_dimensionless_30spfaf06,min_waterstress_dimensionless_30spfaf06,max_waterstress_dimensionless_30spfaf06,slope_waterstress_dimensionless_30spfaf06,intercept_waterstress_dimensionless_30spfaf06,ols_waterstress_dimensionless_30spfaf06,avg_riverdischarge_m_30spfaf06,min_riverdischarge_m_30spfaf06,max_riverdischarge_m_30spfaf06,slope_riverdischarge_m_30spfaf06,intercept_riverdischarge_m_30spfaf06,ols_riverdischarge_m_30spfaf06,avg_ptotww_m_30spfaf06,min_ptotww_m_30spfaf06,max_ptotww_m_30spfaf06,slope_ptotww_m_30spfaf06,intercept_ptotww_m_30spfaf06,ols_ptotww_m_30spfaf06,avg_ptotwn_m_30spfaf06,min_ptotwn_m_30spfaf06,max_ptotwn_m_30spfaf06,slope_ptotwn_m_30spfaf06,intercept_ptotwn_m_30spfaf06,ols_ptotwn_m_30spfaf06,avg_ma10_waterstress_dimensionless_30spfaf06,min_ma10_waterstress_dimensionless_30spfaf06,max_ma10_waterstress_dimensionless_30spfaf06,slope_ma10_waterstress_dimensionless_30spfaf06,intercept_ma10_waterstress_dimensionless_30spfaf06,ols_ma10_waterstress_dimensionless_30spfaf06,avg_ma10_riverdischarge_m_30spfaf06,min_ma10_riverdischarge_m_30spfaf06,max_ma10_riverdischarge_m_30spfaf06,slope_ma
10_riverdischarge_m_30spfaf06,intercept_ma10_riverdischarge_m_30spfaf06,ols_ma10_riverdischarge_m_30spfaf06,avg_ma10_ptotww_m_30spfaf06,min_ma10_ptotww_m_30spfaf06,max_ma10_ptotww_m_30spfaf06,slope_ma10_ptotww_m_30spfaf06,intercept_ma10_ptotww_m_30spfaf06,ols_ma10_ptotww_m_30spfaf06,avg_ma10_ptotwn_m_30spfaf06,min_ma10_ptotwn_m_30spfaf06,max_ma10_ptotwn_m_30spfaf06,slope_ma10_ptotwn_m_30spfaf06,intercept_ma10_ptotwn_m_30spfaf06,ols_ma10_ptotwn_m_30spfaf06,avg_ols10_waterstress_dimensionless_30spfaf06,min_ols10_waterstress_dimensionless_30spfaf06,max_ols10_waterstress_dimensionless_30spfaf06,slope_ols10_waterstress_dimensionless_30spfaf06,intercept_ols10_waterstress_dimensionless_30spfaf06,ols_ols10_waterstress_dimensionless_30spfaf06,avg_ols10_riverdischarge_m_30spfaf06,min_ols10_riverdischarge_m_30spfaf06,max_ols10_riverdischarge_m_30spfaf06,slope_ols10_riverdischarge_m_30spfaf06,intercept_ols10_riverdischarge_m_30spfaf06,ols_ols10_riverdischarge_m_30spfaf06,avg_ols10_ptotww_m_30spfaf06,min_ols10_ptotww_m_30spfaf06,max_ols10_ptotww_m_30spfaf06,slope_ols10_ptotww_m_30spfaf06,intercept_ols10_ptotww_m_30spfaf06,ols_ols10_ptotww_m_30spfaf06,avg_ols10_ptotwn_m_30spfaf06,min_ols10_ptotwn_m_30spfaf06,max_ols10_ptotwn_m_30spfaf06,slope_ols10_ptotwn_m_30spfaf06,intercept_ols10_ptotwn_m_30spfaf06,ols_ols10_ptotwn_m_30spfaf06,ols_ols10_arid_boolean_30spfaf06,ols_ols10_lowwateruse_boolean_30spfaf06,ols_ols10_aridandlowwateruse_boolean_30spfaf06
0,422550,month,2006,3,1360174000.0,2422,0.00011,7.5e-05,0.000132,2.6e-05,0.0,0.0,6.929083e-07,6.929083e-07,0.000242,0.000102,2.36507,8.4e-05,6.7e-05,0.00011,4e-06,-0.007381,0.0001,5.8e-05,5.1e-05,7.5e-05,2e-06,-0.004157,6.8e-05,0.000102,8.2e-05,0.000132,5e-06,-0.008959,0.000122,2.5e-05,1.9e-05,3.3e-05,-4.353238e-07,0.000896,2.3e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.598303e-07,6.929083e-07,8.209049e-07,-1.015568e-08,2.1e-05,7.141297e-07,7.598303e-07,6.929083e-07,8.209049e-07,-1.015568e-08,2.1e-05,7.141297e-07,0.000186,0.000149,0.000242,8e-06,-0.016319,0.000223,8.4e-05,7.1e-05,0.000102,2e-06,-0.00324,9.1e-05,1.6034,1.118945,2.36507,0.071543,-141.58927,1.925342,0.0001,6.8e-05,0.000122,2.3e-05,0.0,0.0,7.141297e-07,7.141297e-07,0.000223,9.1e-05,1.925342,0,0,0,1,1,1,0,0,0,0.000103,0.000116,0.000116,0.000111,4.2e-05,0.000202,1.087296e-06,-0.002045,0.000136,1.476929,0.827097,2.456937,0.006511,-11.434546,1.626684,0.000154,8.9e-05,0.000242,3e-06,-0.004905,0.000212,6.6e-05,3.7e-05,0.000102,1e-06,-0.002233,9.3e-05,9.9e-05,4.2e-05,0.000151,2e-06,-0.003065,0.000135,1.490692,1.020092,2.128852,-0.001654,4.76987,1.452658,0.000143,8.9e-05,0.000186,3e-06,-0.004906,0.000202,6.2e-05,3.7e-05,8.4e-05,1e-06,-0.002267,8.9e-05,0.000113,4.6e-05,0.000224,8.638932e-07,-0.0016,0.000133,1.427581,0.768823,2.21278,0.008253,-14.943112,1.613283,0.000154,9e-05,0.000223,2e-06,-0.004532,0.000207,6.7e-05,3.7e-05,9.1e-05,1e-06,-0.002142,9.2e-05,0,1,0
1,422550,month,2007,3,1360174000.0,2422,0.000119,8.1e-05,0.000144,2.9e-05,0.0,0.0,6.552824e-07,6.552824e-07,0.000263,0.000111,1.916197,8.7e-05,6.7e-05,0.000119,6e-06,-0.010963,0.000112,6.1e-05,5.1e-05,8.1e-05,3e-06,-0.006161,7.5e-05,0.000106,8.2e-05,0.000144,7e-06,-0.013086,0.000136,2.6e-05,1.9e-05,3.3e-05,-5.588282e-07,0.001145,2.3e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.502869e-07,6.552824e-07,8.209049e-07,-1.646589e-08,3.4e-05,6.761904e-07,7.502869e-07,6.552824e-07,8.209049e-07,-1.646589e-08,3.4e-05,6.761904e-07,0.000194,0.000149,0.000263,1.2e-05,-0.024015,0.000249,8.7e-05,7.1e-05,0.000111,3e-06,-0.004982,9.9e-05,1.662576,1.118945,2.36507,0.070006,-138.525395,1.977605,0.000112,7.5e-05,0.000136,2.3e-05,0.0,0.0,6.761904e-07,6.761904e-07,0.000249,9.9e-05,1.977605,0,0,0,1,1,1,0,0,0,0.000137,0.000117,0.000126,0.000112,4.2e-05,0.000202,1.087838e-06,-0.002046,0.000137,1.486081,0.827097,2.456937,0.007233,-12.860624,1.656057,0.000156,8.9e-05,0.000263,3e-06,-0.00515,0.000219,6.7e-05,3.7e-05,0.000111,1e-06,-0.002317,9.6e-05,9.9e-05,4.2e-05,0.000151,2e-06,-0.002963,0.000135,1.494273,1.020092,2.128852,-0.001114,3.703736,1.468096,0.000144,8.9e-05,0.000194,3e-06,-0.004855,0.000203,6.2e-05,3.7e-05,8.7e-05,1e-06,-0.002255,9e-05,0.000114,4.6e-05,0.000224,8.424777e-07,-0.001558,0.000133,1.439284,0.768823,2.21278,0.0092,-16.814365,1.650894,0.000156,9e-05,0.000249,2e-06,-0.00474,0.000212,6.7e-05,3.7e-05,9.9e-05,1e-06,-0.002173,9.3e-05,0,1,0
2,422550,month,2008,3,1360174000.0,2422,0.000128,8.8e-05,0.000155,3.1e-05,0.0,0.0,6.632052e-07,6.632052e-07,0.000284,0.000119,2.492136,9.3e-05,6.7e-05,0.000128,7e-06,-0.013509,0.000123,6.4e-05,5.1e-05,8.8e-05,4e-06,-0.00829,8.3e-05,0.000113,8.2e-05,0.000155,8e-06,-0.016075,0.000149,2.5e-05,1.9e-05,3.3e-05,2.194721e-07,-0.000414,2.6e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.374193e-07,6.552824e-07,8.209049e-07,-1.844282e-08,3.8e-05,6.544266e-07,7.374193e-07,6.552824e-07,8.209049e-07,-1.844282e-08,3.8e-05,6.544266e-07,0.000206,0.000149,0.000284,1.5e-05,-0.029546,0.000273,9e-05,7.1e-05,0.000119,4e-06,-0.008667,0.00011,1.764146,1.118945,2.492136,0.102846,-204.28767,2.226953,0.000123,8.3e-05,0.000149,2.6e-05,0.0,0.0,6.552824e-07,6.552824e-07,0.000273,0.00011,2.226953,0,0,0,1,1,1,0,0,0,0.000114,0.000117,0.000123,0.000112,4.2e-05,0.000202,1.028095e-06,-0.001928,0.000136,1.506612,0.827097,2.492136,0.009263,-16.870886,1.728921,0.000159,8.9e-05,0.000284,3e-06,-0.005453,0.000226,6.8e-05,3.7e-05,0.000119,1e-06,-0.002426,9.9e-05,0.0001,4.2e-05,0.000151,1e-06,-0.002866,0.000135,1.499781,1.020092,2.128852,-0.000386,2.265948,1.490513,0.000145,8.9e-05,0.000206,3e-06,-0.004857,0.000206,6.3e-05,3.7e-05,9e-05,1e-06,-0.002253,9.1e-05,0.000114,4.6e-05,0.000224,8.137705e-07,-0.001501,0.000133,1.455694,0.768823,2.226953,0.010646,-19.672278,1.705886,0.000158,9e-05,0.000273,3e-06,-0.005034,0.00022,6.8e-05,3.7e-05,0.00011,1e-06,-0.002252,9.6e-05,0,1,0
3,422550,month,2009,3,1360174000.0,2422,0.000118,8.1e-05,0.000144,2.9e-05,0.0,0.0,6.598352e-07,6.598352e-07,0.000262,0.000111,2.729062,9.8e-05,7.3e-05,0.000128,6e-06,-0.012796,0.000127,6.7e-05,5.1e-05,8.8e-05,4e-06,-0.00835,8.6e-05,0.000119,8.8e-05,0.000155,8e-06,-0.015268,0.000154,2.5e-05,1.9e-05,3.3e-05,7.106449e-07,-0.001399,2.9e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.232856e-07,6.552824e-07,8.209049e-07,-1.842448e-08,3.8e-05,6.403754e-07,7.232856e-07,6.552824e-07,8.209049e-07,-1.842448e-08,3.8e-05,6.403754e-07,0.000218,0.000162,0.000284,1.4e-05,-0.028025,0.000281,9.3e-05,7.1e-05,0.000119,5e-06,-0.009712,0.000115,1.87389,1.118945,2.729062,0.146643,-292.071699,2.533783,0.000127,8.6e-05,0.000154,2.9e-05,0.0,0.0,6.552824e-07,6.552824e-07,0.000281,0.000115,2.533783,0,0,0,1,1,1,0,0,0,9.6e-05,0.000116,0.000111,0.000111,4.2e-05,0.000202,9.305836e-07,-0.001735,0.000134,1.531061,0.827097,2.729062,0.011594,-21.477887,1.815122,0.000161,8.9e-05,0.000284,3e-06,-0.005605,0.000232,6.9e-05,3.7e-05,0.000119,1e-06,-0.002476,0.000101,0.0001,4.2e-05,0.000151,1e-06,-0.002769,0.000135,1.507263,1.020092,2.128852,0.000517,0.481673,1.519925,0.000147,8.9e-05,0.000218,3e-06,-0.0049,0.000209,6.3e-05,3.7e-05,9.3e-05,1e-06,-0.00226,9.2e-05,0.000114,4.6e-05,0.000224,7.576358e-07,-0.00139,0.000132,1.477695,0.768823,2.533783,0.012648,-23.628436,1.781246,0.000161,9e-05,0.000281,3e-06,-0.005319,0.000227,6.9e-05,3.7e-05,0.000115,1e-06,-0.002341,9.8e-05,0,1,0
4,422550,month,2010,3,1360174000.0,2422,0.000125,8.6e-05,0.000153,3.1e-05,0.0,0.0,6.688972e-07,6.688972e-07,0.000279,0.000118,3.884806,0.000103,7.6e-05,0.000128,6e-06,-0.012411,0.000131,7e-05,5.1e-05,8.8e-05,4e-06,-0.008789,9e-05,0.000125,9.3e-05,0.000155,7e-06,-0.014899,0.000159,2.5e-05,1.9e-05,3.1e-05,1.498299e-06,-0.00298,3.2e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.080848e-07,6.552824e-07,7.928787e-07,-1.488316e-08,3.1e-05,6.411106e-07,7.080848e-07,6.552824e-07,7.928787e-07,-1.488316e-08,3.1e-05,6.411106e-07,0.000229,0.00017,0.000284,1.4e-05,-0.027279,0.000291,9.6e-05,7.1e-05,0.000119,6e-06,-0.011738,0.000123,2.085667,1.118945,3.884806,0.249206,-497.696774,3.207093,0.000128,8.8e-05,0.000155,3.1e-05,0.0,0.0,6.552824e-07,6.552824e-07,0.000284,0.000119,3.207093,0,0,0,1,1,1,0,0,0,7.2e-05,0.00011,9.1e-05,0.000111,4.2e-05,0.000202,7.873323e-07,-0.001452,0.00013,1.577213,0.827097,3.884806,0.016251,-30.680311,1.983479,0.000163,8.9e-05,0.000284,3e-06,-0.005804,0.000238,7e-05,3.7e-05,0.000119,1e-06,-0.002547,0.000103,0.0001,4.2e-05,0.000151,1e-06,-0.00265,0.000135,1.518604,1.020092,2.128852,0.001796,-2.045642,1.563494,0.000149,8.9e-05,0.000229,3e-06,-0.004979,0.000213,6.4e-05,3.7e-05,9.6e-05,1e-06,-0.002273,9.3e-05,0.000113,4.6e-05,0.000224,6.587784e-07,-0.001195,0.00013,1.512283,0.768823,3.207093,0.015973,-30.202308,1.903624,0.000163,9e-05,0.000291,3e-06,-0.005605,0.000234,7e-05,3.7e-05,0.000123,1e-06,-0.002449,0.000101,0,1,0


In [25]:
# Every column label present in the sampled left table, in table order.
all_columns = df_complete.columns.tolist()

In [26]:
# Number of columns in the source table ...
len(all_columns)

181

In [27]:
# ... compared with the number of columns selected for the output.
len(columns_to_keep_left)

181

In [28]:
# What Columns are excluded?
# Source-table columns that are not in the selection. The set was computed but
# never shown; ending the cell with the name displays the answer.
missing_columns = set(all_columns) - set(columns_to_keep_left)
missing_columns

In [29]:
# The single column taken from the right-hand table in the join below.
columns_to_keep_right = ["avg1y_ols_ols10_waterstress_dimensionless_30spfaf06"]

In [30]:
# Assemble "CREATE TABLE <output> AS SELECT ... FROM left l INNER JOIN right r".
# Left-table columns are qualified with "l.", right-table columns with "r.".
select_items = ["l.{}".format(column) for column in columns_to_keep_left]
select_items += ["r.{}".format(column) for column in columns_to_keep_right]

sql =  "CREATE TABLE {} AS".format(OUTPUT_TABLE_NAME)
# str.join places the separators, so no trailing comma has to be sliced off
# (the old slice removed an arbitrary character when both lists were empty).
# The double space after SELECT is kept so the statement text is unchanged.
sql += " SELECT  " + ", ".join(select_items)
sql += " FROM {} l".format(INPUT_TABLE_NAME_LEFT)
sql += " INNER JOIN {} r ON".format(INPUT_TABLE_NAME_RIGHT)
# NOTE(review): rows are matched on the text concatenation of basin id and
# year. Comparing the two columns directly (l.pfafid = r.pfafid AND
# l.year = r.year) would let the planner use plain column comparisons, but it
# requires compatible column types in both tables — confirm before changing.
sql += " CONCAT(l.pfafid_30spfaf06,l.year) = CONCAT(r.pfafid_30spfaf06,r.year)"
    

In [31]:
# Display the CREATE TABLE statement before it is executed.
sql

'CREATE TABLE y2018m07d12_rh_merge_simplify_tables_postgis_v01_v04 AS SELECT  l.pfafid_30spfaf06, l.temporal_resolution, l.year, l.month, l.area_m2_30spfaf06, l.area_count_30spfaf06, l.ptotww_m_30spfaf06, l.ptotwn_m_30spfaf06, l.pdomww_m_30spfaf06, l.pdomwn_m_30spfaf06, l.pindww_m_30spfaf06, l.pindwn_m_30spfaf06, l.pirrww_m_30spfaf06, l.pirrwn_m_30spfaf06, l.plivww_m_30spfaf06, l.plivwn_m_30spfaf06, l.riverdischarge_m_30spfaf06, l.ma10_ptotww_m_30spfaf06, l.ma10_ptotwn_m_30spfaf06, l.ma10_pdomww_m_30spfaf06, l.ma10_pdomwn_m_30spfaf06, l.ma10_pindww_m_30spfaf06, l.ma10_pindwn_m_30spfaf06, l.ma10_pirrww_m_30spfaf06, l.ma10_pirrwn_m_30spfaf06, l.ma10_plivww_m_30spfaf06, l.ma10_plivwn_m_30spfaf06, l.ma10_riverdischarge_m_30spfaf06, l.min10_ptotww_m_30spfaf06, l.min10_ptotwn_m_30spfaf06, l.min10_pdomww_m_30spfaf06, l.min10_pdomwn_m_30spfaf06, l.min10_pindww_m_30spfaf06, l.min10_pindwn_m_30spfaf06, l.min10_pirrww_m_30spfaf06, l.min10_pirrwn_m_30spfaf06, l.min10_plivww_m_30spfaf06, l.min10_pl

In [None]:
# Execute the CREATE TABLE ... AS SELECT statement built above.
result = engine.execute(sql)

In [None]:
# Index on the basin id column of the new table.
# NOTE(review): the index name is the table name directly followed by the
# column name. For the v04 output table that is 68 characters, which is over
# PostgreSQL's 63-byte identifier limit, so the server truncates the name.
sql_index = "CREATE INDEX {}pfafid_30spfaf06 ON {} ({})".format(OUTPUT_TABLE_NAME,OUTPUT_TABLE_NAME,"pfafid_30spfaf06")

In [None]:
# Create the pfafid_30spfaf06 index.
result = engine.execute(sql_index)

In [None]:
# Index on the year column; the index name is the table name followed by "year".
sql_index2 = "CREATE INDEX {}year ON {} ({})".format(OUTPUT_TABLE_NAME,OUTPUT_TABLE_NAME,"year")

In [None]:
# Create the year index.
result = engine.execute(sql_index2)

In [None]:
# Index on the month column; the index name is the table name followed by "month".
sql_index3 = "CREATE INDEX {}month ON {} ({})".format(OUTPUT_TABLE_NAME,OUTPUT_TABLE_NAME,"month")

In [None]:
# Create the month index.
result = engine.execute(sql_index3)

In [None]:
# All database work is done; dispose of the engine.
engine.dispose()

In [None]:
# Report the wall-clock run time; `start` was captured in the timestamp cell
# near the top of the notebook.
end = datetime.datetime.now()
elapsed = end - start
print(elapsed)

Previous runs:  