In [1]:
""" Merge and simplify master table and annual scores based on months.
-------------------------------------------------------------------------------

Author: Rutger Hofste
Date: 20180712
Kernel: python35
Docker: rutgerhofste/gisdocker:ubuntu16.04

Args:
    TESTING (Boolean) : Toggle testing case.
    OVERWRITE_OUTPUT (Boolean) : Drop the output table first if it already
        exists.
    SCRIPT_NAME (string) : Script name.
    OUTPUT_VERSION (integer) : Output version.
    DATABASE_ENDPOINT (string) : RDS or PostgreSQL endpoint.
    DATABASE_NAME (string) : Database name.
    INPUT_TABLE_NAME_LEFT (string) : Name of the left input table. Must exist
        on the database given above.
    INPUT_TABLE_NAME_RIGHT (string) : Name of the right input table that holds
        the annual score columns. Must exist on the same database.

"""

# Run configuration.
# NOTE(review): TESTING is not referenced by any other visible cell.
TESTING = 0
# When truthy, an existing output table is dropped before it is recreated.
OVERWRITE_OUTPUT = 1
SCRIPT_NAME = 'Y2018M07D12_RH_Merge_Simplify_Tables_PostGIS_V01'
OUTPUT_VERSION = 5

DATABASE_ENDPOINT = "aqueduct30v05.cgpnumwmfcqc.eu-central-1.rds.amazonaws.com"
DATABASE_NAME = "database01"

# The left table supplies most of the output columns; the right table supplies
# the annual score columns that are joined onto it.
INPUT_TABLE_NAME_LEFT = "y2018m07d09_rh_apply_aridlowonce_mask_postgis_v01_v01"
INPUT_TABLE_NAME_RIGHT = "y2018m07d12_rh_annual_scores_from_months_postgis_v01_v04"
# Output table name: lower-cased script name plus a zero-padded, two-digit
# version suffix (e.g. "_v05").
OUTPUT_TABLE_NAME = SCRIPT_NAME.lower() + "_v{:02.0f}".format(OUTPUT_VERSION)

print("Input Table Left: " , INPUT_TABLE_NAME_LEFT, 
      "Input Table Right: " , INPUT_TABLE_NAME_RIGHT, 
      "\nOutput Table: " , OUTPUT_TABLE_NAME)

Input Table Left:  y2018m07d09_rh_apply_aridlowonce_mask_postgis_v01_v01 Input Table Right:  y2018m07d12_rh_annual_scores_from_months_postgis_v01_v04 
Output Table:  y2018m07d12_rh_merge_simplify_tables_postgis_v01_v05


In [2]:
import time, datetime, sys

# The printed label says "UTC", so format the UTC clock explicitly:
# time.strftime() without a time tuple formats the machine's *local* time,
# which is only correct when the host happens to run in UTC.
utc_now = time.gmtime()
dateString = time.strftime("Y%YM%mD%d", utc_now)
timeString = time.strftime("UTC %H:%M", utc_now)
# Wall-clock start; the last cell subtracts this to report the total run time.
start = datetime.datetime.now()
print(dateString,timeString)
sys.version

Y2018M07D16 UTC 11:37


'3.5.4 |Anaconda, Inc.| (default, Nov 20 2017, 18:44:38) \n[GCC 7.2.0]'

In [3]:
# imports
# NOTE(review): re, os, np, aqueduct3 and timedelta are not referenced in the
# visible cells of this notebook -- confirm before removing any of them.
import re
import os
import numpy as np
import pandas as pd
import aqueduct3
from datetime import timedelta
# Wildcard import: create_engine, used for the database connection, comes from
# here.
from sqlalchemy import *
# Allow up to 500 columns when a DataFrame is displayed.
pd.set_option('display.max_columns', 500)

In [4]:
# Read the database password from the first line of a local file. The context
# manager closes the file even when reading or indexing raises.
with open("/.password","r") as password_file:
    password = password_file.read().splitlines()[0]

# Engine for the PostgreSQL database configured in the first cell.
engine = create_engine("postgresql://rutgerhofste:{}@{}:5432/{}".format(password,DATABASE_ENDPOINT,DATABASE_NAME))

if OVERWRITE_OUTPUT:
    # Remove an earlier version of the output table so that the CREATE TABLE
    # statement later in the notebook does not fail on a re-run.
    sql = "DROP TABLE IF EXISTS {};".format(OUTPUT_TABLE_NAME)
    print(sql)
    result = engine.execute(sql)

DROP TABLE IF EXISTS y2018m07d12_rh_merge_simplify_tables_postgis_v01_v05;


In [5]:
# Identifier, time and area columns taken from the left table.
# Later cells append the indicator columns to this same list.
columns_to_keep_left = [
    "pfafid_30spfaf06",
    "temporal_resolution", "year", "month",
    "area_m2_30spfaf06", "area_count_30spfaf06",
]

In [6]:
# Placeholder: the right-table column list is assigned its actual value in a
# later cell, just before the CREATE TABLE statement is built.
columns_to_keep_right = []

## Raw Data and Decadal Statistics

In [7]:
# Sector prefixes and use-type suffixes; each pair is concatenated into one
# indicator name (e.g. "ptot" + "ww" -> "ptotww").
sectors = ["ptot", "pdom", "pind", "pirr", "pliv"]
use_types = ["ww", "wn"]

In [8]:
# One indicator name per sector / use-type pair, sector-major:
# "ptotww", "ptotwn", "pdomww", ...
decadal_indicators = [
    "{}{}".format(sector, use_type)
    for sector in sectors
    for use_type in use_types
]

In [9]:
# Add riverdischarge as an extra indicator next to the sector / use-type
# combinations. Note: re-running this cell alone appends a duplicate entry.
decadal_indicators.append("riverdischarge")

In [10]:
# Column-name prefixes; the empty string selects the column without a prefix.
decadal_statistics = [
    "", "ma10_", "min10_", "max10_",
    "slope10_", "intercept10_", "ols10_", "capped_ols10_",
]

In [11]:
# Every statistic prefix combined with every indicator, statistic-major.
decadal_columns = [
    "{}{}_m_30spfaf06".format(statistic_prefix, indicator_name)
    for statistic_prefix in decadal_statistics
    for indicator_name in decadal_indicators
]
# Echo the generated names, then register them for selection from the left table.
for indicator in decadal_columns:
    print(indicator)
columns_to_keep_left.extend(decadal_columns)

ptotww_m_30spfaf06
ptotwn_m_30spfaf06
pdomww_m_30spfaf06
pdomwn_m_30spfaf06
pindww_m_30spfaf06
pindwn_m_30spfaf06
pirrww_m_30spfaf06
pirrwn_m_30spfaf06
plivww_m_30spfaf06
plivwn_m_30spfaf06
riverdischarge_m_30spfaf06
ma10_ptotww_m_30spfaf06
ma10_ptotwn_m_30spfaf06
ma10_pdomww_m_30spfaf06
ma10_pdomwn_m_30spfaf06
ma10_pindww_m_30spfaf06
ma10_pindwn_m_30spfaf06
ma10_pirrww_m_30spfaf06
ma10_pirrwn_m_30spfaf06
ma10_plivww_m_30spfaf06
ma10_plivwn_m_30spfaf06
ma10_riverdischarge_m_30spfaf06
min10_ptotww_m_30spfaf06
min10_ptotwn_m_30spfaf06
min10_pdomww_m_30spfaf06
min10_pdomwn_m_30spfaf06
min10_pindww_m_30spfaf06
min10_pindwn_m_30spfaf06
min10_pirrww_m_30spfaf06
min10_pirrwn_m_30spfaf06
min10_plivww_m_30spfaf06
min10_plivwn_m_30spfaf06
min10_riverdischarge_m_30spfaf06
max10_ptotww_m_30spfaf06
max10_ptotwn_m_30spfaf06
max10_pdomww_m_30spfaf06
max10_pdomwn_m_30spfaf06
max10_pindww_m_30spfaf06
max10_pindwn_m_30spfaf06
max10_pirrww_m_30spfaf06
max10_pirrwn_m_30spfaf06
max10_plivww_m_30spfaf06
max

## Statistics on Decadal Statistics

In [12]:
# Column names of the form <prefix 0><prefix 1><indicator>_m_30spfaf06.
tier2_decadal_indicators = ["ptotww", "ptotwn", "riverdischarge"]

tier2_decadal_statistics_0 = ["ols_", "avg_", "min_", "max_", "slope_", "intercept_"]
tier2_decadal_statistics_1 = ["ma10_", "ols10_"]

# Generated indicator-major, then prefix 0, then prefix 1.
tier2_columns = [
    "{}{}{}_m_30spfaf06".format(statistic_0, statistic_1, tier2_indicator)
    for tier2_indicator in tier2_decadal_indicators
    for statistic_0 in tier2_decadal_statistics_0
    for statistic_1 in tier2_decadal_statistics_1
]
for indicator in tier2_columns:
    print(indicator)
columns_to_keep_left.extend(tier2_columns)
            
    


ols_ma10_ptotww_m_30spfaf06
ols_ols10_ptotww_m_30spfaf06
avg_ma10_ptotww_m_30spfaf06
avg_ols10_ptotww_m_30spfaf06
min_ma10_ptotww_m_30spfaf06
min_ols10_ptotww_m_30spfaf06
max_ma10_ptotww_m_30spfaf06
max_ols10_ptotww_m_30spfaf06
slope_ma10_ptotww_m_30spfaf06
slope_ols10_ptotww_m_30spfaf06
intercept_ma10_ptotww_m_30spfaf06
intercept_ols10_ptotww_m_30spfaf06
ols_ma10_ptotwn_m_30spfaf06
ols_ols10_ptotwn_m_30spfaf06
avg_ma10_ptotwn_m_30spfaf06
avg_ols10_ptotwn_m_30spfaf06
min_ma10_ptotwn_m_30spfaf06
min_ols10_ptotwn_m_30spfaf06
max_ma10_ptotwn_m_30spfaf06
max_ols10_ptotwn_m_30spfaf06
slope_ma10_ptotwn_m_30spfaf06
slope_ols10_ptotwn_m_30spfaf06
intercept_ma10_ptotwn_m_30spfaf06
intercept_ols10_ptotwn_m_30spfaf06
ols_ma10_riverdischarge_m_30spfaf06
ols_ols10_riverdischarge_m_30spfaf06
avg_ma10_riverdischarge_m_30spfaf06
avg_ols10_riverdischarge_m_30spfaf06
min_ma10_riverdischarge_m_30spfaf06
min_ols10_riverdischarge_m_30spfaf06
max_ma10_riverdischarge_m_30spfaf06
max_ols10_riverdischarge_m_30

## Complete TimeSeries Statistics

In [13]:
# Statistics based on the full time series are available for ptotww, ptotwn
# and riverdischarge only.
complete_timeseries_statistics = ["avg_", "min_", "max_", "slope_", "intercept_", "ols_"]

complete_timeseries_indicators = ["ptotww", "ptotwn", "riverdischarge"]


In [14]:
# Every full-series statistic prefix combined with every indicator,
# statistic-major.
complete_timeseries_columns = [
    "{}{}_m_30spfaf06".format(statistic_prefix, indicator_name)
    for statistic_prefix in complete_timeseries_statistics
    for indicator_name in complete_timeseries_indicators
]
for indicator in complete_timeseries_columns:
    print(indicator)
columns_to_keep_left.extend(complete_timeseries_columns)

avg_ptotww_m_30spfaf06
avg_ptotwn_m_30spfaf06
avg_riverdischarge_m_30spfaf06
min_ptotww_m_30spfaf06
min_ptotwn_m_30spfaf06
min_riverdischarge_m_30spfaf06
max_ptotww_m_30spfaf06
max_ptotwn_m_30spfaf06
max_riverdischarge_m_30spfaf06
slope_ptotww_m_30spfaf06
slope_ptotwn_m_30spfaf06
slope_riverdischarge_m_30spfaf06
intercept_ptotww_m_30spfaf06
intercept_ptotwn_m_30spfaf06
intercept_riverdischarge_m_30spfaf06
ols_ptotww_m_30spfaf06
ols_ptotwn_m_30spfaf06
ols_riverdischarge_m_30spfaf06


## Raw and Decadal Arid and Lowwater Use Columns

In [15]:
# Boolean mask columns: <prefix><indicator>_boolean_30spfaf06.
arid_lowwateruse_indicators = ["arid", "lowwateruse", "aridandlowwateruse"]

# The empty prefix selects the column without a prefix.
arid_lowwateruse_statistics_tier0 = ["", "ma10_", "ols10_"]

# Generated indicator-major, then prefix.
arid_lowwateruse_columns = [
    "{}{}_boolean_30spfaf06".format(statistic_prefix, mask_name)
    for mask_name in arid_lowwateruse_indicators
    for statistic_prefix in arid_lowwateruse_statistics_tier0
]
for indicator in arid_lowwateruse_columns:
    print(indicator)
columns_to_keep_left.extend(arid_lowwateruse_columns)



arid_boolean_30spfaf06
ma10_arid_boolean_30spfaf06
ols10_arid_boolean_30spfaf06
lowwateruse_boolean_30spfaf06
ma10_lowwateruse_boolean_30spfaf06
ols10_lowwateruse_boolean_30spfaf06
aridandlowwateruse_boolean_30spfaf06
ma10_aridandlowwateruse_boolean_30spfaf06
ols10_aridandlowwateruse_boolean_30spfaf06


## Statistics on Decadal Statistics Arid and Lowwater Use

In [16]:
# Boolean mask columns with two stacked prefixes:
# <prefix 0><prefix 1><indicator>_boolean_30spfaf06.
lowarid_tier2_decadal_indicators = ["arid", "lowwateruse", "aridandlowwateruse"]

lowarid_tier2_decadal_statistics_0 = ["ols_"]
lowarid_tier2_decadal_statistics_1 = ["ols10_"]

# Generated indicator-major, then prefix 0, then prefix 1.
lowarid_tier2_columns = [
    "{}{}{}_boolean_30spfaf06".format(statistic_0, statistic_1, mask_name)
    for mask_name in lowarid_tier2_decadal_indicators
    for statistic_0 in lowarid_tier2_decadal_statistics_0
    for statistic_1 in lowarid_tier2_decadal_statistics_1
]
for indicator in lowarid_tier2_columns:
    print(indicator)
columns_to_keep_left.extend(lowarid_tier2_columns)

ols_ols10_arid_boolean_30spfaf06
ols_ols10_lowwateruse_boolean_30spfaf06
ols_ols10_aridandlowwateruse_boolean_30spfaf06


## Water Stress Decadal

In [17]:
# Water stress columns: <prefix>waterstress_dimensionless_30spfaf06.
waterstress_decadal_indicators = ["waterstress"]

# The empty prefix selects the column without a prefix.
waterstress_decadal_statistics = ["", "ma10_", "ols10_"]

waterstress_decadal_columns = [
    "{}{}_dimensionless_30spfaf06".format(statistic_prefix, indicator_name)
    for indicator_name in waterstress_decadal_indicators
    for statistic_prefix in waterstress_decadal_statistics
]
for indicator in waterstress_decadal_columns:
    print(indicator)
columns_to_keep_left.extend(waterstress_decadal_columns)


waterstress_dimensionless_30spfaf06
ma10_waterstress_dimensionless_30spfaf06
ols10_waterstress_dimensionless_30spfaf06


## Statistics on Decadal Statistics Water Stress

In [18]:
# Water stress columns with two stacked prefixes:
# <prefix 0><prefix 1>waterstress_dimensionless_30spfaf06.
waterstress_tier2_decadal_indicators = ["waterstress"]

waterstress_tier2_decadal_statistics_0 = ["avg_", "min_", "max_", "slope_", "intercept_", "ols_"]
waterstress_tier2_decadal_statistics_1 = ["ols10_", "ma10_"]

# Generated indicator-major, then prefix 0, then prefix 1.
waterstress_tier2_columns = [
    "{}{}{}_dimensionless_30spfaf06".format(statistic_0, statistic_1, indicator_name)
    for indicator_name in waterstress_tier2_decadal_indicators
    for statistic_0 in waterstress_tier2_decadal_statistics_0
    for statistic_1 in waterstress_tier2_decadal_statistics_1
]
for indicator in waterstress_tier2_columns:
    print(indicator)
columns_to_keep_left.extend(waterstress_tier2_columns)

avg_ols10_waterstress_dimensionless_30spfaf06
avg_ma10_waterstress_dimensionless_30spfaf06
min_ols10_waterstress_dimensionless_30spfaf06
min_ma10_waterstress_dimensionless_30spfaf06
max_ols10_waterstress_dimensionless_30spfaf06
max_ma10_waterstress_dimensionless_30spfaf06
slope_ols10_waterstress_dimensionless_30spfaf06
slope_ma10_waterstress_dimensionless_30spfaf06
intercept_ols10_waterstress_dimensionless_30spfaf06
intercept_ma10_waterstress_dimensionless_30spfaf06
ols_ols10_waterstress_dimensionless_30spfaf06
ols_ma10_waterstress_dimensionless_30spfaf06


## Water Stress Complete Timeseries

In [19]:
# Water stress columns for the complete time series:
# <prefix>waterstress_dimensionless_30spfaf06.
waterstress_complete_indicators = ["waterstress"]

waterstress_complete_statistics = ["min_", "max_", "avg_", "slope_", "intercept_", "ols_"]

waterstress_complete_columns = [
    "{}{}_dimensionless_30spfaf06".format(statistic_prefix, indicator_name)
    for indicator_name in waterstress_complete_indicators
    for statistic_prefix in waterstress_complete_statistics
]
for indicator in waterstress_complete_columns:
    print(indicator)
columns_to_keep_left.extend(waterstress_complete_columns)



min_waterstress_dimensionless_30spfaf06
max_waterstress_dimensionless_30spfaf06
avg_waterstress_dimensionless_30spfaf06
slope_waterstress_dimensionless_30spfaf06
intercept_waterstress_dimensionless_30spfaf06
ols_waterstress_dimensionless_30spfaf06


In [20]:
# Preview query over the selected left-table columns, limited to 100 rows.
# The statement is only displayed; `sql` is reassigned before any query is run.
select_fragments = "".join(
    " {},".format(column_name) for column_name in columns_to_keep_left
)
# Slice off the final character (the trailing comma of the last fragment).
sql = ("SELECT" + select_fragments)[:-1]
sql = sql + " FROM {}".format(INPUT_TABLE_NAME_LEFT) + " LIMIT 100"

In [21]:
# Display the generated preview statement for inspection.
sql

'SELECT pfafid_30spfaf06, temporal_resolution, year, month, area_m2_30spfaf06, area_count_30spfaf06, ptotww_m_30spfaf06, ptotwn_m_30spfaf06, pdomww_m_30spfaf06, pdomwn_m_30spfaf06, pindww_m_30spfaf06, pindwn_m_30spfaf06, pirrww_m_30spfaf06, pirrwn_m_30spfaf06, plivww_m_30spfaf06, plivwn_m_30spfaf06, riverdischarge_m_30spfaf06, ma10_ptotww_m_30spfaf06, ma10_ptotwn_m_30spfaf06, ma10_pdomww_m_30spfaf06, ma10_pdomwn_m_30spfaf06, ma10_pindww_m_30spfaf06, ma10_pindwn_m_30spfaf06, ma10_pirrww_m_30spfaf06, ma10_pirrwn_m_30spfaf06, ma10_plivww_m_30spfaf06, ma10_plivwn_m_30spfaf06, ma10_riverdischarge_m_30spfaf06, min10_ptotww_m_30spfaf06, min10_ptotwn_m_30spfaf06, min10_pdomww_m_30spfaf06, min10_pdomwn_m_30spfaf06, min10_pindww_m_30spfaf06, min10_pindwn_m_30spfaf06, min10_pirrww_m_30spfaf06, min10_pirrwn_m_30spfaf06, min10_plivww_m_30spfaf06, min10_plivwn_m_30spfaf06, min10_riverdischarge_m_30spfaf06, max10_ptotww_m_30spfaf06, max10_ptotwn_m_30spfaf06, max10_pdomww_m_30spfaf06, max10_pdomwn_m_3

In [22]:
# Ten-row sample with *all* columns of the left table; this replaces the
# column-specific preview statement held in `sql`.
sql = "SELECT * FROM {} LIMIT 10".format(INPUT_TABLE_NAME_LEFT)

In [23]:
# Run the sample query and load the rows into a DataFrame.
df_complete = pd.read_sql(sql,engine)

In [24]:
# Show the first rows of the sample.
df_complete.head()

Unnamed: 0,pfafid_30spfaf06,temporal_resolution,year,month,area_m2_30spfaf06,area_count_30spfaf06,pdomww_m_30spfaf06,pdomwn_m_30spfaf06,pindww_m_30spfaf06,pindwn_m_30spfaf06,pirrww_m_30spfaf06,pirrwn_m_30spfaf06,plivww_m_30spfaf06,plivwn_m_30spfaf06,ptotww_m_30spfaf06,ptotwn_m_30spfaf06,riverdischarge_m_30spfaf06,ma10_pdomww_m_30spfaf06,min10_pdomww_m_30spfaf06,max10_pdomww_m_30spfaf06,slope10_pdomww_m_30spfaf06,intercept10_pdomww_m_30spfaf06,ols10_pdomww_m_30spfaf06,ma10_pdomwn_m_30spfaf06,min10_pdomwn_m_30spfaf06,max10_pdomwn_m_30spfaf06,slope10_pdomwn_m_30spfaf06,intercept10_pdomwn_m_30spfaf06,ols10_pdomwn_m_30spfaf06,ma10_pindww_m_30spfaf06,min10_pindww_m_30spfaf06,max10_pindww_m_30spfaf06,slope10_pindww_m_30spfaf06,intercept10_pindww_m_30spfaf06,ols10_pindww_m_30spfaf06,ma10_pindwn_m_30spfaf06,min10_pindwn_m_30spfaf06,max10_pindwn_m_30spfaf06,slope10_pindwn_m_30spfaf06,intercept10_pindwn_m_30spfaf06,ols10_pindwn_m_30spfaf06,ma10_pirrww_m_30spfaf06,min10_pirrww_m_30spfaf06,max10_pirrww_m_30spfaf06,slope10_pirrww_m_30spfaf06,intercept10_pirrww_m_30spfaf06,ols10_pirrww_m_30spfaf06,ma10_pirrwn_m_30spfaf06,min10_pirrwn_m_30spfaf06,max10_pirrwn_m_30spfaf06,slope10_pirrwn_m_30spfaf06,intercept10_pirrwn_m_30spfaf06,ols10_pirrwn_m_30spfaf06,ma10_plivww_m_30spfaf06,min10_plivww_m_30spfaf06,max10_plivww_m_30spfaf06,slope10_plivww_m_30spfaf06,intercept10_plivww_m_30spfaf06,ols10_plivww_m_30spfaf06,ma10_plivwn_m_30spfaf06,min10_plivwn_m_30spfaf06,max10_plivwn_m_30spfaf06,slope10_plivwn_m_30spfaf06,intercept10_plivwn_m_30spfaf06,ols10_plivwn_m_30spfaf06,ma10_ptotww_m_30spfaf06,min10_ptotww_m_30spfaf06,max10_ptotww_m_30spfaf06,slope10_ptotww_m_30spfaf06,intercept10_ptotww_m_30spfaf06,ols10_ptotww_m_30spfaf06,ma10_ptotwn_m_30spfaf06,min10_ptotwn_m_30spfaf06,max10_ptotwn_m_30spfaf06,slope10_ptotwn_m_30spfaf06,intercept10_ptotwn_m_30spfaf06,ols10_ptotwn_m_30spfaf06,ma10_riverdischarge_m_30spfaf06,min10_riverdischarge_m_30spfaf06,max10_riverdischarge_m_30spfaf06,slope10_riverdisc
harge_m_30spfaf06,intercept10_riverdischarge_m_30spfaf06,ols10_riverdischarge_m_30spfaf06,capped_ols10_pdomww_m_30spfaf06,capped_ols10_pdomwn_m_30spfaf06,capped_ols10_pindww_m_30spfaf06,capped_ols10_pindwn_m_30spfaf06,capped_ols10_pirrww_m_30spfaf06,capped_ols10_pirrwn_m_30spfaf06,capped_ols10_plivww_m_30spfaf06,capped_ols10_plivwn_m_30spfaf06,capped_ols10_ptotww_m_30spfaf06,capped_ols10_ptotwn_m_30spfaf06,capped_ols10_riverdischarge_m_30spfaf06,arid_boolean_30spfaf06,ma10_arid_boolean_30spfaf06,ols10_arid_boolean_30spfaf06,lowwateruse_boolean_30spfaf06,ma10_lowwateruse_boolean_30spfaf06,ols10_lowwateruse_boolean_30spfaf06,aridandlowwateruse_boolean_30spfaf06,ma10_aridandlowwateruse_boolean_30spfaf06,ols10_aridandlowwateruse_boolean_30spfaf06,waterstress_dimensionless_30spfaf06,ma10_waterstress_dimensionless_30spfaf06,ols10_waterstress_dimensionless_30spfaf06,avg_waterstress_dimensionless_30spfaf06,min_waterstress_dimensionless_30spfaf06,max_waterstress_dimensionless_30spfaf06,slope_waterstress_dimensionless_30spfaf06,intercept_waterstress_dimensionless_30spfaf06,ols_waterstress_dimensionless_30spfaf06,avg_riverdischarge_m_30spfaf06,min_riverdischarge_m_30spfaf06,max_riverdischarge_m_30spfaf06,slope_riverdischarge_m_30spfaf06,intercept_riverdischarge_m_30spfaf06,ols_riverdischarge_m_30spfaf06,avg_ptotww_m_30spfaf06,min_ptotww_m_30spfaf06,max_ptotww_m_30spfaf06,slope_ptotww_m_30spfaf06,intercept_ptotww_m_30spfaf06,ols_ptotww_m_30spfaf06,avg_ptotwn_m_30spfaf06,min_ptotwn_m_30spfaf06,max_ptotwn_m_30spfaf06,slope_ptotwn_m_30spfaf06,intercept_ptotwn_m_30spfaf06,ols_ptotwn_m_30spfaf06,avg_ma10_waterstress_dimensionless_30spfaf06,min_ma10_waterstress_dimensionless_30spfaf06,max_ma10_waterstress_dimensionless_30spfaf06,slope_ma10_waterstress_dimensionless_30spfaf06,intercept_ma10_waterstress_dimensionless_30spfaf06,ols_ma10_waterstress_dimensionless_30spfaf06,avg_ma10_riverdischarge_m_30spfaf06,min_ma10_riverdischarge_m_30spfaf06,max_ma10_riverdischarge_m_30spfaf06,slope_ma
10_riverdischarge_m_30spfaf06,intercept_ma10_riverdischarge_m_30spfaf06,ols_ma10_riverdischarge_m_30spfaf06,avg_ma10_ptotww_m_30spfaf06,min_ma10_ptotww_m_30spfaf06,max_ma10_ptotww_m_30spfaf06,slope_ma10_ptotww_m_30spfaf06,intercept_ma10_ptotww_m_30spfaf06,ols_ma10_ptotww_m_30spfaf06,avg_ma10_ptotwn_m_30spfaf06,min_ma10_ptotwn_m_30spfaf06,max_ma10_ptotwn_m_30spfaf06,slope_ma10_ptotwn_m_30spfaf06,intercept_ma10_ptotwn_m_30spfaf06,ols_ma10_ptotwn_m_30spfaf06,avg_ols10_waterstress_dimensionless_30spfaf06,min_ols10_waterstress_dimensionless_30spfaf06,max_ols10_waterstress_dimensionless_30spfaf06,slope_ols10_waterstress_dimensionless_30spfaf06,intercept_ols10_waterstress_dimensionless_30spfaf06,ols_ols10_waterstress_dimensionless_30spfaf06,avg_ols10_riverdischarge_m_30spfaf06,min_ols10_riverdischarge_m_30spfaf06,max_ols10_riverdischarge_m_30spfaf06,slope_ols10_riverdischarge_m_30spfaf06,intercept_ols10_riverdischarge_m_30spfaf06,ols_ols10_riverdischarge_m_30spfaf06,avg_ols10_ptotww_m_30spfaf06,min_ols10_ptotww_m_30spfaf06,max_ols10_ptotww_m_30spfaf06,slope_ols10_ptotww_m_30spfaf06,intercept_ols10_ptotww_m_30spfaf06,ols_ols10_ptotww_m_30spfaf06,avg_ols10_ptotwn_m_30spfaf06,min_ols10_ptotwn_m_30spfaf06,max_ols10_ptotwn_m_30spfaf06,slope_ols10_ptotwn_m_30spfaf06,intercept_ols10_ptotwn_m_30spfaf06,ols_ols10_ptotwn_m_30spfaf06,ols_ols10_arid_boolean_30spfaf06,ols_ols10_lowwateruse_boolean_30spfaf06,ols_ols10_aridandlowwateruse_boolean_30spfaf06
0,111011,month,1960,1,1885917000.0,2536,6.1e-05,4.9e-05,0.000827,0.000331,3.8e-05,1.4e-05,2.812859e-08,2.812859e-08,0.000926,0.000394,4.4e-05,6.1e-05,6.1e-05,6.1e-05,,,,4.9e-05,4.9e-05,4.9e-05,,,,0.000827,0.000827,0.000827,,,,0.000331,0.000331,0.000331,,,,3.8e-05,3.8e-05,3.8e-05,,,,1.4e-05,1.4e-05,1.4e-05,,,,2.812859e-08,2.812859e-08,2.812859e-08,,,,2.812859e-08,2.812859e-08,2.812859e-08,,,,0.000926,0.000926,0.000926,,,,0.000394,0.000394,0.000394,,,,4.4e-05,4.4e-05,4.4e-05,,,,,,,,,,,,,,,1,1,0,1,1,0,1,1,0,1.0,1.0,,1.0,1.0,1.0,,,,4.4e-05,4.4e-05,4.4e-05,,,,0.000926,0.000926,0.000926,,,,0.000394,0.000394,0.000394,,,,1.0,1.0,1.0,,,,4.4e-05,4.4e-05,4.4e-05,,,,0.000926,0.000926,0.000926,,,,0.000394,0.000394,0.000394,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,0,0
1,111011,month,1961,1,1885917000.0,2536,6.3e-05,5e-05,0.000832,0.000333,3.7e-05,1.4e-05,2.891903e-08,2.891903e-08,0.000932,0.000397,5e-06,6.2e-05,6.1e-05,6.3e-05,1.84916e-06,-0.003564,6.3e-05,4.9e-05,4.9e-05,5e-05,1.266633e-06,-0.002434,5e-05,0.00083,0.000827,0.000832,4.581474e-06,-0.008152,0.000832,0.000332,0.000331,0.000333,1.83259e-06,-0.003261,0.000333,3.8e-05,3.7e-05,3.8e-05,-9.904065e-07,0.001979,3.7e-05,1.4e-05,1.4e-05,1.4e-05,-3.743737e-07,0.000748,1.4e-05,2.852381e-08,2.812859e-08,2.891903e-08,7.904398e-10,-2e-06,2.891903e-08,2.852381e-08,2.812859e-08,2.891903e-08,7.904398e-10,-2e-06,2.891903e-08,0.000929,0.000926,0.000932,5e-06,-0.009738,0.000932,0.000396,0.000394,0.000397,2.72564e-06,-0.004948,0.000397,2.4e-05,5e-06,4.4e-05,-3.9e-05,0.07709,5e-06,6.3e-05,5e-05,0.000832,0.000333,3.7e-05,1.4e-05,2.891903e-08,2.891903e-08,0.000932,0.000397,5e-06,1,1,1,1,1,1,1,1,1,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,2.4e-05,5e-06,4.4e-05,-3.9e-05,0.07709,5e-06,0.000929,0.000926,0.000932,5e-06,-0.009738,0.000932,0.000396,0.000394,0.000397,2.72564e-06,-0.004948,0.000397,1.0,1.0,1.0,0.0,1.0,1.0,3.4e-05,2.4e-05,4.4e-05,-2e-05,0.038567,2.4e-05,0.000928,0.000926,0.000929,2.720509e-06,-0.004406,0.000929,0.000395,0.000394,0.000396,1.36282e-06,-0.002277,0.000396,1.0,1.0,1.0,,,,5e-06,5e-06,5e-06,,,,0.000932,0.000932,0.000932,,,,0.000397,0.000397,0.000397,,,,1,0,0
2,111011,month,1962,1,1885917000.0,2536,6.2e-05,5e-05,0.000806,0.000322,3.6e-05,1.3e-05,2.978633e-08,2.978633e-08,0.000904,0.000385,7.5e-05,6.2e-05,6.1e-05,6.3e-05,7.396608e-07,-0.001389,6.3e-05,4.9e-05,4.9e-05,5e-05,3.94368e-07,-0.000724,5e-05,0.000822,0.000806,0.000832,-1.067461e-05,0.021755,0.000811,0.000329,0.000322,0.000333,-4.269847e-06,0.008702,0.000324,3.7e-05,3.6e-05,3.8e-05,-1.259547e-06,0.002507,3.6e-05,1.4e-05,1.3e-05,1.4e-05,-4.76109e-07,0.000948,1.3e-05,2.894465e-08,2.812859e-08,2.978633e-08,8.288671e-10,-2e-06,2.977352e-08,2.894465e-08,2.812859e-08,2.978633e-08,8.288671e-10,-2e-06,2.977352e-08,0.000921,0.000904,0.000932,-1.1e-05,0.022871,0.000909,0.000392,0.000385,0.000397,-4.350759e-06,0.008924,0.000388,4.1e-05,5e-06,7.5e-05,1.6e-05,-0.030708,5.7e-05,6.3e-05,5e-05,0.000811,0.000324,3.6e-05,1.3e-05,2.977352e-08,2.977352e-08,0.000909,0.000388,5.7e-05,1,1,1,1,1,1,1,1,1,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,4.1e-05,5e-06,7.5e-05,1.6e-05,-0.030708,5.7e-05,0.000921,0.000904,0.000932,-1.1e-05,0.022871,0.000909,0.000392,0.000385,0.000397,-4.350759e-06,0.008924,0.000388,1.0,1.0,1.0,0.0,1.0,1.0,3.7e-05,2.4e-05,4.4e-05,-1e-06,0.002634,3.5e-05,0.000925,0.000921,0.000929,-2.824385e-06,0.006464,0.000922,0.000394,0.000392,0.000396,-9.959795e-07,0.002347,0.000393,1.0,1.0,1.0,0.0,1.0,1.0,3.1e-05,5e-06,5.7e-05,5.2e-05,-0.102634,5.7e-05,0.000921,0.000909,0.000932,-2.228346e-05,0.04463,0.000909,0.000392,0.000388,0.000397,-9.068358e-06,0.01818,0.000388,1,0,0
3,111011,month,1963,1,1885917000.0,2536,6.4e-05,5.1e-05,0.000809,0.000324,5.6e-05,2.1e-05,3.070544e-08,3.070544e-08,0.000929,0.000396,0.0,6.2e-05,6.1e-05,6.4e-05,9.607242e-07,-0.001822,6.4e-05,5e-05,4.9e-05,5.1e-05,5.738213e-07,-0.001076,5.1e-05,0.000819,0.000806,0.000832,-8.087036e-06,0.016681,0.000806,0.000327,0.000322,0.000333,-3.234817e-06,0.006673,0.000323,4.2e-05,3.6e-05,5.6e-05,5.168713e-06,-0.010097,4.9e-05,1.6e-05,1.3e-05,2.1e-05,1.953773e-06,-0.003817,1.9e-05,2.938485e-08,2.812859e-08,3.070544e-08,8.597848e-10,-2e-06,3.067453e-08,2.938485e-08,2.812859e-08,3.070544e-08,8.597848e-10,-2e-06,3.067453e-08,0.000923,0.000904,0.000932,-2e-06,0.004761,0.00092,0.000393,0.000385,0.000397,-7.063624e-07,0.001779,0.000392,3.1e-05,0.0,7.5e-05,-6e-06,0.012099,2.2e-05,6.4e-05,5.1e-05,0.000806,0.000323,4.9e-05,1.9e-05,3.067453e-08,3.067453e-08,0.00092,0.000392,2.2e-05,1,1,1,1,1,1,1,1,1,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,3.1e-05,0.0,7.5e-05,-6e-06,0.012099,2.2e-05,0.000923,0.000904,0.000932,-2e-06,0.004761,0.00092,0.000393,0.000385,0.000397,-7.063624e-07,0.001779,0.000392,1.0,1.0,1.0,0.0,1.0,1.0,3.5e-05,2.4e-05,4.4e-05,-2e-06,0.004351,3.2e-05,0.000925,0.000921,0.000929,-1.901377e-06,0.004654,0.000922,0.000394,0.000392,0.000396,-6.745804e-07,0.001717,0.000393,1.0,1.0,1.0,0.0,1.0,1.0,2.8e-05,5e-06,5.7e-05,9e-06,-0.016725,3.6e-05,0.00092,0.000909,0.000932,-5.962147e-06,0.012618,0.000914,0.000392,0.000388,0.000397,-2.457763e-06,0.005214,0.00039,1,0,0
4,111011,month,1964,1,1885917000.0,2536,6.8e-05,5.4e-05,0.000837,0.000335,3e-05,1.1e-05,3.178289e-08,3.178289e-08,0.000935,0.0004,2e-06,6.4e-05,6.1e-05,6.8e-05,1.595331e-06,-0.003066,6.7e-05,5.1e-05,4.9e-05,5.4e-05,1.075717e-06,-0.00206,5.3e-05,0.000822,0.000806,0.000837,-3.526929e-07,0.001514,0.000822,0.000329,0.000322,0.000335,-1.410787e-07,0.000606,0.000329,3.9e-05,3e-05,5.6e-05,2.450448e-07,-0.000441,4e-05,1.5e-05,1.1e-05,2.1e-05,9.262683e-08,-0.000167,1.5e-05,2.986446e-08,2.812859e-08,3.178289e-08,9.095001e-10,-2e-06,3.168346e-08,2.986446e-08,2.812859e-08,3.178289e-08,9.095001e-10,-2e-06,3.168346e-08,0.000925,0.000904,0.000935,1e-06,-0.001995,0.000928,0.000394,0.000385,0.0004,1.028175e-06,-0.001623,0.000396,2.5e-05,0.0,7.5e-05,-9e-06,0.017627,7e-06,6.7e-05,5.3e-05,0.000822,0.000329,4e-05,1.5e-05,3.168346e-08,3.168346e-08,0.000928,0.000396,7e-06,1,1,1,1,1,1,1,1,1,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,2.5e-05,0.0,7.5e-05,-9e-06,0.017627,7e-06,0.000925,0.000904,0.000935,1e-06,-0.001995,0.000928,0.000394,0.000385,0.0004,1.028175e-06,-0.001623,0.000396,1.0,1.0,1.0,0.0,1.0,1.0,3.3e-05,2.4e-05,4.4e-05,-3e-06,0.006144,2.7e-05,0.000925,0.000921,0.000929,-8.43108e-07,0.002579,0.000923,0.000394,0.000392,0.000396,-1.991132e-07,0.000785,0.000393,1.0,1.0,1.0,0.0,1.0,1.0,2.3e-05,5e-06,5.7e-05,-3e-06,0.00549,1.9e-05,0.000922,0.000909,0.000932,-2.759557e-08,0.000976,0.000922,0.000393,0.000388,0.000397,2.898007e-07,-0.000175,0.000394,1,0,0


In [25]:
# Column labels of the sample frame (iterating a DataFrame yields its labels).
all_columns = list(df_complete)

In [26]:
# Number of columns present in the left table sample.
len(all_columns)

181

In [27]:
# Number of columns selected so far; compare with the count of all columns.
len(columns_to_keep_left)

181

In [28]:
# Which columns of the left table are not in the selection?
# The resulting set is stored only; this cell does not display it.
missing_columns = set(all_columns).difference(columns_to_keep_left)

In [29]:
# Annual score columns taken from the right table.
columns_to_keep_right = [
    "avg1y_ols_ols10_waterstress_dimensionless_30spfaf06",
    "avg1y_ols_ols10_weighted_waterstress_dimensionless_30spfaf06",
]

In [30]:
# Build the CREATE TABLE ... AS SELECT statement: selected left-table columns
# (alias l) followed by the right-table columns (alias r).
left_fragments = "".join(
    " l.{},".format(column_name) for column_name in columns_to_keep_left
)
right_fragments = "".join(
    " r.{},".format(column_name) for column_name in columns_to_keep_right
)
sql = "CREATE TABLE {} AS".format(OUTPUT_TABLE_NAME) + " SELECT " + left_fragments + right_fragments
# Slice off the final character (the trailing comma of the last fragment).
sql = sql[:-1]
# Inner join on basin id and year, compared as concatenated strings.
# NOTE(review): CONCAT() is presumably used to sidestep a type difference
# between the two tables -- confirm; a join on the two columns separately
# would avoid building a string for every row.
sql = (sql
       + " FROM {} l".format(INPUT_TABLE_NAME_LEFT)
       + " INNER JOIN {} r ON".format(INPUT_TABLE_NAME_RIGHT)
       + " CONCAT(l.pfafid_30spfaf06,l.year) = CONCAT(r.pfafid_30spfaf06,r.year)")
    

In [31]:
# Display the generated CREATE TABLE statement before it is executed.
sql

'CREATE TABLE y2018m07d12_rh_merge_simplify_tables_postgis_v01_v05 AS SELECT  l.pfafid_30spfaf06, l.temporal_resolution, l.year, l.month, l.area_m2_30spfaf06, l.area_count_30spfaf06, l.ptotww_m_30spfaf06, l.ptotwn_m_30spfaf06, l.pdomww_m_30spfaf06, l.pdomwn_m_30spfaf06, l.pindww_m_30spfaf06, l.pindwn_m_30spfaf06, l.pirrww_m_30spfaf06, l.pirrwn_m_30spfaf06, l.plivww_m_30spfaf06, l.plivwn_m_30spfaf06, l.riverdischarge_m_30spfaf06, l.ma10_ptotww_m_30spfaf06, l.ma10_ptotwn_m_30spfaf06, l.ma10_pdomww_m_30spfaf06, l.ma10_pdomwn_m_30spfaf06, l.ma10_pindww_m_30spfaf06, l.ma10_pindwn_m_30spfaf06, l.ma10_pirrww_m_30spfaf06, l.ma10_pirrwn_m_30spfaf06, l.ma10_plivww_m_30spfaf06, l.ma10_plivwn_m_30spfaf06, l.ma10_riverdischarge_m_30spfaf06, l.min10_ptotww_m_30spfaf06, l.min10_ptotwn_m_30spfaf06, l.min10_pdomww_m_30spfaf06, l.min10_pdomwn_m_30spfaf06, l.min10_pindww_m_30spfaf06, l.min10_pindwn_m_30spfaf06, l.min10_pirrww_m_30spfaf06, l.min10_pirrwn_m_30spfaf06, l.min10_plivww_m_30spfaf06, l.min10_pl

In [32]:
# Execute the CREATE TABLE ... AS SELECT statement on the database.
result = engine.execute(sql)

In [33]:
# Index on the basin id column; the index name is the table name with the
# column name appended.
# NOTE(review): with the current 52-character table name this index name is 68
# characters, which exceeds PostgreSQL's default 63-byte identifier limit, so
# the server will truncate it -- confirm that is acceptable.
sql_index = "CREATE INDEX {}pfafid_30spfaf06 ON {} ({})".format(OUTPUT_TABLE_NAME,OUTPUT_TABLE_NAME,"pfafid_30spfaf06")

In [34]:
# Create the index on pfafid_30spfaf06.
result = engine.execute(sql_index)

In [35]:
# Index on the year column; the index name is the table name with "year" appended.
sql_index2 = "CREATE INDEX {}year ON {} ({})".format(OUTPUT_TABLE_NAME,OUTPUT_TABLE_NAME,"year")

In [36]:
# Create the index on year.
result = engine.execute(sql_index2)

In [37]:
# Index on the month column; the index name is the table name with "month" appended.
sql_index3 = "CREATE INDEX {}month ON {} ({})".format(OUTPUT_TABLE_NAME,OUTPUT_TABLE_NAME,"month")

In [38]:
# Create the index on month.
result = engine.execute(sql_index3)

In [39]:
# Release the engine's pooled database connections now that all statements ran.
engine.dispose()

In [40]:
# Report the total run time, measured from `start` (set in the timestamp cell
# near the top of the notebook).
end = datetime.datetime.now()
elapsed = end - start
print(elapsed)

0:21:34.564407


Previous runs:  
0:21:34.564407