In [1]:
""" Upload ICEP Basins to BQ.
-------------------------------------------------------------------------------

Author: Rutger Hofste
Date: 20181114
Kernel: python35
Docker: rutgerhofste/gisdocker:ubuntu16.04

"""

# Run switches.
OVERWRITE_OUTPUT = 1
TESTING = 0

# Script identity; the output table name is derived from these two values.
OUTPUT_VERSION = 1
SCRIPT_NAME = 'Y2018M11D14_RH_ICEPBasins_To_BQ_V01'

# Source: PostGIS table on RDS.
RDS_DATABASE_NAME = "database01"
RDS_DATABASE_ENDPOINT = "aqueduct30v05.cgpnumwmfcqc.eu-central-1.rds.amazonaws.com"
RDS_INPUT_TABLE_NAME = "y2018m10d01_rh_icep_basins_postgis_v01_v02"

# Destination: BigQuery project, one dataset per geometry representation.
BQ_PROJECT_ID = "aqueduct30"
BQ_OUTPUT_DATASET_NAME_GEOG = "geospatial_geog_v01"
BQ_OUTPUT_DATASET_NAME_WKT = "geospatial_wkt_v01"
BQ_OUTPUT_TABLE_NAME = "{}_v{:02.0f}".format(SCRIPT_NAME.lower(), OUTPUT_VERSION)

# Echo the effective settings as alternating label/value arguments so that
# print() joins them with its default single-space separator.
_settings_echo = (
    "\nRDS_DATABASE_ENDPOINT: ", RDS_DATABASE_ENDPOINT,
    "\nRDS_DATABASE_NAME: ", RDS_DATABASE_NAME,
    "\nRDS_INPUT_TABLE_NAME: ", RDS_INPUT_TABLE_NAME,
    "\nBQ_OUTPUT_DATASET_NAME_WKT: ", BQ_OUTPUT_DATASET_NAME_WKT,
    "\nBQ_OUTPUT_DATASET_NAME_GEOG: ", BQ_OUTPUT_DATASET_NAME_GEOG,
    "\nBQ_OUTPUT_TABLE_NAME: ", BQ_OUTPUT_TABLE_NAME,
)
print(*_settings_echo)


RDS_DATABASE_ENDPOINT:  aqueduct30v05.cgpnumwmfcqc.eu-central-1.rds.amazonaws.com 
RDS_DATABASE_NAME:  database01 
RDS_INPUT_TABLE_NAME:  y2018m10d01_rh_icep_basins_postgis_v01_v02 
BQ_OUTPUT_DATASET_NAME_WKT:  geospatial_wkt_v01 
BQ_OUTPUT_DATASET_NAME_GEOG:  geospatial_geog_v01 
BQ_OUTPUT_TABLE_NAME:  y2018m11d14_rh_icepbasins_to_bq_v01_v01


In [2]:
import time, datetime, sys
# The stamp is labelled "UTC", so format it from time.gmtime(): time.strftime()
# without an explicit time tuple formats the machine's *local* time, which is
# only correct when the kernel happens to run with a UTC time zone.
_utc_now = time.gmtime()
dateString = time.strftime("Y%YM%mD%d", _utc_now)
timeString = time.strftime("UTC %H:%M", _utc_now)

# Wall-clock start, used by the last cell to report the elapsed run time.
start = datetime.datetime.now()
print(dateString,timeString)
sys.version

Y2018M11D15 UTC 10:03


'3.5.4 |Anaconda, Inc.| (default, Nov 20 2017, 18:44:38) \n[GCC 7.2.0]'

In [3]:
import os
from urllib.parse import quote

import sqlalchemy
import pandas as pd
import geopandas as gpd
from google.cloud import bigquery

# Point the Google client libraries at the service-account key file and at the
# project. The project id comes from BQ_PROJECT_ID so that the environment
# variable and the BigQuery client cannot drift apart.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/.google.json"
os.environ["GOOGLE_CLOUD_PROJECT"] = BQ_PROJECT_ID
client = bigquery.Client(project=BQ_PROJECT_ID)

In [4]:
# Read the database password from the first line of the password file.
# The context manager closes the file even if reading raises.
with open("/.password", "r") as password_file:
    password = password_file.read().splitlines()[0]

# Percent-encode the password before embedding it in the connection URL:
# characters such as "@", "/" or ":" would otherwise be parsed as URL
# delimiters. Passwords without special characters are left unchanged.
engine = sqlalchemy.create_engine(
    "postgresql://rutgerhofste:{}@{}:5432/{}".format(
        quote(password, safe=""),
        RDS_DATABASE_ENDPOINT,
        RDS_DATABASE_NAME))


In [5]:
# Query for the input table: basin id, the geometry column and the same
# geometry rendered as WKT text (ST_AsText) for the BigQuery upload.
sql = """
SELECT
  icepbasinid,
  geom,
  ST_AsText(geom) AS wkt
FROM
  {}
""".format(RDS_INPUT_TABLE_NAME)

In [6]:
# Load the query result from PostGIS into a GeoDataFrame.
gdf = gpd.read_postgis(sql, engine)

In [7]:
# Show (rows, columns) of the loaded GeoDataFrame.
gdf.shape

(6081, 3)

In [8]:
# Preview the first rows: basin id, geometry and its WKT text.
gdf.head()

Unnamed: 0,icepbasinid,geom,wkt
0,1,(POLYGON ((-59.99999999999977 5.50000000000005...,MULTIPOLYGON(((-59.9999999999998 5.50000000000...
1,10,(POLYGON ((-14.49999999999977 21.5000000000001...,MULTIPOLYGON(((-14.4999999999998 21.5000000000...
2,18,"(POLYGON ((137.5000000000001 -19, 137.50000000...","MULTIPOLYGON(((137.5 -19,137.5 -19.5,138 -19.5..."
3,58,"(POLYGON ((106 74.00000000000011, 106 73.50000...","MULTIPOLYGON(((106 74.0000000000001,106 73.500..."
4,63,"(POLYGON ((149.5000000000002 71, 149.500000000...","MULTIPOLYGON(((149.5 71,149.5 70.5000000000001..."


In [9]:
# "<dataset>.<table>" name of the WKT output table in BigQuery.
destination_table_wkt = "{}.{}".format(BQ_OUTPUT_DATASET_NAME_WKT,BQ_OUTPUT_TABLE_NAME)

In [10]:
# Plain DataFrame without the geometry column; only the id and the WKT text are
# uploaded. The axis is passed by keyword because the positional form
# drop("geom", 1) is deprecated and no longer accepted by pandas 2.x.
df = pd.DataFrame(gdf.drop("geom", axis=1))

In [11]:
# Upload the WKT table to BigQuery in chunks of 1000 rows, replacing any
# existing table of the same name.
df.to_gbq(
    destination_table_wkt,
    project_id=BQ_PROJECT_ID,
    if_exists="replace",
    chunksize=1000,
)

7it [00:27,  3.97s/it]


In [12]:
# The PostGIS input is no longer needed; release the engine's connections.
engine.dispose()

In [13]:
# Configuration object for the BigQuery query job that builds the geography table.
job_config = bigquery.QueryJobConfig()

In [19]:
# BigQuery query that reads the uploaded WKT table and converts the WKT text
# into a geography column with ST_GeogFromText.
q = """
SELECT
  icepbasinid,
  ST_GeogFromText(wkt) AS geog
FROM
  {}
""".format(destination_table_wkt)

In [15]:
# Reference to the BigQuery dataset that receives the geography table.
destination_dataset_ref = client.dataset(BQ_OUTPUT_DATASET_NAME_GEOG)

In [16]:
# Reference to the output table inside the geography dataset.
destination_table_ref = destination_dataset_ref.table(BQ_OUTPUT_TABLE_NAME)

In [17]:
# Write the query result into the geography table.
job_config.destination = destination_table_ref

# Without an explicit write disposition a query job only writes to an empty
# destination (WRITE_EMPTY), so re-running the notebook fails once the table
# exists. When overwriting is enabled, replace the table instead, matching the
# if_exists="replace" behaviour of the WKT upload.
if OVERWRITE_OUTPUT:
    job_config.write_disposition = "WRITE_TRUNCATE"

In [20]:
# Submit the conversion query using the job configuration prepared in the previous cells.
query_job = client.query(q, job_config=job_config)

In [21]:
# Wait for the query job to finish and fetch its result iterator.
rows = query_job.result()

In [None]:
# Report the wall-clock run time since `start` was taken in the timestamp cell.
end = datetime.datetime.now()
elapsed = end - start
print(elapsed)

previous runs:  
0:00:34.595251
