In [1]:
""" Upload GADM 3.6 level 1 to bigquery.
-------------------------------------------------------------------------------

Author: Rutger Hofste
Date: 20181112
Kernel: python35
Docker: rutgerhofste/gisdocker:ubuntu16.04

"""

# --- Run switches -----------------------------------------------------------
TESTING = 0           # truthy -> only a sample of the rows is uploaded
OVERWRITE_OUTPUT = 1  # overwrite flag for the output table

# --- Script identity (used to derive the output table name) -----------------
SCRIPT_NAME = 'Y2018M11D12_RH_GADM36_Level1_RDS_to_BQ_V01'
OUTPUT_VERSION = 1

# --- BigQuery target --------------------------------------------------------
BQ_PROJECT_ID = "aqueduct30"
BQ_OUTPUT_DATASET_NAME = "aqueduct30v01"

# --- PostgreSQL (RDS) source ------------------------------------------------
RDS_DATABASE_ENDPOINT = "aqueduct30v05.cgpnumwmfcqc.eu-central-1.rds.amazonaws.com"
RDS_DATABASE_NAME = "database01"
RDS_INPUT_TABLE_NAME = "y2018m11d12_rh_gadm36_level1_to_rds_v01_v02"

# Output table name: lower-cased script name plus a zero-padded version suffix,
# e.g. "..._v01".
BQ_OUTPUT_TABLE_NAME = "{}_v{:02.0f}".format(SCRIPT_NAME,OUTPUT_VERSION).lower()

# Echo the effective configuration so it is captured in the notebook output.
# Each (label, value) pair is flattened into print's argument list.
_CONFIG_SUMMARY = (
    ("\nRDS_DATABASE_ENDPOINT: ", RDS_DATABASE_ENDPOINT),
    ("\nRDS_DATABASE_NAME: ", RDS_DATABASE_NAME),
    ("\nRDS_INPUT_TABLE_NAME: ", RDS_INPUT_TABLE_NAME),
    ("\nBQ_OUTPUT_DATASET_NAME: ", BQ_OUTPUT_DATASET_NAME),
    ("\nBQ_OUTPUT_TABLE_NAME: ", BQ_OUTPUT_TABLE_NAME),
)
print(*(part for pair in _CONFIG_SUMMARY for part in pair))


RDS_DATABASE_ENDPOINT:  aqueduct30v05.cgpnumwmfcqc.eu-central-1.rds.amazonaws.com 
RDS_DATABASE_NAME:  database01 
RDS_INPUT_TABLE_NAME:  y2018m11d12_rh_gadm36_level1_to_rds_v01_v02 
BQ_OUTPUT_DATASET_NAME:  aqueduct30v01 
BQ_OUTPUT_TABLE_NAME:  y2018m11d12_rh_gadm36_level1_rds_to_bq_v01_v01


In [2]:
import time, datetime, sys

# time.strftime() formats *local* time unless it is given a time tuple, so the
# "UTC" label was only correct on machines whose clock is set to UTC. Take a
# single gmtime() snapshot and use it for both labels so that they are really
# UTC and always refer to the same instant.
utc_now = time.gmtime()
dateString = time.strftime("Y%YM%mD%d", utc_now)
timeString = time.strftime("UTC %H:%M", utc_now)

# Reference point for the elapsed-time report at the end of the notebook.
start = datetime.datetime.now()
print(dateString,timeString)

# Last expression: show the interpreter version in the cell output.
sys.version

Y2018M11D12 UTC 14:15


'3.5.4 |Anaconda, Inc.| (default, Nov 20 2017, 18:44:38) \n[GCC 7.2.0]'

In [3]:
import os
import sqlalchemy
import pandas as pd
import geopandas as gpd
from google.cloud import bigquery

# Credentials file picked up by the Google client libraries.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/.google.json"

# Reuse the configured project id rather than repeating the literal, so the
# environment variable and the client can never point at different projects.
os.environ["GOOGLE_CLOUD_PROJECT"] = BQ_PROJECT_ID
client = bigquery.Client(project=BQ_PROJECT_ID)

In [4]:
from urllib.parse import quote

# Read the database password from the first line of the password file.
# The context manager closes the file even if reading fails.
with open("/.password", "r") as password_file:
    password = password_file.read().splitlines()[0]

# Percent-encode the password before embedding it in the connection URL:
# characters such as "@", "/", ":" or "%" would otherwise be parsed as URL
# syntax. Passwords without special characters are unchanged.
engine = sqlalchemy.create_engine(
    "postgresql://rutgerhofste:{}@{}:5432/{}".format(
        quote(password, safe=""),
        RDS_DATABASE_ENDPOINT,
        RDS_DATABASE_NAME))


In [5]:
# Query template for the level 1 table. The geometry is selected twice: once
# as the native `geom` column and once as WKT text (`wkt`) via ST_AsText.
# The single placeholder receives the input table name.
sql_template = """
SELECT
  gid_1,
  name_1,
  gid_0,
  name_0,
  varname_1,
  nl_name_1,
  type_1,
  engtype_1,
  cc_1,
  hasc_1,
  geom,
  ST_AsText(geom) AS wkt
FROM
  {}
"""
sql = sql_template.format(RDS_INPUT_TABLE_NAME)

In [6]:
# Run the query against the RDS engine and load the result as a GeoDataFrame.
gdf = gpd.read_postgis(sql=sql, con=engine)

In [7]:
# Sanity check: (number of rows, number of columns) returned by the query.
gdf.shape

(3610, 12)

In [8]:
# Preview the first rows, including the `geom` and `wkt` columns.
gdf.head()

Unnamed: 0,gid_1,name_1,gid_0,name_0,varname_1,nl_name_1,type_1,engtype_1,cc_1,hasc_1,geom,wkt
0,AFG.10_1,Ghor,AFG,Afghanistan,Gawr|Ghore|Ghour|Ghur,,Velayat,Province,,AF.GR,"(POLYGON ((64.52828217 33.32641602, 64.5058975...","MULTIPOLYGON(((64.52828217 33.32641602,64.5058..."
1,AFG.1_1,Badakhshan,AFG,Afghanistan,Badahšan,,Velayat,Province,,AF.BD,"(POLYGON ((71.14804076999999 36.00123596, 71.1...","MULTIPOLYGON(((71.14804077 36.00123596,71.1422..."
2,AFG.11_1,Hilmand,AFG,Afghanistan,Girishk|Hilmend,,Velayat,Province,,AF.HM,"(POLYGON ((63.6648941 29.4764061, 63.66107941 ...","MULTIPOLYGON(((63.6648941 29.4764061,63.661079..."
3,AFG.12_1,Hirat,AFG,Afghanistan,,,Velayat,Province,,AF.HR,"(POLYGON ((62.26362991 32.84782028, 62.2033577...","MULTIPOLYGON(((62.26362991 32.84782028,62.2033..."
4,AFG.15_1,Kandahar,AFG,Afghanistan,Qandahar,,Velayat,Province,,AF.KD,"(POLYGON ((66.19773102000001 31.86677933, 66.1...","MULTIPOLYGON(((66.19773102 31.86677933,66.1910..."


In [9]:
# "<dataset>.<table>" identifier passed to DataFrame.to_gbq below.
destination_table = "{}.{}".format(
    BQ_OUTPUT_DATASET_NAME,
    BQ_OUTPUT_TABLE_NAME,
)

In [13]:
# Convert to a plain DataFrame without the native geometry column; the
# geometry stays available as text in the `wkt` column.
# `axis` is passed by keyword: the positional form drop("geom", 1) is
# deprecated and no longer accepted by pandas 2.x.
df = pd.DataFrame(gdf.drop("geom", axis=1))

In [11]:
# In testing mode upload only a random subset of the rows.
# The sample size is capped at the frame length because DataFrame.sample
# raises ValueError when asked for more rows than exist (without replacement).
if TESTING:
    df = df.sample(min(1000, len(df)))

In [15]:
# Upload the table to BigQuery in chunks of 100 rows.
# OVERWRITE_OUTPUT now actually controls overwriting: when truthy an existing
# table is replaced (the previous hard-coded behaviour); otherwise the upload
# fails instead of silently clobbering an existing table.
df.to_gbq(destination_table=destination_table,
          project_id=BQ_PROJECT_ID,
          chunksize=100,
          if_exists="replace" if OVERWRITE_OUTPUT else "fail")


0it [00:00, ?it/s][A
1it [00:00,  6.47it/s][A
2it [00:08,  4.38s/it][A
3it [00:29,  9.87s/it][A
4it [01:19, 19.99s/it][A
5it [01:58, 23.69s/it][A
6it [02:25, 24.26s/it][A
7it [02:37, 22.47s/it][A
8it [02:49, 21.17s/it][A
9it [03:05, 20.60s/it][A
10it [03:35, 21.57s/it][A
11it [03:59, 21.74s/it][A
12it [04:12, 21.02s/it][A
13it [04:34, 21.12s/it][A
14it [04:43, 20.25s/it][A
15it [05:11, 20.75s/it][A
16it [05:35, 20.94s/it][A
17it [05:50, 20.63s/it][A
18it [06:30, 21.71s/it][A
19it [06:39, 21.03s/it][A
20it [07:02, 21.15s/it][A
21it [07:11, 20.57s/it][A
22it [07:28, 20.39s/it][A
23it [07:38, 19.94s/it][A
24it [07:47, 19.49s/it][A
25it [07:58, 19.14s/it][A
26it [08:09, 18.83s/it][A
27it [08:29, 18.87s/it][A
28it [08:43, 18.69s/it][A
29it [08:59, 18.60s/it][A
30it [09:07, 18.25s/it][A
31it [09:20, 18.07s/it][A
32it [09:34, 17.95s/it][A
33it [09:51, 17.93s/it][A
34it [10:05, 17.80s/it][A
35it [10:16, 17.62s/it][A
36it [10:26, 17.42s/it][A
37it [10:43, 1

In [16]:
# Release the engine's pooled database connections now that the data is loaded.
engine.dispose()

In [17]:
# Report the total run time, measured from `start` (set in the timestamp cell).
end = datetime.datetime.now()
elapsed = end - start
print(elapsed)

0:14:59.092810


Previous runs (total elapsed time, H:MM:SS):  
0:14:59.092810