### Create PostGIS database 

* Purpose of script: create postgis database using AWS RDS
* Author: Rutger Hofste
* Kernel used: python35
* Date created: 20171115

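This script requires a password for the master user of the database. It reads the password from the first line of a file named `.password` in the current working directory. Create that file from a terminal before running the notebook, for example `echo 'your-password' > .password`, and keep it out of version control.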


In [1]:
import time, datetime, sys
# Take a single UTC timestamp so that the date and the "UTC"-labelled time agree.
# time.strftime() without a time tuple would format the machine's local time.
utcNow = time.gmtime()
dateString = time.strftime("Y%YM%mD%d", utcNow)
timeString = time.strftime("UTC %H:%M", utcNow)
# Wall-clock reference that later cells subtract from to report elapsed time.
start = datetime.datetime.now()
print(dateString,timeString)
sys.version

Y2017M11D23 UTC 11:31


'3.5.4 |Continuum Analytics, Inc.| (default, Aug 14 2017, 13:26:58) \n[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]'

In [2]:
# Label for this notebook.
SCRIPT_NAME = "Y2017M11D15_RH_Create_PostGIS_Database_V01"

# Database settings
DATABASE_NAME = "database01"  # name of the database created inside the instance
DATABASE_IDENTIFIER = "aqueduct30v02"  # identifier of the RDS instance itself

In [3]:
import urllib.parse

import boto3
import botocore
from sqlalchemy import *

In [4]:
# Low-level RDS client. No region or credentials are passed here, so boto3
# falls back to whatever its default configuration provides.
rds = boto3.client('rds')

In [5]:
# Read the master password from the first line of ./.password.
# The context manager closes the file even if reading raises.
with open(".password","r") as passwordFile:
    passwordLines = passwordFile.read().splitlines()
# Fail with a clear message instead of a bare IndexError on an empty file.
if not passwordLines or not passwordLines[0]:
    raise ValueError("The file .password is empty; put the database password on its first line.")
password = passwordLines[0]

In [6]:
def createDB(password):
    """Request a new PostgreSQL instance on AWS RDS.

    Relies on the module-level ``rds`` client and on ``DATABASE_IDENTIFIER``
    and ``DATABASE_NAME``. The call only submits the request; the caller is
    expected to poll the instance status until it becomes available.

    Args:
        password: Password for the master user of the new instance.

    Returns:
        None.

    Raises:
        botocore.exceptions.ClientError: For any API error other than
            "DBInstanceAlreadyExists". That one is reported and ignored so the
            notebook can be re-run from a fresh kernel.
    """
    # Local import keeps the function self-contained.
    from botocore.exceptions import ClientError

    db_identifier = DATABASE_IDENTIFIER
    try:
        rds.create_db_instance(DBInstanceIdentifier=db_identifier,
                           AllocatedStorage=20,
                           DBName=DATABASE_NAME,
                           Engine='postgres',
                           # General purpose SSD
                           StorageType='gp2',
                           StorageEncrypted=False,
                           AutoMinorVersionUpgrade=True,
                           # Set this to true later?
                           MultiAZ=False,
                           MasterUsername='rutgerhofste',
                           MasterUserPassword=password,
                           VpcSecurityGroupIds=['sg-1da15e77'], #You will need to create a security group in the console.
                           DBInstanceClass='db.t2.large',
                           Tags=[{'Key': 'author', 'Value': 'rutger'}])
    except ClientError as error:
        # Only the "already exists" case is safe to ignore; re-raise the rest.
        if error.response["Error"]["Code"] != "DBInstanceAlreadyExists":
            raise
        print("DB instance %s already exists, skipping creation" % db_identifier)

In [7]:
# Submit the instance creation request with the password read from .password.
createDB(password)

In [8]:
# Fetch the current description of the instance, selected by its identifier.
response = rds.describe_db_instances(DBInstanceIdentifier=DATABASE_IDENTIFIER)

In [9]:
# Status string of the first instance listed in the response.
status = response["DBInstances"][0]["DBInstanceStatus"]

In [10]:
# Pause the script while the database is being created.
# Sleep first and poll afterwards, so the printed status is fresh and the loop
# ends as soon as "available" is observed.
POLL_INTERVAL_SECONDS = 20
# Upper bound so that a stuck instance cannot hang the notebook forever.
MAX_WAIT_SECONDS = 60 * 60
# Statuses from which the instance is not expected to become available by itself.
FAILED_STATUSES = ("failed", "incompatible-parameters", "incompatible-network", "incompatible-restore", "deleting")
waitStart = datetime.datetime.now()
while status != "available":
    if status in FAILED_STATUSES:
        raise RuntimeError("DB instance %s entered status '%s'" % (DATABASE_IDENTIFIER, status))
    if (datetime.datetime.now() - waitStart).total_seconds() > MAX_WAIT_SECONDS:
        raise TimeoutError("DB instance %s still '%s' after %d seconds" % (DATABASE_IDENTIFIER, status, MAX_WAIT_SECONDS))
    time.sleep(POLL_INTERVAL_SECONDS)
    response = rds.describe_db_instances(DBInstanceIdentifier="%s"%(DATABASE_IDENTIFIER))
    status = response["DBInstances"][0]["DBInstanceStatus"]
    end = datetime.datetime.now()
    elapsed = end - start
    print(status,elapsed)
    

creating 0:00:22.244177
creating 0:00:42.364634
creating 0:01:02.478889
creating 0:01:22.573213
creating 0:01:42.692521
creating 0:02:02.838538
creating 0:02:22.993050
creating 0:02:43.127655
backing-up 0:03:03.270607
backing-up 0:03:23.370456
backing-up 0:03:43.471743
backing-up 0:04:03.573842
backing-up 0:04:23.672369
backing-up 0:04:43.786329
backing-up 0:05:03.911406
backing-up 0:05:24.041392
backing-up 0:05:44.164451
backing-up 0:06:04.254109
backing-up 0:06:24.325972
backing-up 0:06:44.432813
backing-up 0:07:04.572295
backing-up 0:07:24.696408
backing-up 0:07:44.787687
available 0:08:04.885960


In [11]:
# Host address of the instance, taken from the most recent describe_db_instances response.
endpoint = response["DBInstances"][0]["Endpoint"]["Address"]

In [12]:
# Show the host address that the connection string will use.
print(endpoint)

aqueduct30v02.cgpnumwmfcqc.eu-central-1.rds.amazonaws.com


In [13]:
# Percent-encode the password so that characters such as "@", ":", "/" or "%"
# are not misread as URL delimiters when the connection string is parsed.
engine = create_engine('postgresql://rutgerhofste:%s@%s:5432/%s' %(urllib.parse.quote(password, safe=""),endpoint,DATABASE_NAME))

In [14]:
# Open a connection on the engine; it is closed explicitly in a later cell.
connection = engine.connect()

[Setting up PostGIS on RDS](http://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/Appendix.PostgreSQL.CommonDBATasks.html#Appendix.PostgreSQL.CommonDBATasks.PostGIS)

In [15]:
# Statements that install PostGIS and hand its schemas and tables to rds_superuser.
# "if not exists" and "OR REPLACE" make them safe to run again on a database
# that was already set up.
sqlList = [
    "select current_user;",
    "create extension if not exists postgis;",
    "create extension if not exists fuzzystrmatch;",
    "create extension if not exists postgis_tiger_geocoder;",
    "create extension if not exists postgis_topology;",
    "alter schema tiger owner to rds_superuser;",
    "alter schema tiger_data owner to rds_superuser;",
    "alter schema topology owner to rds_superuser;",
    # Helper that executes the SQL text it is given and returns that text.
    "CREATE OR REPLACE FUNCTION exec(text) returns text language plpgsql volatile AS $f$ BEGIN EXECUTE $1; RETURN $1; END; $f$;",
    # Builds and runs one ALTER TABLE ... OWNER TO statement per relation in the tiger and topology schemas.
    "SELECT exec('ALTER TABLE ' || quote_ident(s.nspname) || '.' || quote_ident(s.relname) || ' OWNER TO rds_superuser;') FROM ( SELECT nspname, relname FROM pg_class c JOIN pg_namespace n ON (c.relnamespace = n.oid) WHERE nspname in ('tiger','topology') AND relkind IN ('r','S','v') ORDER BY relkind = 'S')s;",
    "SET search_path=public,tiger;",
    # Smoke test of the geocoder extension.
    "select na.address, na.streetname, na.streettypeabbrev, na.zip from normalize_address('1 Devonshire Place, Boston, MA 02109') as na;",
]

In [16]:
# Run every statement inside one explicit transaction that commits at the end.
# SQLAlchemy 1.x autocommit only triggers for statements that begin with a
# DDL/DML keyword, so the ownership changes made through "SELECT exec(...)"
# would otherwise be rolled back when the connection is closed.
resultList = []
with connection.begin():
    for sql in sqlList:
        resultList.append(connection.execute(sql))

In [17]:
# Release the database connection now that all statements have been executed.
connection.close()

In [18]:
# Report the total run time, measured from the `start` timestamp taken in the first cell.
end = datetime.datetime.now()
elapsed = end - start
print(elapsed)

0:08:07.746212
