### Create PostGIS database 

* Purpose of script: create a PostGIS-enabled PostgreSQL database on AWS RDS
* Author: Rutger Hofste
* Kernel used: python35
* Date created: 20171115

This script requires you to set a password for your database. It reads the password from the first line of the file `.password` in the current working directory. You can create this file from your terminal window before running the script.


In [1]:
import time, datetime, sys
# Take a single UTC snapshot so the date and time strings agree with each other
# and with the "UTC" label (time.strftime without a time tuple uses local time).
utc_now = time.gmtime()
dateString = time.strftime("Y%YM%mD%d", utc_now)
timeString = time.strftime("UTC %H:%M", utc_now)
# Wall-clock reference point; later cells subtract it from datetime.now()
# to report the elapsed run time.
start = datetime.datetime.now()
print(dateString,timeString)
# Bare expression: displayed as the cell output when run in a notebook.
sys.version

Y2017M11D15 UTC 15:23


'3.5.4 |Continuum Analytics, Inc.| (default, Aug 14 2017, 13:26:58) \n[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]'

In [2]:
# Name of this script/notebook; not referenced by the cells visible in this file.
SCRIPT_NAME = "Y2017M11D15_RH_Create_PostGIS_Database_V01"

# Database settings
# RDS instance identifier: used as DBInstanceIdentifier when creating and polling the instance.
DATABASE_IDENTIFIER = "aqueduct30v01"
# Name of the initial database (DBName) and the database part of the connection URL.
DATABASE_NAME = "database01"
# Table name; not used by the cells visible here -- presumably consumed by a follow-up script (confirm).
TABLE_NAME = "hybasvalid01"

In [3]:
import boto3
import botocore
from sqlalchemy import *

In [4]:
# boto3 client for the Amazon RDS API. No region or credentials are passed here,
# so both come from boto3's default configuration.
rds = boto3.client('rds')

In [5]:
# Read the database master password from the first line of ./.password.
# The context manager closes the file even if reading fails.
with open(".password", "r") as password_file:
    password_lines = password_file.read().splitlines()

# Fail with a clear message instead of an opaque IndexError on an empty file.
if not password_lines:
    raise ValueError("The .password file is empty; put the database password on its first line.")
password = password_lines[0]

In [6]:
def createDB(password):
    """Submit a request to create the PostgreSQL instance on RDS.

    The instance identifier and initial database name come from the
    module-level DATABASE_IDENTIFIER and DATABASE_NAME settings; all other
    options are fixed below.

    Args:
        password: master user password for the new instance.

    Returns:
        None. The response of ``create_db_instance`` is discarded.
    """
    instance_settings = {
        'DBInstanceIdentifier': DATABASE_IDENTIFIER,
        'DBName': DATABASE_NAME,
        'Engine': 'postgres',
        'DBInstanceClass': 'db.t2.micro',
        'AllocatedStorage': 20,
        # General purpose SSD
        'StorageType': 'gp2',
        'StorageEncrypted': False,
        'AutoMinorVersionUpgrade': True,
        # Set this to true later?
        'MultiAZ': False,
        'MasterUsername': 'rutgerhofste',
        'MasterUserPassword': password,
        # You will need to create a security group in the console.
        'VpcSecurityGroupIds': ['sg-1da15e77'],
        'Tags': [{'Key': 'test', 'Value': 'test'}],
    }
    rds.create_db_instance(**instance_settings)

In [7]:
# Request creation of the RDS instance; the following cells poll its status.
createDB(password)

In [8]:
# Fetch the current description of the instance that was just requested.
response = rds.describe_db_instances(DBInstanceIdentifier=DATABASE_IDENTIFIER)

In [9]:
# Status string of the first (only requested) instance in the response.
status = response["DBInstances"][0]["DBInstanceStatus"]

In [10]:
# Pause the script while the database is being created.
# Statuses from which the instance will not reach "available" on its own;
# stop instead of polling forever.
failed_statuses = ("failed", "incompatible-network", "incompatible-option-group",
                   "incompatible-parameters", "incompatible-restore")
while status != "available":
    if status in failed_statuses:
        raise RuntimeError("Database instance %s entered status '%s'" % (DATABASE_IDENTIFIER, status))
    # Sleep before polling (not after), so the loop ends as soon as
    # "available" is observed rather than one interval later.
    time.sleep(20)
    # Keep the latest response in `response`: later cells read the endpoint from it.
    response = rds.describe_db_instances(DBInstanceIdentifier=DATABASE_IDENTIFIER)
    status = response["DBInstances"][0]["DBInstanceStatus"]
    end = datetime.datetime.now()
    elapsed = end - start
    print(status,elapsed)
    

creating 0:00:23.508295
creating 0:00:43.637833
creating 0:01:03.775159
creating 0:01:23.942095
creating 0:01:44.106049
creating 0:02:04.257653
creating 0:02:24.366334
creating 0:02:44.478330
creating 0:03:04.596858
backing-up 0:03:24.699515
backing-up 0:03:44.822307
backing-up 0:04:04.963897
backing-up 0:04:25.038344
backing-up 0:04:45.122343
backing-up 0:05:05.260969
backing-up 0:05:25.366435
backing-up 0:05:45.453445
backing-up 0:06:05.592775
backing-up 0:06:25.738269
backing-up 0:06:45.895755
backing-up 0:07:06.014332
backing-up 0:07:26.106302
backing-up 0:07:46.221036
backing-up 0:08:06.366287
available 0:08:26.495600


In [11]:
# Hostname of the instance, taken from the most recent describe_db_instances response.
endpoint = response["DBInstances"][0]["Endpoint"]["Address"]

In [12]:
# Show the endpoint hostname for reference.
print(endpoint)

aqueduct30v01.cgpnumwmfcqc.eu-central-1.rds.amazonaws.com


In [13]:
# Percent-encode the password so characters such as "@", ":", "/" or "%"
# cannot be misread as URL delimiters when the connection string is parsed.
from urllib.parse import quote

engine = create_engine('postgresql://rutgerhofste:%s@%s:5432/%s' %(quote(password, safe=""),endpoint,DATABASE_NAME))

In [14]:
# Open the connection used below to run the PostGIS setup statements.
connection = engine.connect()

[Setting up PostGIS on RDS](http://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/Appendix.PostgreSQL.CommonDBATasks.html#Appendix.PostgreSQL.CommonDBATasks.PostGIS)

In [15]:
# PostGIS setup statements, in the order they must be executed.
sqlList = [
    # Report the connected role.
    "select current_user;",
    # Install the PostGIS extensions.
    "create extension postgis;",
    "create extension fuzzystrmatch;",
    "create extension postgis_tiger_geocoder;",
    "create extension postgis_topology;",
    # Hand the extension schemas over to rds_superuser.
    "alter schema tiger owner to rds_superuser;",
    "alter schema tiger_data owner to rds_superuser;",
    "alter schema topology owner to rds_superuser;",
    # Helper that executes the SQL text it is given, then use it to transfer
    # ownership of the tables, sequences and views in tiger/topology.
    "CREATE FUNCTION exec(text) returns text language plpgsql volatile AS $f$ BEGIN EXECUTE $1; RETURN $1; END; $f$;",
    "SELECT exec('ALTER TABLE ' || quote_ident(s.nspname) || '.' || quote_ident(s.relname) || ' OWNER TO rds_superuser;') FROM ( SELECT nspname, relname FROM pg_class c JOIN pg_namespace n ON (c.relnamespace = n.oid) WHERE nspname in ('tiger','topology') AND relkind IN ('r','S','v') ORDER BY relkind = 'S')s;",
    # Smoke test: normalize a sample address with the tiger geocoder.
    "SET search_path=public,tiger;",
    "select na.address, na.streetname, na.streettypeabbrev, na.zip from normalize_address('1 Devonshire Place, Boston, MA 02109') as na;",
]

In [16]:
# Run the setup statements in order and keep each result object.
# They are wrapped in one explicit transaction: SQLAlchemy's implicit
# autocommit for plain SQL strings only covers statements it recognises as
# data-changing, so the ownership changes issued through "SELECT exec(...)"
# would otherwise stay uncommitted and be rolled back when the connection closes.
resultList = []
with connection.begin():
    for sql in sqlList:
        resultList.append(connection.execute(sql))

In [17]:
# Release the database connection now that setup is finished.
connection.close()

In [18]:
# Report the total run time since `start` was recorded in the first cell.
end = datetime.datetime.now()
elapsed = end - start
print(elapsed)

0:08:28.844157
