## Imports and dependencies

In [1]:
%matplotlib inline
import psycopg2
import pandas as pd
import pandas.io.sql as psql
import configparser
import os
import IPython
from IPython.display import display
from IPython.display import HTML
#Maximum number of rows to render for a result set
pd.set_option('display.max_rows', 500)
#Maximum number of columns to render for a result set
pd.set_option('display.max_columns', 100)
#Show the full content of columns. None means "do not truncate"; the older
#sentinel -1 was deprecated in pandas 1.0 and is rejected by pandas 2.x.
pd.set_option('display.max_colwidth', None)

## Setting up database connectivity

Create a file in your home directory containing the database credentials for your target environment. 

For example:
```
vatsan@vatsan-ubuntu:~/code/postgresopen-2017$ vim ~/.dbuser.cred
```

The content of this file should look like so (with appropriate values for HOSTNAME, PORT, USER, DATABASE & PASSWORD).
```
[database_creds]
host: HOSTNAME
port: PORT
user: USER
database: DATABASE
password: PASSWORD
```
Please set the permissions of this file to u+rwx (700), so that only you can access this file.
```
vatsan@vatsan-ubuntu:~/code/postgresopen-2017$ chmod 700 ~/.dbuser.cred
```

You should see the following:

```
vatsan@vatsan-ubuntu:~/code/postgresopen-2017$ ls -l ~/.dbuser.cred 
-rwx------ 1 vatsan vatsan 93 Sep  3 00:00 /home/vatsan/.dbuser.cred
```

## Creating database connection string

In [2]:
USER_CRED_FILE = os.path.join(os.path.expanduser('~'), '.dbuser.cred')
def fetchDBCredentials(dbcred_file=USER_CRED_FILE):
    """
       Build a libpq-style connection string from a credentials file.

       The file is parsed with configparser and must contain a
       [database_creds] section with the keys host, port, user,
       database and password.

       Parameters
       ----------
       dbcred_file : path of the credentials file. Defaults to
                     USER_CRED_FILE ($HOME/.dbuser.cred).

       Returns
       -------
       A string of the form
       dbname='...' user='...' host='...' port='...' password='...'
       Backslashes and single quotes inside a value are backslash-escaped
       so that the quoted value stays well-formed.

       Raises
       ------
       configparser.NoSectionError / configparser.NoOptionError when the
       section or one of the keys is missing. ConfigParser.read() skips
       files it cannot open, so a missing file also surfaces as
       NoSectionError.
    """
    def _quote(value):
        #Inside a single-quoted connection-string value, backslash and single quote
        #must be escaped with a backslash. Escape backslashes first so the
        #backslashes added for quotes are not doubled.
        return "'" + value.replace('\\', '\\\\').replace("'", "\\'") + "'"

    #Read database credentials from user supplied file
    conf = configparser.ConfigParser()
    conf.read(dbcred_file)
    #host, port, user, database, password
    section = 'database_creds'
    creds = {}
    for key in ('host', 'port', 'user', 'database', 'password'):
        creds[key] = _quote(conf.get(section, key))

    #Initialize connection string (values are already quoted and escaped)
    conn_str = "dbname={database} user={user} host={host} port={port} password={password}".format(**creds)
    return conn_str

## Create a connection object to the database

In [3]:
# Open one module-level connection using the credentials file; the
# %%showsql / %%execsql magics and the later cells all reuse this `conn`.
# NOTE(review): nothing visible in this notebook closes the connection --
# consider calling conn.close() when finished.
conn = psycopg2.connect(fetchDBCredentials())

## Define magic commands to run SQL inline

In [4]:
from IPython.core.magic import (register_line_magic, register_cell_magic,
                                register_line_cell_magic)

@register_cell_magic
def showsql(line, cell):
    """
        Run the SQL in the cell body and render the result set inline.

        The cell text (should be valid SQL that returns rows, e.g. SELECT)
        is executed through the module-level connection object `conn`,
        loaded into a Pandas dataframe and rendered below the cell using
        IPython.display.

        line : text following `%%showsql` on the magic line (unused).
        cell : the cell body, passed verbatim as the SQL statement.

        Returns None. If the statement fails, the transaction is rolled
        back before the error is re-raised, so the shared connection
        remains usable by subsequent cells.
    """
    #Use the connection object defined above (read only, so no `global` needed).
    try:
        result_df = psql.read_sql(cell, conn)
        conn.commit()
    except Exception:
        #Without a rollback the connection stays in a failed transaction
        #and every later statement on it is refused.
        conn.rollback()
        raise
    display(result_df)
    return
    
@register_cell_magic
def execsql(line, cell):
    """
        Run the SQL in the cell body for its side effects only.

        The cell text (should be valid SQL) is executed through the
        module-level connection object `conn` and committed. Nothing is
        displayed. You'd use this for CREATE/UPDATE/DELETE.

        line : text following `%%execsql` on the magic line (unused).
        cell : the cell body, passed verbatim as the SQL statement.

        Returns None. If the statement fails, the transaction is rolled
        back before the error is re-raised, so the shared connection
        remains usable by subsequent cells.
    """
    #Use the connection object defined above (read only, so no `global` needed).
    try:
        #Plain DB-API cursor, closed on exit from the `with` block.
        with conn.cursor() as cur:
            cur.execute(cell)
        conn.commit()
    except Exception:
        #Without a rollback the connection stays in a failed transaction
        #and every later statement on it is refused.
        conn.rollback()
        raise
    return

# The decorators above have already registered both functions as cell magics,
# so the module-level names are not needed to invoke %%showsql / %%execsql
# (the cells below use the magics after this `del`).
# We delete these to avoid name conflicts for automagic to work
del execsql, showsql

## Test your connection to the database

You can use the `showsql` magic command we defined above for statements that return a result set (e.g. `SELECT`)

In [5]:
%%showsql
-- Connectivity smoke test: five generated rows with two random() columns;
-- it does not read from any table.
select
    random() as x,
    random() as y
from
    generate_series(1, 5);

Unnamed: 0,x,y
0,0.990802,0.091134
1,0.273229,0.163644
2,0.760908,0.917325
3,0.736972,0.337696
4,0.093291,0.648397


You can use the `execsql` magic command we defined above for statements that return no rows, such as DDL (`CREATE`/`DROP`) or `UPDATE`/`DELETE`

In [6]:
%%time
%%execsql
-- Start from a clean slate so the "create table test" in the next cell
-- cannot collide with an existing table.
drop table if exists test

CPU times: user 4 ms, sys: 0 ns, total: 4 ms
Wall time: 7.41 ms


In [7]:
%%time
%%execsql
-- Create table "test" from a query (CTAS): five generated rows,
-- each with two random() columns x and y.
create table test
as
(
    select
        random() as x,
        random() as y
    from
        generate_series(1, 5)
)

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 20.2 ms


Alternatively, you could use `pandas.io.sql` functions directly to fetch the results of a query into a dataframe, or use the connection object to issue DDL statements

In [8]:
%%time
# Same kind of query as the %%showsql cell, but issued directly from Python:
# read_sql runs the statement on the shared `conn` and returns a dataframe.
df = psql.read_sql(
        """
            select 
                random() as x, 
                random() as y 
            from 
                generate_series(1, 10) q
        """, 
        conn
    )
# The query generates ten rows; only the first few are rendered.
display(df.head())

Unnamed: 0,x,y
0,0.705604,0.495641
1,0.682594,0.642367
2,0.417824,0.56693
3,0.757868,0.84595
4,0.56763,0.254606


CPU times: user 8 ms, sys: 0 ns, total: 8 ms
Wall time: 14.4 ms


In [9]:
%%time
# DDL issued through the raw DB-API connection instead of the %%execsql magic.
ctas_sql = """
            create table test
            as
            (
                select 
                    random() as x, 
                    random() as y 
                from 
                    generate_series(1, 10) q
            )
            """
try:
    # One cursor, closed on exit from the `with` block, instead of two
    # throw-away cursors that were never closed.
    with conn.cursor() as cur:
        # Drop first so the cell can be re-run without a name collision.
        cur.execute("""drop table if exists test""")
        conn.commit()
        cur.execute(ctas_sql)
        conn.commit()
except Exception:
    # Leave the shared connection usable for later cells if a statement fails.
    conn.rollback()
    raise

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 12.5 ms
