# Section 1
## Read in database credentials
Below, the credentials for connecting to the database are read into variables, one per line, from a local file. That file is not included in the repo.

In [1]:
# Start from empty placeholders, then overwrite them with the four values
# stored one per line in the local credentials file.
db_name = db_user = db_pass = db_host = ""
with open("database_credentials.txt") as credentials_file:
    # Line order in the file: database name, user, password, host.
    db_name, db_user, db_pass, db_host = [
        credentials_file.readline().strip() for _ in range(4)
    ]

## Test connection
The next code segment tests to ensure that the database connection is working properly.

In [2]:
import pymysql as pms

In [3]:
# Smoke-test the credentials by opening and immediately closing a connection.
# `con` is bound to None before the `try` so that, if `connect` raises, the
# `finally` clause neither fails with a NameError nor tries to close a stale
# connection left over from an earlier run of this cell; either would hide
# the real connection error.
con = None
try:
    con = pms.connect(host=db_host, user=db_user, passwd=db_pass, db=db_name)
    print("Successfully connected")
finally:
    if con is not None:
        print("Closing connection")
        con.close()

Successfully connected
Closing connection


## Shorthand connect to database
This function returns a new database connection using the default credentials read in above.

In [4]:
def get_connect():
    """
    Open and return a new database connection.

    The host, user, password and database name are taken from the
    module-level ``db_host``, ``db_user``, ``db_pass`` and ``db_name``
    variables. This function does not close the connection; that is left
    to the caller.
    """
    connection_settings = {
        "host": db_host,
        "user": db_user,
        "passwd": db_pass,
        "db": db_name,
    }
    return pms.connect(**connection_settings)

## WHERE with column relations
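The **WHERE** clause can be used to specify column value requirements. Besides comparing a column to a literal value, as in `WHERE sal <= 2000` or `WHERE ename = 'BILL'`, a condition can also compare two columns of the same row with each other, as in `WHERE comm < sal`. Rows in which either column is NULL are not returned, because a comparison with NULL is never true.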

In [6]:
# `con` is bound before the `try` so the `finally` clause is safe even when
# `get_connect()` itself raises (otherwise `con` would be undefined, or would
# still refer to an already-closed connection from a previous cell).
con = None
try:
    con = get_connect()

    sql = """
        SELECT * FROM emp WHERE comm < sal;
    """
    # The cursor is used as a context manager so it is closed after the
    # rows have been printed.
    with con.cursor() as cur:
        cur.execute(sql)
        for row in cur.fetchall():
            print(row)
finally:
    if con is not None:
        con.close()

(7499, 'ALLEN', 'SALESMAN', 7698, datetime.date(1981, 2, 20), 1600.0, 300.0, 30)
(7521, 'WARD', 'SALESMAN', 7698, datetime.date(1981, 2, 22), 1250.0, 500.0, 30)
(7844, 'TURNER', 'SALESMAN', 7698, datetime.date(1981, 9, 8), 1500.0, 0.0, 30)


## Multiple AND operators within a WHERE clause
`AND` operators can be used to perform more complex `WHERE` queries.

In [10]:
# `con` is bound before the `try` so the `finally` clause is safe even when
# `get_connect()` itself raises (otherwise `con` would be undefined, or would
# still refer to an already-closed connection from a previous cell).
con = None
try:
    con = get_connect()
    sql = """
        SELECT * FROM emp WHERE
            job != 'MANAGER' AND
            sal > 2500 AND
            deptno = 20
    """
    # The cursor is used as a context manager so it is closed after the
    # rows have been printed.
    with con.cursor() as cur:
        cur.execute(sql)
        for row in cur.fetchall():
            print(row)
finally:
    if con is not None:
        con.close()

(7788, 'SCOTT', 'ANALYST', 7566, datetime.date(1981, 12, 9), 3000.0, None, 20)
(7902, 'FORD', 'ANALYST', 7566, datetime.date(1981, 12, 3), 3000.0, None, 20)


**Note**: It is best practice to query like `sal > 2500` as opposed to `2500 < sal`.

## Shorthand query execution and output
The function below accepts a single parameter (the query string), executes it as a SQL query, and prints the results. The connection is closed before the function returns.

In [26]:
# Quick check that a string literal is recognised as a `str`.
isinstance("abc", str)

True

In [34]:
def execute_sql_output_result(query_string):
    """
    Run a single SQL query and print every row it returns.

    Opens a fresh connection through ``get_connect``, executes
    ``query_string``, prints a ``=== N RESULTS ===`` header followed by one
    ``index: row`` line per fetched row, and always closes the connection
    before returning.

    Parameters
    ----------
    query_string : str
        The SQL statement to execute. Any non-string argument is ignored:
        a connection is still opened and closed, but nothing is executed
        or printed.

    Raises
    ------
    Any exception raised by ``get_connect`` or by the cursor propagates
    unchanged; nothing is caught here.
    """
    # Bind the name before the `try`: if `get_connect()` raises, the `finally`
    # clause must not fail on an unbound local and mask the real error.
    con = None
    try:
        con = get_connect()
        with con.cursor() as cur:
            # Only a single query string is handled; other types fall through.
            if isinstance(query_string, str):
                cur.execute(query_string)
                result = cur.fetchall()
                print("=== {} RESULTS ===".format(len(result)))
                for i, row in enumerate(result):
                    print("{}: {}".format(i, row))
    finally:
        if con is not None:
            con.close()

## OR clause
The `OR` operator can be used to match rows that satisfy at least one of several conditions, for example rows whose column holds any one of several values.

In [35]:
# Employees whose job is either CLERK or SALESMAN.
clerk_or_salesman_sql = """
    SELECT * FROM emp WHERE
        job='CLERK' OR
        job='SALESMAN'
"""
execute_sql_output_result(clerk_or_salesman_sql)

=== 8 RESULTS ===
0: (7369, 'SMITH', 'CLERK', 7902, datetime.date(1980, 12, 17), 800.0, None, 20)
1: (7499, 'ALLEN', 'SALESMAN', 7698, datetime.date(1981, 2, 20), 1600.0, 300.0, 30)
2: (7521, 'WARD', 'SALESMAN', 7698, datetime.date(1981, 2, 22), 1250.0, 500.0, 30)
3: (7654, 'MARTIN', 'SALESMAN', 7698, datetime.date(1981, 9, 28), 1250.0, 1400.0, 30)
4: (7844, 'TURNER', 'SALESMAN', 7698, datetime.date(1981, 9, 8), 1500.0, 0.0, 30)
5: (7876, 'ADAMS', 'CLERK', 7788, datetime.date(1983, 1, 12), 1100.0, None, 20)
6: (7900, 'JAMES', 'CLERK', 7698, datetime.date(1981, 12, 3), 950.0, None, 30)
7: (7934, 'MILLER', 'CLERK', 7782, datetime.date(1982, 1, 23), 1300.0, None, 10)


## IN clause
The `IN` operator can be used to test whether a value belongs to a set of predefined values, which is equivalent to chaining several `OR` comparisons on the same column.

In [36]:
# Name, hire date and department of everyone in department 20 or 30.
dept_in_sql = """
    SELECT ename,hiredate,deptno FROM emp WHERE
        deptno IN (20, 30)
"""
execute_sql_output_result(dept_in_sql)

=== 11 RESULTS ===
0: ('SMITH', datetime.date(1980, 12, 17), 20)
1: ('ALLEN', datetime.date(1981, 2, 20), 30)
2: ('WARD', datetime.date(1981, 2, 22), 30)
3: ('JONES', datetime.date(1981, 4, 2), 20)
4: ('MARTIN', datetime.date(1981, 9, 28), 30)
5: ('BLAKE', datetime.date(1981, 5, 1), 30)
6: ('SCOTT', datetime.date(1981, 12, 9), 20)
7: ('TURNER', datetime.date(1981, 9, 8), 30)
8: ('ADAMS', datetime.date(1983, 1, 12), 20)
9: ('JAMES', datetime.date(1981, 12, 3), 30)
10: ('FORD', datetime.date(1981, 12, 3), 20)


## NOT clause
`NOT` can be used in conjunction with `IN` to get the complement of the result.

In [37]:
# Same columns as the IN example, but for everyone outside departments 20 and 30.
dept_not_in_sql = """
    SELECT ename,hiredate,deptno FROM emp WHERE
        deptno NOT IN (20, 30)
"""
execute_sql_output_result(dept_not_in_sql)

=== 3 RESULTS ===
0: ('CLARK', datetime.date(1981, 6, 9), 10)
1: ('KING', datetime.date(1981, 11, 17), 10)
2: ('MILLER', datetime.date(1982, 1, 23), 10)


## BETWEEN clause
`BETWEEN` can be used similarly to `IN` to specify a set of values. However, `BETWEEN` specifies a range of values rather than a list, and the range includes both endpoints.

In [38]:
# Employees hired within the given date range.
hired_between_sql = """
    SELECT * FROM emp WHERE
        hiredate BETWEEN '1981-05-01' AND '1982-12-09'
"""
execute_sql_output_result(hired_between_sql)

=== 9 RESULTS ===
0: (7654, 'MARTIN', 'SALESMAN', 7698, datetime.date(1981, 9, 28), 1250.0, 1400.0, 30)
1: (7698, 'BLAKE', 'MANAGER', 7839, datetime.date(1981, 5, 1), 2850.0, None, 30)
2: (7782, 'CLARK', 'MANAGER', 7839, datetime.date(1981, 6, 9), 2450.0, None, 10)
3: (7788, 'SCOTT', 'ANALYST', 7566, datetime.date(1981, 12, 9), 3000.0, None, 20)
4: (7839, 'KING', 'PRESIDENT', None, datetime.date(1981, 11, 17), 5000.0, None, 10)
5: (7844, 'TURNER', 'SALESMAN', 7698, datetime.date(1981, 9, 8), 1500.0, 0.0, 30)
6: (7900, 'JAMES', 'CLERK', 7698, datetime.date(1981, 12, 3), 950.0, None, 30)
7: (7902, 'FORD', 'ANALYST', 7566, datetime.date(1981, 12, 3), 3000.0, None, 20)
8: (7934, 'MILLER', 'CLERK', 7782, datetime.date(1982, 1, 23), 1300.0, None, 10)


## Complementing the BETWEEN query using NOT
`NOT` can also be used to negate the range specified using `BETWEEN`.

In [40]:
# Run the range query first, then its negation, with a blank line between
# the two result listings.
salary_in_range_sql = """
    SELECT * FROM emp WHERE
        sal BETWEEN 950  AND 1600
"""
salary_out_of_range_sql = """
    SELECT * FROM emp WHERE
        sal NOT BETWEEN 950  AND 1600
"""
execute_sql_output_result(salary_in_range_sql)
print()
execute_sql_output_result(salary_out_of_range_sql)

=== 7 RESULTS ===
0: (7499, 'ALLEN', 'SALESMAN', 7698, datetime.date(1981, 2, 20), 1600.0, 300.0, 30)
1: (7521, 'WARD', 'SALESMAN', 7698, datetime.date(1981, 2, 22), 1250.0, 500.0, 30)
2: (7654, 'MARTIN', 'SALESMAN', 7698, datetime.date(1981, 9, 28), 1250.0, 1400.0, 30)
3: (7844, 'TURNER', 'SALESMAN', 7698, datetime.date(1981, 9, 8), 1500.0, 0.0, 30)
4: (7876, 'ADAMS', 'CLERK', 7788, datetime.date(1983, 1, 12), 1100.0, None, 20)
5: (7900, 'JAMES', 'CLERK', 7698, datetime.date(1981, 12, 3), 950.0, None, 30)
6: (7934, 'MILLER', 'CLERK', 7782, datetime.date(1982, 1, 23), 1300.0, None, 10)

=== 7 RESULTS ===
0: (7369, 'SMITH', 'CLERK', 7902, datetime.date(1980, 12, 17), 800.0, None, 20)
1: (7566, 'JONES', 'MANAGER', 7839, datetime.date(1981, 4, 2), 2975.0, None, 20)
2: (7698, 'BLAKE', 'MANAGER', 7839, datetime.date(1981, 5, 1), 2850.0, None, 30)
3: (7782, 'CLARK', 'MANAGER', 7839, datetime.date(1981, 6, 9), 2450.0, None, 10)
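4: (7788, 'SCOTT', 'ANALYST', 7566, datetime.date(1981, 12, 9), 3000.0, None, 20)
5: (7839, 'KING', 'PRESIDENT', None, datetime.date(1981, 11, 17), 5000.0, None, 10)
6: (7902, 'FORD', 'ANALYST', 7566, datetime.date(1981, 12, 3), 3000.0, None, 20)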

## IS NULL
Missing data values can be detected by specifying `IS NULL` as a selector. Notice how in the query result below, the second-to-last column is None for all rows, confirming that the specified column (comm/commission) has no value in those rows.

In [41]:
# Employees for whom no commission value is stored.
comm_missing_sql = """
    SELECT * FROM emp WHERE
        comm IS NULL;
"""
execute_sql_output_result(comm_missing_sql)

=== 10 RESULTS ===
0: (7369, 'SMITH', 'CLERK', 7902, datetime.date(1980, 12, 17), 800.0, None, 20)
1: (7566, 'JONES', 'MANAGER', 7839, datetime.date(1981, 4, 2), 2975.0, None, 20)
2: (7698, 'BLAKE', 'MANAGER', 7839, datetime.date(1981, 5, 1), 2850.0, None, 30)
3: (7782, 'CLARK', 'MANAGER', 7839, datetime.date(1981, 6, 9), 2450.0, None, 10)
4: (7788, 'SCOTT', 'ANALYST', 7566, datetime.date(1981, 12, 9), 3000.0, None, 20)
5: (7839, 'KING', 'PRESIDENT', None, datetime.date(1981, 11, 17), 5000.0, None, 10)
6: (7876, 'ADAMS', 'CLERK', 7788, datetime.date(1983, 1, 12), 1100.0, None, 20)
7: (7900, 'JAMES', 'CLERK', 7698, datetime.date(1981, 12, 3), 950.0, None, 30)
8: (7902, 'FORD', 'ANALYST', 7566, datetime.date(1981, 12, 3), 3000.0, None, 20)
9: (7934, 'MILLER', 'CLERK', 7782, datetime.date(1982, 1, 23), 1300.0, None, 10)


`IS NOT NULL` is also a valid selector; it returns the rows in which the column does have a value.

In [42]:
# Employees for whom a commission value is stored (including 0.0).
comm_present_sql = """
    SELECT * FROM emp WHERE
        comm IS NOT NULL;
"""
execute_sql_output_result(comm_present_sql)

=== 4 RESULTS ===
0: (7499, 'ALLEN', 'SALESMAN', 7698, datetime.date(1981, 2, 20), 1600.0, 300.0, 30)
1: (7521, 'WARD', 'SALESMAN', 7698, datetime.date(1981, 2, 22), 1250.0, 500.0, 30)
2: (7654, 'MARTIN', 'SALESMAN', 7698, datetime.date(1981, 9, 28), 1250.0, 1400.0, 30)
3: (7844, 'TURNER', 'SALESMAN', 7698, datetime.date(1981, 9, 8), 1500.0, 0.0, 30)
