# Data Retrieval and Pre-processing Notebook

In [1]:
import numpy as np
import pandas as pd
import pickle

import configparser
import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT

In [2]:
# Load the database connection settings from the [aws] section of config.ini
config = configparser.ConfigParser()
config.read('config.ini')
ENDPOINT, PORT, USR, PWD, DB = (
    config.get('aws', option)
    for option in ('ENDPOINT', 'PORT', 'USER', 'PASSWORD', 'DATABASE')
)

## Retrieving data to transform
The following function is intended for future versions of the app in which the frontend queries a live database. Currently, the app receives a static CSV file and converts it to a DataFrame.

In [78]:
def senator_select(senator):
    """
    Return every senator's vote on each bill the chosen senator has a vote record for.

    No filter is applied on the vote position, so rows where a senator did not
    vote or voted present are returned as well.

    Parameters
    ----------
    senator : str
        Full name of the senator, written as first name and last name separated
        by a single space (it is compared against
        ``CONCAT(f_name, ' ', l_name)``).

    Returns
    -------
    list of tuple
        One ``(senator full name, csr_id, position)`` row per recorded vote.
    """
    conn = psycopg2.connect(
        host=ENDPOINT,
        user=USR,
        password=PWD,
        port=PORT,
        database=DB
    )
    try:
        conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
        # The cursor context manager closes the cursor even if the query fails.
        with conn.cursor() as cursor:
            # The name is passed as a bound parameter rather than interpolated
            # into the SQL text, so quotes in a name (e.g. O'Brien) cannot break
            # or alter the statement.
            cursor.execute(
                """
                SELECT CONCAT(senators.f_name, ' ', senators.l_name), votes.csr_id, votes.position
                FROM votes
                JOIN bills ON votes.csr_id = bills.csr_id
                JOIN senators ON votes.sen_id = senators.sen_id
                WHERE votes.csr_id IN (
                    SELECT csr_id
                    FROM votes
                    JOIN senators ON votes.sen_id = senators.sen_id
                    WHERE CONCAT(senators.f_name, ' ', senators.l_name) = %s
                )
                ;
                """,
                (senator,),
            )
            return cursor.fetchall()
    finally:
        # Always release the connection, including when the query raises.
        conn.close()

In [81]:
# Fetch the vote rows for every bill Patrick Leahy has a vote record on
votes = senator_select(senator='Patrick Leahy')

## Format Data for CSV

In [82]:
# Unique senators (row labels) and roll-call ids (column labels), each kept in
# first-seen order. dict.fromkeys de-duplicates in linear time while preserving
# insertion order, avoiding a list membership scan for every vote row.
sen_index = list(dict.fromkeys(vote[0] for vote in votes))
vote_cols = list(dict.fromkeys(vote[1] for vote in votes))

In [83]:
# Empty senators x roll-calls grid. Cells start out as NaN (no vote recorded for
# that senator on that bill), which an integer dtype cannot hold, so the float
# dtype is requested explicitly.
df = pd.DataFrame(
    index=sen_index,
    columns=vote_cols,
    dtype=float,
)

In [84]:
# Map a recorded vote position onto a numerical score
def vote2score(position):
    """
    Translate a vote position into its score.

    'Yes' scores 1, 'No' scores -1, and both 'Not Voting' and 'Present'
    score 0. Any other position raises KeyError.
    """
    score_by_position = {
        'Yes': 1,
        'No': -1,
        'Not Voting': 0,
        'Present': 0,
    }
    return score_by_position[position]

In [88]:
# Write each vote's score into the cell for its senator (row) and roll call (column)
for senator_name, csr_id, position in votes:
    df.at[senator_name, csr_id] = vote2score(position)

In [90]:
# Preview the first five senators of the score matrix
df.head(n=5)

Unnamed: 0,101.1.11,101.1.12,101.1.14,101.1.15,101.1.24,101.1.25,101.1.27,101.1.28,101.1.29,101.1.30,...,116.2.129,116.2.132,116.2.133,116.2.134,116.2.135,116.2.136,116.2.137,116.2.138,116.2.139,116.2.140
Charles Grassley,1.0,1.0,-1.0,-1.0,1.0,1.0,1.0,1.0,-1.0,1.0,...,1.0,-1.0,1.0,1.0,-1.0,1.0,1.0,1.0,1.0,1.0
Patrick Leahy,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.0,...,-1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.0
Mitch McConnell,1.0,1.0,1.0,-1.0,1.0,1.0,1.0,1.0,-1.0,1.0,...,1.0,-1.0,1.0,1.0,-1.0,1.0,1.0,1.0,1.0,1.0
Richard Shelby,1.0,1.0,1.0,1.0,1.0,1.0,-1.0,1.0,1.0,-1.0,...,1.0,-1.0,1.0,-1.0,-1.0,1.0,1.0,1.0,1.0,1.0
Dianne Feinstein,,,,,,,,,,,...,1.0,1.0,1.0,1.0,-1.0,1.0,1.0,1.0,1.0,1.0


In [91]:
# Save the score matrix (senator names as the index column) for the app to load
df.to_csv(path_or_buf='votes.csv')