# ProPublica Congress API

The goal of this notebook is to extract data for members, votes, and bills for available Congresses.  Data are saved to CSV files.

API documentation: https://projects.propublica.org/api-docs/congress-api/

In [1]:
import os
import requests
import simplejson as json
import csv
import pandas as pd
import logging
import pprint as pp

from pandas.io.json import json_normalize

# The API key is read from the PROPUBLICA_API_KEY environment variable so that no
# credential is stored in the notebook. Set it before starting the kernel.
# NOTE(review): a key that was previously committed in this cell should be treated
# as exposed and rotated.
headers = {'X-API-Key': os.environ.get('PROPUBLICA_API_KEY', '')}

# API failures are written to pp_errors.log instead of interrupting batch downloads.
logging.basicConfig(filename='pp_errors.log', level=logging.DEBUG)
logger = logging.getLogger(__name__)

if not headers['X-API-Key']:
    logger.warning('PROPUBLICA_API_KEY is not set; requests will be sent without an API key')

class PropublicaApiError(Exception):
    """Raised when a ProPublica Congress API response contains an 'error' entry."""
    pass

def api_call(params):
    """Send a GET request to the ProPublica Congress API and return the decoded JSON.

    params: the path below the v1 base URL, given either as a sequence of path
        segments (joined with '/') or as a single ready-made path string.

    Returns the parsed response object. Returns None when the request fails,
    the body is not valid JSON, or the payload contains an 'error' key; in
    those cases the problem is written to the module logger instead of being
    raised, so callers must check for None.
    """
    if isinstance(params, str):
        # A bare string must not go through join(), which would split it
        # into single characters separated by '/'.
        path = params
    else:
        # Accept text, byte strings and numbers as individual segments.
        path = '/'.join(
            p.decode('utf-8') if isinstance(p, bytes) else str(p) for p in params
        )

    url = 'https://api.propublica.org/congress/v1/%s.json' % path
    print(url)

    try:
        r = requests.get(url, headers=headers)
        obj = r.json()
        if 'error' in obj:
            raise PropublicaApiError(obj['error'])
        return obj
    except (requests.RequestException, PropublicaApiError, ValueError) as e:
        # RequestException covers connection/timeout failures as well as
        # HTTPError; ValueError covers a body that cannot be decoded as JSON.
        logger.error(e)
    return None

### Members

In [2]:
# get list of members for given Congress
def getMembers(chamber, congress, to_csv=False):
    """Fetch the member list for one chamber of one Congress.

    chamber: chamber path segment used in the URL (e.g. 'senate' or 'house').
    congress: Congress number; converted to a string for the URL.
    to_csv: when True, pass the response to writeMembersCSV and return
        whatever that function returns instead of the raw response.

    Returns None when api_call returns None (it logs the failure itself).
    """
    params = [str(congress), chamber, 'members']
    response = api_call(params)
    if response is None:
        # Nothing to write or return; without this guard the CSV writer
        # would fail while indexing into None.
        return None
    if to_csv:
        return writeMembersCSV(json_file=response, status='current', chamber=chamber, congress=congress)
    return response


# get list of new members
def getNewMembers(to_csv=False):
    """Fetch the list of new members from the 'members/new' endpoint.

    to_csv: when True, pass the response to writeMembersCSV (status 'new')
        and return whatever that function returns instead of the raw response.

    Returns None when api_call returns None (it logs the failure itself).
    """
    # api_call joins its argument with '/', so the path has to be a list of
    # segments; a plain string would be split into single characters.
    params = ['members', 'new']
    response = api_call(params)
    if response is None:
        return None
    if to_csv:
        return writeMembersCSV(json_file=response, status='new')
    return response


# get list of members leaving after given Congress
def getLeavingMembers(chamber, congress, to_csv=False):
    """Fetch the list of members leaving after the given Congress.

    chamber: chamber path segment used in the URL (e.g. 'senate' or 'house').
    congress: Congress number; converted to a string for the URL.
    to_csv: when True, pass the response to writeMembersCSV (status 'leaving')
        and return whatever that function returns instead of the raw response.

    Returns None when api_call returns None (it logs the failure itself).
    """
    params = [str(congress), chamber, 'members', 'leaving']
    response = api_call(params)
    if response is None:
        # Skip the CSV step; the writer cannot index into a missing response.
        return None
    if to_csv:
        return writeMembersCSV(json_file=response, status='leaving', chamber=chamber, congress=congress)
    return response


# write list of members to CSV
def writeMembersCSV(json_file, status='current', chamber='senate', congress=115):
    """Write the members contained in an API response to a CSV file under raw/.

    json_file: decoded API response; members are read from
        json_file['results'][0]['members'].
    status: 'current' or 'leaving' selects a per-chamber, per-Congress file
        name; any other value writes to raw/members_new.csv.
    chamber, congress: used in the file name; congress is also added to every
        row as a 'congress' column.

    Returns the list of values in the 'id' column (empty if there is no such
    column), so callers can use it as the list of member IDs.
    """
    members = json_file['results'][0]['members']
    df = pd.DataFrame.from_dict(members)
    df = df.assign(congress=congress)

    # Compare by value: identity checks on strings only work by accident of
    # interning and silently fall through to the 'new' file otherwise.
    if status == 'current':
        path = 'raw/members_%s_%i.csv' % (chamber, congress)
    elif status == 'leaving':
        path = 'raw/members_leaving_%s_%i.csv' % (chamber, congress)
    else:
        path = 'raw/members_new.csv'

    df.to_csv(path, encoding='utf-8', index=False)

    return df['id'].tolist() if 'id' in df.columns else []
    

# get Senate members for Congresses 80-115 or House members for Congresses 102-115
def getAllMembers(chamber='senate'):
    """Download and save the member list for every covered Congress of a chamber.

    Starts at Congress 80 when chamber is 'senate' and at Congress 102 for any
    other value, and runs through Congress 115. Each Congress is fetched with
    getMembers(..., to_csv=True).
    """
    first_congress = 102
    if chamber == 'senate':
        first_congress = 80

    congress = first_congress
    while congress < 116:
        getMembers(chamber, congress, to_csv=True)
        # download of leaving members is currently disabled:
        # getLeavingMembers(chamber, congress, to_csv=True)
        congress += 1


# combine the per-Congress member files of one chamber into a single file
def mergeMembers(chamber='senate'):
    """Concatenate raw/members_<chamber>_<congress>.csv files into members_<chamber>.csv.

    Reads Congresses 80-115 when chamber is 'senate' and 102-115 otherwise.
    """
    first_congress = 102
    if chamber == 'senate':
        first_congress = 80

    frames = []
    for congress in range(first_congress, 116):
        frames.append(pd.read_csv('raw/members_%s_%i.csv' % (chamber, congress)))

    combined = pd.concat(frames, ignore_index=True)
    combined.to_csv('members_%s.csv' % (chamber), index=False)


Pull members of the House and Senate for all available Congresses and write one CSV file per chamber per Congress.  Then merge the per-Congress files into a single file each for the House and the Senate.

In [3]:
# Download one members CSV per Congress for each chamber (one API request per Congress).
getAllMembers('senate')
getAllMembers('house')

# Combine the per-Congress files into members_senate.csv and members_house.csv.
mergeMembers('senate')
mergeMembers('house')

https://api.propublica.org/congress/v1/80/senate/members.json
https://api.propublica.org/congress/v1/81/senate/members.json
https://api.propublica.org/congress/v1/82/senate/members.json
https://api.propublica.org/congress/v1/83/senate/members.json
https://api.propublica.org/congress/v1/84/senate/members.json
https://api.propublica.org/congress/v1/85/senate/members.json
https://api.propublica.org/congress/v1/86/senate/members.json
https://api.propublica.org/congress/v1/87/senate/members.json
https://api.propublica.org/congress/v1/88/senate/members.json
https://api.propublica.org/congress/v1/89/senate/members.json
https://api.propublica.org/congress/v1/90/senate/members.json
https://api.propublica.org/congress/v1/91/senate/members.json
https://api.propublica.org/congress/v1/92/senate/members.json
https://api.propublica.org/congress/v1/93/senate/members.json
https://api.propublica.org/congress/v1/94/senate/members.json
https://api.propublica.org/congress/v1/95/senate/members.json
https://

### Votes

Recent votes for all members are downloaded to individual CSV files and merged into a single CSV file.  This subset of votes is meant to help us build a toy dataset for EDA and modeling.

In [4]:
# get recent votes for given member
def getRecentVotes(member_id, to_csv=False):
    """Fetch the votes listed under members/<member_id>/votes.

    When to_csv is true and the request produced a response, the response is
    also written out with writeRecentVotesCSV. The response (or None on
    failure) is returned either way.
    """
    response = api_call(['members', member_id, 'votes'])
    have_response = response is not None
    if to_csv and have_response:
        writeRecentVotesCSV(json_file=response, member_id=member_id)
    return response


# write list of recent votes by member to CSV
def writeRecentVotesCSV(json_file, member_id):
    """Write one member's votes from an API response to raw/member_votes_<member_id>.csv.

    json_file: decoded API response; votes are read from
        json_file['results'][0]['votes'].
    member_id: used only to build the output file name.

    Adds 'bill_id', 'sponsor_id' and 'bill_uri' columns taken from each vote's
    'bill' entry, using 'NA' when the entry is missing, empty, or lacks the key.
    Nothing is written unless there are more than two votes.
    """
    votes = json_file['results'][0]['votes']

    def bill_field(key):
        """Build a picker that reads `key` from a bill dict, defaulting to 'NA'."""
        def pick(bill):
            # A vote without a 'bill' entry shows up as NaN in the frame, so
            # anything that is not a dict is treated as "no bill".
            if isinstance(bill, dict):
                return bill.get(key, 'NA')
            return 'NA'
        return pick

    # NOTE(review): the threshold also skips members with only one or two
    # votes, although the intent appears to be "votes exist" -- confirm.
    if len(votes) > 2:
        df = pd.DataFrame.from_dict(votes)

        if 'bill' in df.columns:
            bills = df['bill']
        else:
            bills = pd.Series([None] * len(df), index=df.index)

        # add columns for bill ID, sponsor ID, bill URI if available
        for column in ('bill_id', 'sponsor_id', 'bill_uri'):
            df[column] = bills.apply(bill_field(column))

        df.to_csv('raw/member_votes_%s.csv' % (member_id), encoding='utf-8', index=False)

        
# download recent votes for every member listed in the merged members file
def getAllRecentVotes(chamber='senate'):
    """Fetch and save recent votes for each distinct member of a chamber.

    Member IDs come from the 'id' column of members_<chamber>.csv; each one is
    passed to getRecentVotes with to_csv=True.
    """
    roster = pd.read_csv('members_%s.csv' % (chamber))
    distinct_ids = roster.id.unique()

    for member_id in distinct_ids:
        getRecentVotes(member_id, to_csv=True)
            
            
# combine votes into single file
def mergeRecentVotes():
    """Concatenate every raw/member_votes_* file into member_votes.csv."""
    frames = []
    for filename in os.listdir('raw'):
        if not filename.startswith('member_votes_'):
            continue
        frames.append(pd.read_csv('raw/%s' % (filename)))

    combined = pd.concat(frames, ignore_index=True)
    combined.to_csv('member_votes.csv', index=False)
    

Pull recent votes for every member of the House and Senate listed in the merged member files, writing one CSV file per member.  Then merge the per-member vote files into a single CSV file.  This partial vote data set lets us start building model data while we work on pulling the full vote data set.

In [302]:
# Fetch recent votes for every member ID in members_senate.csv and members_house.csv
# (one API request per member), then concatenate the per-member files in raw/
# into member_votes.csv.
getAllRecentVotes('senate')
getAllRecentVotes('house')
mergeRecentVotes()

https://api.propublica.org/congress/v1/members/A000062/votes.json
https://api.propublica.org/congress/v1/members/B000095/votes.json
https://api.propublica.org/congress/v1/members/B000099/votes.json
https://api.propublica.org/congress/v1/members/B000145/votes.json
https://api.propublica.org/congress/v1/members/B000460/votes.json
https://api.propublica.org/congress/v1/members/B000816/votes.json
https://api.propublica.org/congress/v1/members/B000820/votes.json
https://api.propublica.org/congress/v1/members/B000823/votes.json
https://api.propublica.org/congress/v1/members/B000874/votes.json
https://api.propublica.org/congress/v1/members/B000894/votes.json
https://api.propublica.org/congress/v1/members/B001013/votes.json
https://api.propublica.org/congress/v1/members/B001168/votes.json
https://api.propublica.org/congress/v1/members/B001169/votes.json
https://api.propublica.org/congress/v1/members/B001177/votes.json
https://api.propublica.org/congress/v1/members/B001208/votes.json
https://ap

Pull all votes for a given month/year and write to individual CSV files.  Then pull roll call votes with individual members' positions.

In [156]:
# get votes for a given chamber of Congress, in a given month/year
def getVotes(chamber, month, year, to_csv=False):
    """Fetch the votes of a chamber for one month of one year.

    The month is zero-padded to two digits in the URL. When to_csv is true and
    a response was obtained, it is also written with writeVotesCSV. The
    response (or None on failure) is returned either way.
    """
    year_segment = str(year).encode('utf-8')
    month_segment = '%02d' % (month)
    response = api_call([chamber, 'votes', year_segment, month_segment])

    have_response = response is not None
    if to_csv and have_response:
        writeVotesCSV(json_file=response, chamber=chamber, month=month, year=year)
    return response


# write votes for chamber, month, year to CSV
def writeVotesCSV(json_file, chamber, month, year):
    """Write a month of chamber votes to raw/votes_<chamber>_<year>-<MM>.csv.

    json_file: decoded API response; votes are read from
        json_file['results']['votes'].
    chamber, month, year: used only to build the output file name.

    Adds 'bill_id' and 'sponsor_id' (from each vote's 'bill' entry),
    'majority_pos_rep' / 'majority_pos_dem' (from the party entries) and
    'majority_pos_ind' ('Yes' when the independent 'yes' count is at least the
    'no' count, otherwise 'No'). Missing or empty entries yield 'NA', or 'No'
    for the independent column. Nothing is written when there are no votes.
    """
    votes = json_file['results']['votes']
    df = pd.DataFrame.from_dict(votes)

    if df.empty:
        return

    def field(key):
        """Build a picker that reads `key` from a dict value, defaulting to 'NA'."""
        def pick(value):
            # Absent entries appear as NaN in the frame; only dicts carry data.
            if isinstance(value, dict):
                return value.get(key, 'NA')
            return 'NA'
        return pick

    def independent_majority(tally):
        """Return 'Yes' only when both counts are present and yes >= no."""
        if isinstance(tally, dict) and 'yes' in tally and 'no' in tally:
            return 'Yes' if tally['yes'] >= tally['no'] else 'No'
        return 'No'

    # add columns for bill ID, majority position for Democrats, Republicans
    df['bill_id'] = df['bill'].apply(field('bill_id'))
    df['sponsor_id'] = df['bill'].apply(field('sponsor_id'))
    df['majority_pos_rep'] = df['republican'].apply(field('majority_position'))
    df['majority_pos_dem'] = df['democratic'].apply(field('majority_position'))
    df['majority_pos_ind'] = df['independent'].apply(independent_majority)

    df.to_csv('raw/votes_%s_%s-%s.csv' % (chamber, year, '%02d' % (month)), encoding='utf-8', index=False)

        
# get votes for both chambers, all Congresses since 1947 (Propublica goes back to Congress 80 for Senate)
def getAllVotes(chamber):
    """Fetch and save the votes of a chamber for every month from 1947 through 2017.

    Each (year, month) pair is requested with getVotes(..., to_csv=True),
    year by year and month by month.
    """
    periods = [(year, month) for year in range(1947, 2018) for month in range(1, 13)]
    for year, month in periods:
        getVotes(chamber, month, year, to_csv=True)

            
# merge vote files for Congresses into single file
def mergeVotes():
    """Concatenate every raw/votes_both* file into votes_both.csv."""
    frames = []
    for filename in os.listdir('raw'):
        if not filename.startswith('votes_both'):
            continue
        frames.append(pd.read_csv('raw/%s' % (filename)))

    combined = pd.concat(frames, ignore_index=True)
    combined.to_csv('votes_both.csv', index=False)


# get roll call votes for a given vote (Congress, chamber, session, roll call number)
def getRollCallVotes(congress, chamber, session, roll_call, to_csv=True):
    """Fetch one roll call vote unless its CSV already exists under raw/.

    If raw/votes_roll_call_<chamber>_<congress>_<session>_<roll_call>.csv is
    already present, no request is made and None is returned. Otherwise the
    vote is requested, written with writeRollCallVotesCSV when to_csv is true
    and a response was obtained, and the response (or None) is returned.
    """
    existing_csv = 'raw/votes_roll_call_%s_%s_%s_%s.csv' % (chamber, congress, session, roll_call)
    if os.path.isfile(existing_csv):
        return None

    segments = [
        str(congress),
        chamber,
        'sessions',
        str(session).encode('utf-8'),
        'votes',
        str(roll_call).encode('utf-8'),
    ]
    response = api_call(segments)

    if response is not None and to_csv:
        writeRollCallVotesCSV(json_file=response, congress=congress, chamber=chamber, session=session, roll_call=roll_call)
    return response


# write roll call votes to CSV
def writeRollCallVotesCSV(json_file, congress, chamber, session, roll_call):
    """Write the member positions of one roll call vote to a CSV file under raw/.

    The vote is read from json_file['results']['votes']['vote']; its
    'positions' list is flattened to one row per position, with the vote-level
    fields repeated on every row. Derived bill and majority-position columns
    are added before writing to
    raw/votes_roll_call_<chamber>_<congress>_<session>_<roll_call>.csv.
    Nothing is written when the flattened frame is empty.
    """
    vote = json.loads(json.dumps(json_file))['results']['votes']['vote']

    # vote-level fields carried onto every position row
    meta_fields = ['bill', 'chamber', 'congress', 'roll_call', 'description',
                   'session', 'date', 'republican', 'democratic', 'independent']
    positions = json_normalize(vote, 'positions', meta_fields)
    df = pd.DataFrame.from_dict(positions)

    if df.empty:
        return

    def lookup(key):
        """Return a picker yielding x[key] for non-empty x containing key, else 'NA'."""
        return lambda x: x[key] if len(x) > 0 and key in x.keys() else 'NA'

    def independent_majority(x):
        if 'yes' in x.keys() and x['yes'] >= x['no']:
            return 'Yes'
        return 'No'

    # bill ID / sponsor plus the majority position of each party
    df['bill_id'] = df['bill'].apply(lookup('bill_id'))
    df['sponsor_id'] = df['bill'].apply(lookup('sponsor_id'))
    df['majority_pos_rep'] = df['republican'].apply(lookup('majority_position'))
    df['majority_pos_dem'] = df['democratic'].apply(lookup('majority_position'))
    df['majority_pos_ind'] = df['independent'].apply(independent_majority)

    out_path = 'raw/votes_roll_call_%s_%s_%s_%s.csv' % (chamber, congress, session, roll_call)
    df.to_csv(out_path, encoding='utf-8', index=False)
        


def getAllRollCallVotes():
    """Fetch and save the roll call details for every vote listed in votes_both.csv.

    Each row supplies the congress, chamber (lower-cased for the URL), session
    and roll call number passed to getRollCallVotes.
    """
    # master list of votes, one row per roll call
    votes = pd.read_csv('votes_both.csv', header=0)

    for _, vote in votes.iterrows():
        congress = vote['congress']
        chamber = vote['chamber']
        session = vote['session']
        roll_call = vote['roll_call']
        getRollCallVotes(congress=congress, chamber=chamber.lower(), session=session, roll_call=roll_call, to_csv=True)


def mergeRollCallVotes():
    """Placeholder: merging of roll call vote files is not implemented yet."""
    return None

In [158]:
# Earlier steps, kept for reference and disabled here: download monthly votes
# and merge them into votes_both.csv, which getAllRollCallVotes() reads.
# getVotes('senate', 2, 2017, to_csv=True)
# getAllVotes('both')
# mergeVotes()

# Single roll call example (disabled):
# getRollCallVotes(101, 'senate', 1, 6)
# Fetch roll call details for every row of votes_both.csv; roll calls whose
# CSV already exists under raw/ are skipped.
getAllRollCallVotes()

https://api.propublica.org/congress/v1/104/senate/sessions/1/votes/19.json
https://api.propublica.org/congress/v1/104/house/sessions/1/votes/2.json
https://api.propublica.org/congress/v1/104/senate/sessions/1/votes/101.json
https://api.propublica.org/congress/v1/104/senate/sessions/1/votes/465.json
https://api.propublica.org/congress/v1/105/house/sessions/1/votes/3.json
https://api.propublica.org/congress/v1/106/house/sessions/1/votes/2.json
https://api.propublica.org/congress/v1/107/house/sessions/1/votes/2.json
https://api.propublica.org/congress/v1/109/house/sessions/1/votes/2.json
https://api.propublica.org/congress/v1/110/house/sessions/2/votes/568.json
https://api.propublica.org/congress/v1/110/house/sessions/2/votes/567.json
https://api.propublica.org/congress/v1/111/senate/sessions/1/votes/195.json
https://api.propublica.org/congress/v1/111/house/sessions/1/votes/990.json
https://api.propublica.org/congress/v1/111/house/sessions/1/votes/987.json
https://api.propublica.org/congr

### Bills

In [17]:
# get all information on a bill
def getBill(congress, bill_id, to_csv=False):
    """Fetch the record of a single bill.

    congress: Congress number; converted to a string for the URL.
    bill_id: bill slug used as the last path segment (e.g. 'hr1129').
    to_csv: when True and a response was obtained, pass it to writeBillCSV.

    Returns the decoded response, or None when api_call returns None.
    """
    params = [str(congress), 'bills', bill_id]
    response = api_call(params)
    # Only write when there is something to write, matching the other getters.
    if to_csv and response is not None:
        # NOTE(review): writeBillCSV is not defined in the visible part of this
        # notebook (only writeBillsCSV is); confirm it exists before calling
        # getBill with to_csv=True.
        writeBillCSV(response, bill_id)
    return response

def getAllVoteBills():
    """Placeholder: downloading bills for the full vote data set is not implemented yet."""
    return None

def getAllMemberVoteBills():
    """Fetch every bill referenced in member_votes_clean.csv and save them to one CSV.

    Bill IDs are read from the 'bill_id' column and are expected to look like
    '<bill slug>-<congress>' (e.g. 'hr1129-105'). Each bill is fetched with
    getBill; list-valued fields are flattened to strings before the record is
    added to the result, which is written to member_votes_bills.csv.

    IDs that do not match the expected shape and bills whose request failed or
    returned no results are logged and skipped instead of aborting the run.
    """
    # get bill IDs from recent member votes
    votes = pd.read_csv('member_votes_clean.csv')
    bill_ids = votes.bill_id.unique()

    frames = []

    # get bills for recent member votes
    for b in bill_ids:
        # Missing IDs (NaN / 'NA') do not split into slug and congress.
        parts = str(b).rsplit('-', 1)
        if len(parts) != 2:
            logger.warning('Skipping malformed bill id: %r', b)
            continue
        bill_id, congress = parts

        json_file = getBill(congress, bill_id, to_csv=False)
        if json_file is None or not json_file.get('results'):
            logger.warning('No bill data returned for %s', b)
            continue

        # shallow copy so the response object itself is left untouched
        results = dict(json_file['results'][0])

        # convert array values to strings; each field is joined from its own
        # list (subcommittee_codes was previously built from committee_codes)
        results['committee_codes'] = '; '.join(results.get('committee_codes') or [])
        results['subcommittee_codes'] = '; '.join(results.get('subcommittee_codes') or [])
        for key in ('versions', 'actions', 'votes'):
            if key in results:
                results[key] = json.dumps(results[key])

        # NOTE(review): from_dict on a single record yields one row per element
        # of any remaining list/dict-valued field and fails when every value is
        # a scalar -- confirm one row per bill is what is produced.
        frames.append(pd.DataFrame.from_dict(results))

    # build the result once instead of appending inside the loop
    df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    df.to_csv('member_votes_bills.csv', encoding='utf-8', index=False)

# write list of bills cosponsored by member to CSV
def writeBillsCSV(json_file, member_id):
    """Write the bills contained in an API response to raw/member_bills_<member_id>.csv.

    json_file: decoded API response; bills are read from
        json_file['results'][0]['bills'].
    member_id: used only to build the output file name.
    """
    bills = json_file['results'][0]['bills']
    # The frame must be bound to the name that is written below; it was
    # previously assigned to an unrelated variable, leaving `df` undefined.
    df = pd.DataFrame.from_dict(bills)
    df.to_csv('raw/member_bills_%s.csv' % (member_id), encoding='utf-8', index=False)

    

Write all bills associated with recent member votes to a single CSV file.  This partial bill data set lets us start building model data while we work on pulling the full bill data set.

In [18]:
# Fetch every bill referenced in member_votes_clean.csv (one API request per
# bill) and write the records to member_votes_bills.csv.
# NOTE(review): member_votes_clean.csv is not produced by any cell visible in
# this notebook -- confirm where it comes from.
getAllMemberVoteBills()

https://api.propublica.org/congress/v1/105/bills/hconres133.json
https://api.propublica.org/congress/v1/105/bills/hconres136.json
https://api.propublica.org/congress/v1/105/bills/hconres137.json
https://api.propublica.org/congress/v1/105/bills/hconres139.json
https://api.propublica.org/congress/v1/105/bills/hconres152.json
https://api.propublica.org/congress/v1/105/bills/hconres22.json
https://api.propublica.org/congress/v1/105/bills/hconres227.json
https://api.propublica.org/congress/v1/105/bills/hconres235.json
https://api.propublica.org/congress/v1/105/bills/hconres75.json
https://api.propublica.org/congress/v1/105/bills/hjres107.json
https://api.propublica.org/congress/v1/105/bills/hjres2.json
https://api.propublica.org/congress/v1/105/bills/hr1129.json
https://api.propublica.org/congress/v1/105/bills/hr1348.json
https://api.propublica.org/congress/v1/105/bills/hr2014.json
https://api.propublica.org/congress/v1/105/bills/hr2015.json
https://api.propublica.org/congress/v1/105/bills/

array(['hconres133-105', 'hconres136-105', 'hconres137-105', ...,
       's782-115', 's84-115', 'sconres3-115'], dtype=object)
