In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import requests
import json

import time

from crpapi import CRP

## Support Functions

In [2]:
def get_keys(path):
    """
    Pulls necessary api keys from designated path
    """
    with open(path) as f:
        return json.load(f)

## ETL

In [3]:
current_congress = 116

### Voteview

In [4]:
def get_voteview_csv_content(content_type, current_congress_num):
    '''
    Retrieves congress data by content type (passed in) for the 104th Congress to the current Congress today.
    
    '''
    if content_type not in ['votes', 'rollcalls', 'members']:
        print('Not a valid content type')
        return
    
    for congress_num in range(104, current_congress_num+1):
        #download the vote each member made for each roll call
        url = 'https://voteview.com/static/data/out/{}/S{}_{}.csv'.format(content_type, congress_num, content_type)

        r = requests.get(url)
        content = r.content
        file = open('{}/S{}_{}.csv'.format(content_type, congress_num, content_type), 'wb')

        file.write(content)
        file.close()
        
        #put in a sleep timer to not overload the voteview servers
        time.sleep(1)
        
    return

In [18]:
# get_voteview_csv_content('votes', current_congress)

In [19]:
# get_voteview_csv_content('rollcalls', current_congress)

In [20]:
# get_voteview_csv_content('members', current_congress)

### Open Secrets

In [5]:
#get key for open secrets api
key_path = "/Users/flatironschool/.secret/open_secrets_api.json"
keys = get_keys(key_path)

api_key = keys['api_key']

In [6]:
'''To use if abstracted library fails me'''
# url = 'https://www.opensecrets.org/api/?method=candContrib'
# method = '?method=candContrib'
# params = {'cid' = 'N00003389',
#           'cycle' = 2014,
#           'output' = 'json'}

# r = requests.get(url+method, params=params, 

'To use if abstracted library fails me'

In [7]:
crp = CRP(api_key)

In [8]:
contribs = crp.candidates.contrib('N00003389', '2014')
contribs

[{'@attributes': {'org_name': 'Blackstone Group',
   'total': '184700',
   'pacs': '0',
   'indivs': '184700'}},
 {'@attributes': {'org_name': 'Goldman Sachs',
   'total': '129025',
   'pacs': '10000',
   'indivs': '119025'}},
 {'@attributes': {'org_name': 'Humana Inc',
   'total': '104500',
   'pacs': '10000',
   'indivs': '94500'}},
 {'@attributes': {'org_name': 'NorPAC',
   'total': '100151',
   'pacs': '-249',
   'indivs': '100400'}},
 {'@attributes': {'org_name': 'Kindred Healthcare',
   'total': '95450',
   'pacs': '10000',
   'indivs': '85450'}},
 {'@attributes': {'org_name': 'JPMorgan Chase & Co',
   'total': '93075',
   'pacs': '10000',
   'indivs': '83075'}},
 {'@attributes': {'org_name': 'Citigroup Inc',
   'total': '87100',
   'pacs': '10000',
   'indivs': '77100'}},
 {'@attributes': {'org_name': 'Alliance Coal',
   'total': '86600',
   'pacs': '5000',
   'indivs': '81600'}},
 {'@attributes': {'org_name': 'Votesane PAC',
   'total': '82000',
   'pacs': '0',
   'indivs': '82

In [11]:
industries = crp.candidates.industries('N00003389', '2014')
industries

[{'@attributes': {'industry_code': 'F07',
   'industry_name': 'Securities & Investment',
   'indivs': '2157107',
   'pacs': '270600',
   'total': '2427707'}},
 {'@attributes': {'industry_code': 'W06',
   'industry_name': 'Retired',
   'indivs': '1447510',
   'pacs': '0',
   'total': '1447510'}},
 {'@attributes': {'industry_code': 'E01',
   'industry_name': 'Oil & Gas',
   'indivs': '728710',
   'pacs': '380599',
   'total': '1109309'}},
 {'@attributes': {'industry_code': 'K01',
   'industry_name': 'Lawyers/Law Firms',
   'indivs': '761864',
   'pacs': '304650',
   'total': '1066514'}},
 {'@attributes': {'industry_code': 'H01',
   'industry_name': 'Health Professionals',
   'indivs': '734995',
   'pacs': '289500',
   'total': '1024495'}},
 {'@attributes': {'industry_code': 'F09',
   'industry_name': 'Insurance',
   'indivs': '647100',
   'pacs': '366500',
   'total': '1013600'}},
 {'@attributes': {'industry_code': 'F10',
   'industry_name': 'Real Estate',
   'indivs': '876975',
   'pacs

In [12]:
sector = crp.candidates.sector('N00003389', '2014')
sector

[{'@attributes': {'sector_name': 'Agribusiness',
   'sectorid': 'A',
   'indivs': '707620',
   'pacs': '531587',
   'total': '1239207'}},
 {'@attributes': {'sector_name': 'Communic/Electronics',
   'sectorid': 'B',
   'indivs': '683400',
   'pacs': '424243',
   'total': '1107643'}},
 {'@attributes': {'sector_name': 'Construction',
   'sectorid': 'C',
   'indivs': '476426',
   'pacs': '192450',
   'total': '668876'}},
 {'@attributes': {'sector_name': 'Defense',
   'sectorid': 'D',
   'indivs': '68950',
   'pacs': '204900',
   'total': '273850'}},
 {'@attributes': {'sector_name': 'Energy/Nat Resource',
   'sectorid': 'E',
   'indivs': '1282860',
   'pacs': '760082',
   'total': '2042942'}},
 {'@attributes': {'sector_name': 'Finance/Insur/RealEst',
   'sectorid': 'F',
   'indivs': '5123131',
   'pacs': '1198450',
   'total': '6321581'}},
 {'@attributes': {'sector_name': 'Health',
   'sectorid': 'H',
   'indivs': '1691266',
   'pacs': '1057523',
   'total': '2748789'}},
 {'@attributes': {'

In [10]:
help(crp.candidates)

Help on CandidatesClient in module crpapi object:

class CandidatesClient(Client)
 |  CandidatesClient(apikey=None, cache='.cache')
 |  
 |  Retrieves and parses information pertaining to current Congressional
 |  legislators.
 |  
 |  Method resolution order:
 |      CandidatesClient
 |      Client
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  contrib(self, cid, cycle=None)
 |  
 |  contrib_by_ind(self, cid, industry, cycle=None)
 |  
 |  get(self, id_code)
 |      id_code may be either a candidate's specific CID, or a two letter
 |      state code, or a four character district code.
 |  
 |  industries(self, cid, cycle=None)
 |  
 |  pfd(self, cid, year=None)
 |  
 |  sector(self, cid, cycle=None)
 |  
 |  summary(self, cid, cycle=None)
 |  
 |  ----------------------------------------------------------------------
 |  Methods inherited from Client:
 |  
 |  __init__(self, apikey=None, cache='.cache')
 |      Initialize self.  See help(type(self)) for accurate signa

### MIT Election Lab

Having trouble finding the right structure for this API call for a download.

In [21]:
#get key for dataverse api
key_path = "/Users/flatironschool/.secret/dataverse_api.json"
keys = get_keys(key_path)

api_key = keys['api_key']

In [31]:
server_url = 'https://dataverse.harvard.edu/dataset'
path = '/api/access/datafile/'
data_id = 'data'
# data_id = ':persistentId/?persistentId=doi:10.7910/DVN/PEJ5QU'

# '$SERVER_URL/api/search?q=$QUERY'

# 'GET http://$SERVER/api/access/datafile/:persistentId/?persistentId=doi:10.5072/FK2/J8SJZB'

url = server_url + path + data_id

r = requests.get(url)
print(r)
print(r.url)
# print(r.content)
print(r.text[:1000])

<Response [404]>
https://dataverse.harvard.edu/dataset/api/access/datafile/data
<?xml version='1.0' encoding='UTF-8' ?>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head id="j_idt2"><!-- Global site tag (gtag.js) - Google Analytics -->
<script async="async" src="https://www.googletagmanager.com/gtag/js?id=UA-61753334-1"></script>
<script>
  //<![CDATA[
  window.dataLayer = window.dataLayer || [];
  function gtag(){dataLayer.push(arguments);}
  gtag('js', new Date()); gtag('config', 'UA-61753334-1');

  window.addEventListener("load", enableAnalyticsEventCapture, false);

  function enableAnalyticsEventCapture() {
    // Download button
    $(document).on("click", ".btn-download", function() {
      var category = $(this).text();
      var label = getFileId($(this));
      gtag('event', 'Download',{'event_category' : category,
                                'event_label' : label});
    });

    // Request Access button
    $(document).on("click", 

### ProPublica

In [None]:
key_path = "/Users/flatironschool/.secret/pro_publica_api.json"
keys = get_keys(key_path)

api_key = keys['api_key']

In [None]:
version = 'v1'

url = 'https://api.propublica.org/congress/{}/'.format(version)

"https://api.propublica.org/congress/v1/116/bills/hr502.json"

variables = ['DATE_CODE', 'DATE_DESC', 'POP', 'NAME']

params = {'get': ','.join(variables), 'for': granularity, 'key': api_key}

r = requests.get(url, params=params)
print(r.url)
print(r)
print(r.text[:1000])